aboutsummaryrefslogtreecommitdiffhomepage
path: root/sample/quantize.cpp
diff options
context:
space:
mode:
authorMITSUNARI Shigeo <[email protected]>2010-04-16 10:33:04 +0900
committerMITSUNARI Shigeo <[email protected]>2010-04-16 10:33:04 +0900
commitcbb4ca2178c6bd391b48c130afececfdddc66836 (patch)
tree46c9cfe83e040443af60585e212d071f0f127ebc /sample/quantize.cpp
downloadxbyak-cbb4ca2178c6bd391b48c130afececfdddc66836.tar.gz
xbyak-cbb4ca2178c6bd391b48c130afececfdddc66836.zip
first commit
Diffstat (limited to 'sample/quantize.cpp')
-rw-r--r--sample/quantize.cpp224
1 files changed, 224 insertions, 0 deletions
diff --git a/sample/quantize.cpp b/sample/quantize.cpp
new file mode 100644
index 0000000..8816ca2
--- /dev/null
+++ b/sample/quantize.cpp
@@ -0,0 +1,224 @@
+/*
+ @author herumi
+ @date $Date: 2009/12/09 05:40:52 $
+
+ JPEG quantize sample
+ This program generates a quantization routine by using fast division algorithm in run-time.
+
+ time(sec)
+ quality 1(low) 10 50 100(high)
+ VC2005 8.0 8.0 8.0 8.0
+ Xbyak 1.6 0.8 0.5 0.5
+
+
+; generated code at q = 1
+ push esi
+ push edi
+ mov edi,dword ptr [esp+0Ch]
+ mov esi,dword ptr [esp+10h]
+ mov eax,dword ptr [esi]
+ shr eax,4
+ mov dword ptr [edi],eax
+ mov eax,dword ptr [esi+4]
+ mov edx,0BA2E8BA3h
+ mul eax,edx
+ shr edx,3
+ ...
+
+; generated code at q = 100
+ push esi
+ push edi
+ mov edi,dword ptr [esp+0Ch]
+ mov esi,dword ptr [esp+10h]
+ mov eax,dword ptr [esi]
+ mov dword ptr [edi],eax
+ mov eax,dword ptr [esi+4]
+ mov dword ptr [edi+4],eax
+ mov eax,dword ptr [esi+8]
+ mov dword ptr [edi+8],eax
+ mov eax,dword ptr [esi+0Ch]
+ ...
+
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include "xbyak/xbyak.h"
+#ifdef _MSC_VER
+ #pragma warning(disable : 4996) // scanf
+#endif
+
+typedef Xbyak::uint64 uint64; // shorthand for Xbyak's integer typedefs
+typedef Xbyak::uint32 uint32;
+
+const int N = 64; // number of entries in the quantization table and in each src/dest array
+
+class Quantize : public Xbyak::CodeGenerator { // emits an unrolled quantize(dest, src) routine with the table baked in as constants
+ static int ilog2(int x) // floor(log2(x)) for x >= 1; yields -1 when x <= 0
+ {
+ int shift = 0;
+ while ((1 << shift) <= x) shift++; // find the first power of two that exceeds x
+ return shift - 1;
+ }
+public:
+ /*
+ Emit code that divides one 32-bit source element by a constant,
+ using shifts and a multiplication instead of a div instruction.
+ input : esi
+ output : eax = [esi+offset] / dividend
+ destroy : edx
+ note : despite its name, 'dividend' is the constant divisor.
+ It must be nonzero; for 0 the odd/exponent loop below never terminates.
+ 'offset' is the byte offset of the element relative to esi.
+ */
+ void udiv(uint32 dividend, int offset)
+ {
+ mov(eax, ptr[esi + offset]); // load the value to be divided
+
+ /* dividend = odd x 2^exponent */
+ int exponent = 0, odd = dividend;
+ while ((odd & 1) == 0) { // strip factors of two, counting them in exponent
+ odd >>= 1; exponent++;
+ }
+
+ if (odd == 1) { // power-of-two divisor: a right shift (or nothing for 1) is enough
+ if (exponent) {
+ shr(eax, exponent);
+ }
+ return;
+ }
+
+ uint64 mLow, mHigh; // lower and upper candidates for the scaled reciprocal of odd
+ int len = ilog2(odd) + 1; // number of significant bits in odd
+ {
+ uint64 roundUp = uint64(1) << (32 + len); // 2^(32+len)
+ uint64 k = roundUp / (0xFFFFFFFFL - (0xFFFFFFFFL % odd)); // divided by the largest multiple of odd that is <= 0xFFFFFFFF
+ mLow = roundUp / odd;
+ mHigh = (roundUp + k) / odd;
+ }
+
+ while (((mLow >> 1) < (mHigh >> 1)) && (len > 0)) { // halve both candidates, and the shift, while they still differ afterwards
+ mLow >>= 1; mHigh >>= 1; len--;
+ }
+
+ uint64 m; int a; // m: multiplier to emit; a: 1 when m must also be added to the product
+ if ((mHigh >> 32) == 0) { // mHigh fits in 32 bits: plain multiply-and-shift
+ m = mHigh; a = 0;
+ } else { // otherwise recompute from 2^(32+ilog2(odd)) and use the add-after-multiply form
+ len = ilog2(odd);
+ uint64 roundDown = uint64(1) << (32 + len);
+ mLow = roundDown / odd;
+ int r = (int)(roundDown % odd);
+ m = (r <= (odd >> 1)) ? mLow : mLow + 1; // bump the quotient when the remainder exceeds odd/2
+ a = 1;
+ }
+ while ((m & 1) == 0) { // drop trailing zero bits of m, shortening the shift to match
+ m >>= 1; len--;
+ }
+ len += exponent; // fold the power-of-two factor into the final shift
+
+ mov(edx, int(m));
+ mul(edx); // edx:eax = eax * m; the quotient is taken from the high half (edx)
+ if (a) {
+ add(eax, int(m)); // add m to the 64-bit product: low half first ...
+ adc(edx, 0); // ... then carry into the high half
+ }
+ if (len) {
+ shr(edx, len);
+ }
+ mov(eax, edx); // deliver the quotient in eax
+ }
+ /*
+ Generate the body of a function callable as
+ quantize(uint32 dest[64], const uint32 src[64]);
+ which stores src[i] / qTbl[i] into dest[i] for all N entries, fully unrolled.
+ The values of qTbl are read here, at code-generation time, and become
+ constants in the emitted code; the emitted code itself never reads qTbl.
+ */
+ Quantize(const uint32 qTbl[64])
+ {
+ push(esi);
+ push(edi);
+ const int P_ = 4 * 2; // stack bytes taken by the two pushes above
+ mov(edi, ptr [esp+P_+4]); // dest (first argument; presumably +4 skips the return address)
+ mov(esi, ptr [esp+P_+8]); // src (second argument)
+ for (int i = 0; i < N; i++) {
+ udiv(qTbl[i], i * 4); // eax = src[i] / qTbl[i]; elements are 4 bytes apart
+ mov(ptr[edi+i*4], eax); // dest[i] = eax
+ }
+ pop(edi);
+ pop(esi);
+ ret();
+ }
+};
+
+/*
+    Reference implementation in plain C++:
+    dest[i] = src[i] / qTbl[i] for each of the N entries.
+    Every qTbl entry must be nonzero.
+*/
+void quantize(uint32 dest[64], const uint32 src[64], const uint32 qTbl[64])
+{
+    int idx = 0;
+    while (idx < N) {
+        dest[idx] = src[idx] / qTbl[idx];
+        ++idx;
+    }
+}
+
+/*
+    Driver: reads a quality factor q (argv[1] or stdin), scales the base
+    quantization table by 1/q (entries that become 0 are clamped to 1),
+    checks the JIT-generated routine against the plain C++ quantize(),
+    then times both over the same input.
+    Returns 1 when built for 64-bit or when q is not a positive integer,
+    0 otherwise.
+*/
+int main(int argc, char *argv[])
+{
+#ifdef XBYAK64
+    puts("not implemented for 64bit");
+    return 1;
+#endif
+    int q = 0;
+    if (argc > 1) {
+        q = atoi(argv[1]);
+    } else {
+        printf("input quantize=");
+        if (scanf("%d", &q) != 1) q = 0; // unreadable input is rejected below
+    }
+    printf("q=%d\n", q);
+    if (q < 1) {
+        // q == 0 would divide by zero when scaling the table; a negative q
+        // would only "work" through unsigned wrap-around.
+        fprintf(stderr, "quantize must be a positive integer\n");
+        return 1;
+    }
+    uint32 qTbl[] = {
+        16, 11, 10, 16, 24, 40, 51, 61,
+        12, 12, 14, 19, 26, 58, 60, 55,
+        14, 13, 16, 24, 40, 57, 69, 56,
+        14, 17, 22, 29, 51, 87, 80, 62,
+        18, 22, 37, 56, 68, 109, 103, 77,
+        24, 35, 55, 64, 81, 104, 113, 92,
+        49, 64, 78, 87, 103, 121, 120, 101,
+        72, 92, 95, 98, 112, 100, 103, 99
+    };
+
+    // Scale the table by the quality factor; a divisor must never be 0.
+    for (int i = 0; i < N; i++) {
+        qTbl[i] /= q;
+        if (qTbl[i] == 0) qTbl[i] = 1;
+    }
+
+    try {
+        uint32 src[N];
+        uint32 dest[N];
+        uint32 dest2[N];
+        for (int i = 0; i < N; i++) {
+            src[i] = rand() % 2048;
+        }
+
+        Quantize jit(qTbl);
+//printf("jit size=%d, ptr=%p\n", jit.getSize(), jit.getCode());
+        // The generated code only reads its first two arguments; the third
+        // parameter keeps the call shape identical to quantize().
+        void (*quantize2)(uint32*, const uint32*, const uint32 *) = (void (*)(uint32*, const uint32*, const uint32 *))jit.getCode();
+
+        // Correctness check: both routines must agree on every element.
+        quantize(dest, src, qTbl);
+        quantize2(dest2, src, qTbl);
+        for (int i = 0; i < N; i++) {
+            if (dest[i] != dest2[i]) {
+                printf("err[%d] %d %d\n", i, dest[i], dest2[i]);
+            }
+        }
+
+        const int count = 10000000;
+        clock_t begin; // clock() returns clock_t, which need not fit in an int
+
+        begin = clock();
+        for (int i = 0; i < count; i++) {
+            quantize(dest, src, qTbl);
+        }
+        printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
+
+        begin = clock();
+        for (int i = 0; i < count; i++) {
+            quantize2(dest, src, qTbl);
+        }
+        printf("time=%.1fsec\n", (clock() - begin) / double(CLOCKS_PER_SEC));
+    } catch (Xbyak::Error err) {
+        printf("ERR:%s(%d)\n", Xbyak::ConvertErrorToString(err), err);
+    } catch (...) {
+        printf("unknown error\n");
+    }
+    return 0;
+}