├── .gitignore ├── README-en.txt ├── README-ja.txt ├── README.md ├── jitasm.h ├── samples ├── example1 │ ├── Makefile │ ├── example1.cpp │ └── example1.vcproj ├── fibonacci │ ├── Makefile │ ├── fibonacci.cpp │ └── fibonacci.vcproj ├── pixel_calc │ ├── pixel_calc.cpp │ ├── pixel_calc.vcproj │ ├── pixel_calc_jit.cpp │ └── pixel_calc_jit.h ├── samples.sln └── tutorial1 │ ├── Makefile │ ├── tutorial1.cpp │ └── tutorial1.vcproj └── test ├── Makefile ├── nasm_x64.nas ├── nasm_x86.nas ├── test.cpp ├── test.h ├── test.sln ├── test.vcxproj ├── test.vcxproj.filters ├── test_backend.cpp ├── x64.asm └── x86.asm /.gitignore: -------------------------------------------------------------------------------- 1 | .vs 2 | *.user 3 | 4 | */x64/Debug/ 5 | */x64/Release/ 6 | */Debug/ 7 | */Release/ 8 | -------------------------------------------------------------------------------- /README-en.txt: -------------------------------------------------------------------------------- 1 | jitasm 2 | x86/x64 JIT Assembler Library 3 | http://code.google.com/p/jitasm/ 4 | 5 | Introduction 6 | ============ 7 | jitasm is a C++ library for runtime code generation of x86/x64. You can 8 | write the code like an inline assembler. 9 | 10 | License 11 | ======= 12 | jitasm is open source software. You can distribute it under the terms of 13 | the new BSD license. 14 | 15 | Compiler 16 | ======== 17 | - VisualC++ 2005 or later 18 | - GCC 4.1 or later 19 | 20 | How to use 21 | ========== 22 | You just copy and include jitasm.h. Please see the sample code in 23 | the "samples" directory. 
24 | -------------------------------------------------------------------------------- /README-ja.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hikaru-i/jitasm/7cb9d72982c793374c49b804bec842deb1506d92/README-ja.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jitasm 2 | jitasm is a C++ library for runtime code generation of x86/x64. You can write the code like an inline assembler. 3 | 4 | ## Features 5 | - Header only library. 6 | - Support for x86, x64, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, avx, fma, xop, fma4. 7 | - Automatic code generation of prolog and epilog according to function calling convention. 8 | - Register allocation. 9 | - Support for Windows, Linux, FreeBSD, Mac 10 | 11 | ## License 12 | jitasm is open source software. You can distribute it under the terms of the new BSD license. 13 | 14 | ## Compiler 15 | - VisualC++ 2005 or later 16 | - GCC 4.1 or later 17 | 18 | ## How to use 19 | You just copy and include jitasm.h. 20 | 21 | ## Example 22 | ```C++ 23 | // int plus(int a, int b) 24 | // { 25 | // return a + b; 26 | // } 27 | struct Plus : public jitasm::function 28 | { 29 | Result main(Reg32 a, Reg32 b) 30 | { 31 | add(a, b); 32 | return a; 33 | } 34 | }; 35 | 36 | // Generate plus function and call. 
37 | Plus plus; 38 | int c = plus(1, 2); 39 | ``` 40 | -------------------------------------------------------------------------------- /samples/example1/Makefile: -------------------------------------------------------------------------------- 1 | MODE := 32 2 | ifeq ($(shell uname -m), x86_64) 3 | MODE := 64 4 | endif 5 | ifeq ($(shell uname -s), Darwin) 6 | MODE := 64 7 | endif 8 | 9 | JITASM_INCLUDE := ../../ 10 | JITASM_H := $(JITASM_INCLUDE)jitasm.h 11 | TARGET := example1 12 | OBJS := example1.o 13 | CXX := g++ 14 | CXXFLAGS := -fno-operator-names -Wall -I$(JITASM_INCLUDE) 15 | ifeq ($(MODE), 64) 16 | CXXFLAGS += -m64 17 | LDFLAGS += -m64 18 | else 19 | CXXFLAGS += -m32 -march=i686 -mmmx -msse -msse2 20 | LDFLAGS += -m32 21 | endif 22 | ifeq ($(DEBUG), 1) 23 | CXXFLAGS += -g 24 | endif 25 | 26 | .PHONY : all 27 | all: $(TARGET) 28 | ./$(TARGET) 29 | 30 | $(TARGET): $(OBJS) 31 | $(CXX) $(LDFLAGS) -o $@ $^ 32 | 33 | example1.o: example1.cpp $(JITASM_H) 34 | $(CXX) $(CXXFLAGS) -o $@ -c $< 35 | 36 | .PHONY : clean 37 | clean: 38 | $(RM) $(TARGET) $(OBJS) 39 | -------------------------------------------------------------------------------- /samples/example1/example1.cpp: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "jitasm.h" 3 | 4 | class example1 : public jitasm::function 5 | { 6 | private: 7 | int val_; 8 | public: 9 | example1(int val) : val_(val) {} 10 | 11 | Result main(Addr a1) 12 | { 13 | mov(ecx, dword_ptr[a1]); 14 | add(ecx, val_); 15 | return ecx; 16 | } 17 | }; 18 | 19 | int main() 20 | { 21 | example1 plus5(5); 22 | int result = plus5(10); 23 | printf("Result : %d\n", result); 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /samples/example1/example1.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 17 | 18 | 19 | 20 | 21 | 28 | 31 | 34 | 37 | 40 | 43 | 56 | 
59 | 62 | 65 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 93 | 96 | 97 | 105 | 108 | 111 | 114 | 117 | 120 | 130 | 133 | 136 | 139 | 148 | 151 | 154 | 157 | 160 | 163 | 166 | 169 | 172 | 173 | 180 | 183 | 186 | 189 | 192 | 196 | 209 | 212 | 215 | 218 | 225 | 228 | 231 | 234 | 237 | 240 | 243 | 246 | 249 | 250 | 258 | 261 | 264 | 267 | 270 | 274 | 284 | 287 | 290 | 293 | 302 | 305 | 308 | 311 | 314 | 317 | 320 | 323 | 326 | 327 | 328 | 329 | 330 | 331 | 336 | 339 | 340 | 341 | 346 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | -------------------------------------------------------------------------------- /samples/fibonacci/Makefile: -------------------------------------------------------------------------------- 1 | MODE := 32 2 | ifeq ($(shell uname -m), x86_64) 3 | MODE := 64 4 | endif 5 | ifeq ($(shell uname -s), Darwin) 6 | MODE := 64 7 | endif 8 | 9 | JITASM_INCLUDE := ../../ 10 | JITASM_H := $(JITASM_INCLUDE)jitasm.h 11 | TARGET := fibonacci 12 | OBJS := fibonacci.o 13 | CXX := g++ 14 | CXXFLAGS := -fno-operator-names -Wall -I$(JITASM_INCLUDE) 15 | ifeq ($(MODE), 64) 16 | CXXFLAGS += -m64 17 | LDFLAGS += -m64 18 | else 19 | CXXFLAGS += -m32 -march=i686 -mmmx -msse -msse2 20 | LDFLAGS += -m32 21 | endif 22 | ifeq ($(DEBUG), 1) 23 | CXXFLAGS += -g 24 | endif 25 | 26 | .PHONY : all 27 | all: $(TARGET) 28 | ./$(TARGET) 29 | 30 | $(TARGET): $(OBJS) 31 | $(CXX) $(LDFLAGS) -o $@ $^ 32 | 33 | fibonacci.o: fibonacci.cpp $(JITASM_H) 34 | $(CXX) $(CXXFLAGS) -o $@ -c $< 35 | 36 | .PHONY : clean 37 | clean: 38 | $(RM) $(TARGET) $(OBJS) 39 | -------------------------------------------------------------------------------- /samples/fibonacci/fibonacci.cpp: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "jitasm.h" 3 | 4 | struct Fibonacci : jitasm::function 5 | { 6 | Result main(Reg32 n) 7 | { 8 | Reg32 r; 9 | If(n == 0); 10 | mov(r, 0); 11 | ElseIf(n == 1 || n == 2); 12 | mov(r, 1); 13 | Else(); 14 | Reg32 a; 15 | 
Reg32 b; 16 | Reg32 i; 17 | mov(a, 1); 18 | mov(b, 1); 19 | mov(i, 2); 20 | While(i < n); 21 | mov(r, a); 22 | add(r, b); 23 | mov(b, a); 24 | mov(a, r); 25 | inc(i); 26 | EndW(); 27 | EndIf(); 28 | return r; 29 | } 30 | }; 31 | 32 | int main() 33 | { 34 | Fibonacci fibonacci; 35 | for (int n = 0; n < 10; n++) { 36 | printf("%d, ", fibonacci(n)); 37 | } 38 | return 0; 39 | } 40 | -------------------------------------------------------------------------------- /samples/fibonacci/fibonacci.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 17 | 18 | 19 | 20 | 21 | 28 | 31 | 34 | 37 | 40 | 43 | 56 | 59 | 62 | 65 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 93 | 96 | 97 | 104 | 107 | 110 | 113 | 116 | 120 | 133 | 136 | 139 | 142 | 149 | 152 | 155 | 158 | 161 | 164 | 167 | 170 | 173 | 174 | 182 | 185 | 188 | 191 | 194 | 197 | 207 | 210 | 213 | 216 | 225 | 228 | 231 | 234 | 237 | 240 | 243 | 246 | 249 | 250 | 258 | 261 | 264 | 267 | 270 | 274 | 284 | 287 | 290 | 293 | 302 | 305 | 308 | 311 | 314 | 317 | 320 | 323 | 326 | 327 | 328 | 329 | 330 | 331 | 336 | 339 | 340 | 341 | 346 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | -------------------------------------------------------------------------------- /samples/pixel_calc/pixel_calc.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009, Hikaru Inoue, Akihiro Yamasaki, 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 
10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following 12 | // disclaimer in the documentation and/or other materials provided 13 | // with the distribution. 14 | // * The names of the contributors may not be used to endorse or promote 15 | // products derived from this software without specific prior written 16 | // permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | 30 | // Pixel Calc sample 31 | 32 | #include 33 | #include 34 | #include 35 | #include "pixel_calc_jit.h" 36 | 37 | #define IDM_LOAD_SOURCE1 100 38 | #define IDM_LOAD_SOURCE2 101 39 | #define IDM_EXIT 105 40 | #define IDC_EXPRESSION 110 41 | #define IDC_RENDER 111 42 | 43 | using namespace Gdiplus; 44 | 45 | HINSTANCE g_hInstance; 46 | Bitmap *g_imgSource1; 47 | Bitmap *g_imgSource2; 48 | Bitmap *g_imgResult; 49 | 50 | /** 51 | * Show Open file dialog and load image 52 | */ 53 | void LoadSourceImage(HWND hWnd, Bitmap **ppBitmap) 54 | { 55 | WCHAR szFile[MAX_PATH]; 56 | szFile[0] = L'\0'; 57 | 58 | OPENFILENAME ofn; 59 | ZeroMemory(&ofn, sizeof(OPENFILENAME)); 60 | ofn.lStructSize = sizeof(OPENFILENAME); 61 | ofn.hwndOwner = hWnd; 62 | ofn.lpstrFilter = L"Bitmap files (*.bmp;*.jpg;*.png;*.tif;*.gif)\0*.bmp;*.jpg;*.png;*.tif;*.gif\0All files (*.*)\0*.*\0\0"; 63 | ofn.lpstrFile = szFile; 64 | ofn.nMaxFile = sizeof(szFile) / sizeof(WCHAR); 65 | ofn.Flags = OFN_FILEMUSTEXIST; 66 | if (GetOpenFileName(&ofn)) 67 | { 68 | delete *ppBitmap; 69 | *ppBitmap = NULL; 70 | 71 | *ppBitmap = Bitmap::FromFile(szFile); 72 | } 73 | } 74 | 75 | void UpdateResultImage(HWND hWnd, BOOL bRecreateResultBuffer) 76 | { 77 | UINT width = -1; 78 | UINT height = -1; 79 | 80 | if (bRecreateResultBuffer) 81 | { 82 | if (g_imgSource1) { 83 | width = g_imgSource1->GetWidth(); 84 | height = g_imgSource1->GetHeight(); 85 | } 86 | if (g_imgSource2) { 87 | width = min(width, g_imgSource2->GetWidth()); 88 | height = min(height, g_imgSource2->GetHeight()); 89 | } 90 | 91 | Bitmap *img = new Bitmap(width, height, PixelFormat32bppARGB); 92 | if (!img) 93 | return; 94 | 95 | delete g_imgResult; 96 | g_imgResult = img; 97 | } 98 | 99 | if (!g_imgResult) 100 | return; 101 | 102 | WCHAR szExpr[1024]; 103 | if (!GetDlgItemText(hWnd, IDC_EXPRESSION, szExpr, sizeof(szExpr) / sizeof(WCHAR))) 104 | return; 105 | 106 | width = g_imgResult->GetWidth(); 107 | height = g_imgResult->GetHeight(); 108 | if (width == 
0 || height == 0) 109 | return; 110 | 111 | if (!g_imgSource1) 112 | g_imgSource1 = new Bitmap(width, height, PixelFormat32bppARGB); 113 | 114 | if (!g_imgSource2) 115 | g_imgSource2 = new Bitmap(width, height, PixelFormat32bppARGB); 116 | 117 | Rect rcLock(0, 0, width, height); 118 | 119 | BitmapData src1BmpData; 120 | ZeroMemory(&src1BmpData, sizeof(BitmapData)); 121 | if (g_imgSource1) 122 | g_imgSource1->LockBits(&rcLock, ImageLockModeWrite, PixelFormat32bppARGB, &src1BmpData); 123 | 124 | BitmapData src2BmpData; 125 | ZeroMemory(&src2BmpData, sizeof(BitmapData)); 126 | if (g_imgSource2) 127 | g_imgSource2->LockBits(&rcLock, ImageLockModeWrite, PixelFormat32bppARGB, &src2BmpData); 128 | 129 | BitmapData dstBmpData; 130 | ZeroMemory(&dstBmpData, sizeof(BitmapData)); 131 | g_imgResult->LockBits(&rcLock, ImageLockModeWrite, PixelFormat32bppARGB, &dstBmpData); 132 | 133 | LARGE_INTEGER start, end; 134 | ::QueryPerformanceCounter(&start); 135 | 136 | bool res = RenderJIT(szExpr, dstBmpData.Scan0, dstBmpData.Stride, src1BmpData.Scan0, src1BmpData.Stride, src2BmpData.Scan0, src2BmpData.Stride, width, height); 137 | 138 | ::QueryPerformanceCounter(&end); 139 | 140 | g_imgResult->UnlockBits(&dstBmpData); 141 | if (g_imgSource2) g_imgSource2->UnlockBits(&dstBmpData); 142 | if (g_imgSource1) g_imgSource1->UnlockBits(&dstBmpData); 143 | 144 | LARGE_INTEGER freq; 145 | ::QueryPerformanceFrequency(&freq); 146 | 147 | WCHAR szTitle[128]; 148 | if (res) 149 | swprintf(szTitle, 128, L"Pixel Calc - Render time %f ms", (double)(end.QuadPart - start.QuadPart) / (double)freq.QuadPart * 1000.0); 150 | else 151 | swprintf(szTitle, 128, L"Pixel Calc - Invalid expression!", (double)(end.QuadPart - start.QuadPart) / (double)freq.QuadPart * 1000.0); 152 | SetWindowText(hWnd, szTitle); 153 | 154 | RedrawWindow(hWnd, NULL, NULL, RDW_INVALIDATE | RDW_UPDATENOW | RDW_ERASE); 155 | } 156 | 157 | /* 158 | * Window procedure 159 | */ 160 | LRESULT CALLBACK WndProc(HWND hWnd, UINT message, 
WPARAM wParam, LPARAM lParam) 161 | { 162 | switch (message) 163 | { 164 | case WM_CREATE: 165 | CreateWindowEx(WS_EX_CLIENTEDGE, L"EDIT", L"src1 + src2", WS_CHILD | WS_CLIPCHILDREN | WS_VISIBLE, 2, 2, 100, 24, hWnd, (HMENU)IDC_EXPRESSION, g_hInstance, NULL); 166 | break; 167 | 168 | case WM_SIZE: 169 | SetWindowPos(GetDlgItem(hWnd, IDC_EXPRESSION), NULL, 2, 2, LOWORD(lParam) - 4, 24, SWP_NOZORDER); 170 | break; 171 | 172 | case WM_COMMAND: 173 | switch (LOWORD(wParam)) 174 | { 175 | case IDM_LOAD_SOURCE1: 176 | LoadSourceImage(hWnd, &g_imgSource1); 177 | UpdateResultImage(hWnd, TRUE); 178 | break; 179 | 180 | case IDM_LOAD_SOURCE2: 181 | LoadSourceImage(hWnd, &g_imgSource2); 182 | UpdateResultImage(hWnd, TRUE); 183 | break; 184 | 185 | case IDC_RENDER: 186 | UpdateResultImage(hWnd, FALSE); 187 | break; 188 | 189 | case IDM_EXIT: 190 | DestroyWindow(hWnd); 191 | break; 192 | 193 | default: 194 | return DefWindowProc(hWnd, message, wParam, lParam); 195 | } 196 | break; 197 | 198 | case WM_PAINT: 199 | { 200 | PAINTSTRUCT ps; 201 | HDC hdc = BeginPaint(hWnd, &ps); 202 | Graphics g(hdc); 203 | if (g_imgResult) 204 | g.DrawImage(g_imgResult, 0.0f, 30.0f); 205 | EndPaint(hWnd, &ps); 206 | } 207 | break; 208 | 209 | case WM_DESTROY: 210 | PostQuitMessage(0); 211 | break; 212 | 213 | default: 214 | return DefWindowProc(hWnd, message, wParam, lParam); 215 | } 216 | return 0; 217 | } 218 | 219 | int APIENTRY wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nCmdShow) 220 | { 221 | g_hInstance = hInstance; 222 | 223 | // Initialize GDI+. 
224 | GdiplusStartupInput gdiplusStartupInput; 225 | ULONG_PTR gdiplusToken; 226 | GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, NULL); 227 | 228 | WNDCLASSEX wcex; 229 | wcex.cbSize = sizeof(WNDCLASSEX); 230 | wcex.style = CS_HREDRAW | CS_VREDRAW; 231 | wcex.lpfnWndProc = WndProc; 232 | wcex.cbClsExtra = 0; 233 | wcex.cbWndExtra = 0; 234 | wcex.hInstance = hInstance; 235 | wcex.hIcon = NULL; 236 | wcex.hCursor = LoadCursor(NULL, IDC_ARROW); 237 | wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1); 238 | wcex.lpszMenuName = NULL; 239 | wcex.lpszClassName = L"PIXEL_CALC"; 240 | wcex.hIconSm = NULL; 241 | RegisterClassEx(&wcex); 242 | 243 | HMENU hMenu = CreateMenu(); 244 | if (hMenu) 245 | { 246 | HMENU hFileMenu = CreateMenu(); 247 | InsertMenu(hFileMenu, -1, MF_BYPOSITION | MF_STRING, IDM_LOAD_SOURCE1, L"Load source 1"); 248 | InsertMenu(hFileMenu, -1, MF_BYPOSITION | MF_STRING, IDM_LOAD_SOURCE2, L"Load source 2"); 249 | InsertMenu(hFileMenu, -1, MF_BYPOSITION | MF_STRING, IDM_EXIT, L"&Exit"); 250 | InsertMenu(hMenu, -1, MF_BYPOSITION | MF_STRING | MF_POPUP, (UINT_PTR)hFileMenu, L"&File"); 251 | InsertMenu(hMenu, -1, MF_BYPOSITION | MF_STRING, IDC_RENDER, L"&Render"); 252 | } 253 | 254 | HWND hWnd = CreateWindow(L"PIXEL_CALC", L"Pixel Calc", WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, 0, CW_USEDEFAULT, 0, NULL, hMenu, hInstance, NULL); 255 | if (hWnd) 256 | { 257 | ShowWindow(hWnd, nCmdShow); 258 | UpdateWindow(hWnd); 259 | 260 | // Message Loop 261 | MSG msg; 262 | while (GetMessage(&msg, NULL, 0, 0)) 263 | { 264 | if (msg.message == WM_KEYDOWN && msg.wParam == VK_RETURN) 265 | PostMessage(hWnd, WM_COMMAND, MAKEWPARAM(IDC_RENDER, 0), 0); 266 | 267 | TranslateMessage(&msg); 268 | DispatchMessage(&msg); 269 | } 270 | } 271 | 272 | delete g_imgSource1; 273 | delete g_imgSource2; 274 | delete g_imgResult; 275 | GdiplusShutdown(gdiplusToken); 276 | 277 | return 0; 278 | } 279 | -------------------------------------------------------------------------------- 
/samples/pixel_calc/pixel_calc.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 17 | 18 | 19 | 20 | 21 | 28 | 31 | 34 | 37 | 40 | 43 | 56 | 59 | 62 | 65 | 74 | 77 | 80 | 83 | 86 | 89 | 92 | 95 | 98 | 99 | 107 | 110 | 113 | 116 | 119 | 122 | 132 | 135 | 138 | 141 | 152 | 155 | 158 | 161 | 164 | 167 | 170 | 173 | 176 | 177 | 184 | 187 | 190 | 193 | 196 | 200 | 213 | 216 | 219 | 222 | 231 | 234 | 237 | 240 | 243 | 246 | 249 | 252 | 255 | 256 | 264 | 267 | 270 | 273 | 276 | 280 | 290 | 293 | 296 | 299 | 310 | 313 | 316 | 319 | 322 | 325 | 328 | 331 | 334 | 335 | 336 | 337 | 338 | 339 | 344 | 347 | 348 | 351 | 352 | 353 | 356 | 359 | 360 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | -------------------------------------------------------------------------------- /samples/pixel_calc/pixel_calc_jit.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009, Hikaru Inoue, Akihiro Yamasaki, 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following 12 | // disclaimer in the documentation and/or other materials provided 13 | // with the distribution. 14 | // * The names of the contributors may not be used to endorse or promote 15 | // products derived from this software without specific prior written 16 | // permission. 
17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include "jitasm.h" 35 | 36 | 37 | class RenderExpr : public jitasm::function 38 | { 39 | private: 40 | struct Calculator : public boost::spirit::grammar 41 | { 42 | RenderExpr& renderExpr_; 43 | Calculator(RenderExpr& renderExpr) : renderExpr_(renderExpr) {} 44 | 45 | template 46 | struct definition 47 | { 48 | definition(Calculator const& self) 49 | { 50 | using namespace boost; 51 | using namespace boost::spirit; 52 | expression 53 | = term 54 | >> *( (L'+' >> term)[boost::bind(&do_add, ref(self.renderExpr_), _1, _2)] 55 | | (L'-' >> term)[boost::bind(&do_sub, ref(self.renderExpr_), _1, _2)] 56 | ); 57 | 58 | term 59 | = factor 60 | >> *( (L'*' >> factor)[boost::bind(&do_mul, ref(self.renderExpr_), _1, _2)] 61 | | (L'/' >> factor)[boost::bind(&do_div, ref(self.renderExpr_), _1, _2)] 62 | ); 63 | 64 | factor 65 | = real_p[boost::bind(&do_real, ref(self.renderExpr_), _1)] 66 | | chseq_p(L"src1")[boost::bind(&do_src1, ref(self.renderExpr_), _1, _2)] 67 | | chseq_p(L"src2")[boost::bind(&do_src2, ref(self.renderExpr_), _1, _2)] 68 | | L'(' >> expression >> L')' 69 | | 
(L'-' >> factor)[boost::bind(&do_neg, ref(self.renderExpr_), _1, _2)] 70 | | (L'+' >> factor); 71 | } 72 | 73 | boost::spirit::rule expression, term, factor; 74 | 75 | boost::spirit::rule const& start() const { return expression; } 76 | }; 77 | }; 78 | 79 | void do_add(const wchar_t *, const wchar_t *) 80 | { 81 | XmmReg reg1 = variableStack_.back(); 82 | variableStack_.pop_back(); 83 | XmmReg reg2 = variableStack_.back(); 84 | addps(reg2, reg1); 85 | } 86 | 87 | void do_sub(const wchar_t *, const wchar_t *) 88 | { 89 | XmmReg reg1 = variableStack_.back(); 90 | variableStack_.pop_back(); 91 | XmmReg reg2 = variableStack_.back(); 92 | subps(reg2, reg1); 93 | } 94 | 95 | void do_mul(const wchar_t *, const wchar_t *) 96 | { 97 | XmmReg reg1 = variableStack_.back(); 98 | variableStack_.pop_back(); 99 | XmmReg reg2 = variableStack_.back(); 100 | mulps(reg2, reg1); 101 | } 102 | 103 | void do_div(const wchar_t *, const wchar_t *) 104 | { 105 | XmmReg reg1 = variableStack_.back(); 106 | variableStack_.pop_back(); 107 | XmmReg reg2 = variableStack_.back(); 108 | divps(reg2, reg1); 109 | } 110 | 111 | void do_real(double val) 112 | { 113 | float fval = static_cast(val); 114 | mov(eax, *(unsigned int*)&fval); 115 | XmmReg var; 116 | movd(var, eax); 117 | shufps(var, var, 0); 118 | variableStack_.push_back(var); 119 | } 120 | 121 | void do_src(int i) 122 | { 123 | __declspec(align(16)) const static float factor8bpp[4] = {1.0f/255.0f, 1.0f/255.0f, 1.0f/255.0f, 1.0f/255.0f}; 124 | XmmReg src; 125 | movd(src, dword_ptr[i == 0 ? 
zsi : zbx]); 126 | punpcklbw(src, zero_); 127 | punpcklwd(src, zero_); 128 | cvtdq2ps(src, src); 129 | mov(zax, (uintptr_t)factor8bpp); 130 | mulps(src, xmmword_ptr[zax]); 131 | variableStack_.push_back(src); 132 | } 133 | 134 | void do_src1(const wchar_t *, const wchar_t *) 135 | { 136 | do_src(0); 137 | } 138 | 139 | void do_src2(const wchar_t *, const wchar_t *) 140 | { 141 | do_src(1); 142 | } 143 | 144 | void do_neg(const wchar_t *, const wchar_t *) 145 | { 146 | XmmReg var = variableStack_.back(); 147 | XmmReg tmp; 148 | xorps(tmp, tmp); 149 | subps(tmp, var); 150 | movaps(var, tmp); 151 | } 152 | 153 | public: 154 | RenderExpr(wchar_t *expr) : expr_(expr) {} 155 | 156 | void main(Addr dst, Addr dstSkip, Addr src1, Addr src1Skip, Addr src2, Addr src2Skip, Addr width, Addr height) 157 | { 158 | mov(zsi, ptr[src1]); 159 | mov(zbx, ptr[src2]); 160 | mov(zdi, ptr[dst]); 161 | xorps(zero_, zero_); 162 | 163 | L("LoopY"); 164 | { 165 | mov(ecx, dword_ptr[width]); 166 | 167 | L("LoopX"); 168 | { 169 | Calculator calc(*this); 170 | boost::spirit::parse_info info = boost::spirit::parse(expr_, calc, boost::spirit::space_p); 171 | if (!info.full) 172 | throw info; 173 | 174 | __declspec(align(16)) const static float factor8bpp[4] = {255.0f, 255.0f, 255.0f, 255.0f}; 175 | mov(zax, (uintptr_t)factor8bpp); 176 | XmmReg dstReg = variableStack_.back(); 177 | mulps(dstReg, xmmword_ptr[zax]); 178 | cvtps2dq(dstReg, dstReg); 179 | packssdw(dstReg, dstReg); 180 | packuswb(dstReg, dstReg); 181 | movd(eax, dstReg); 182 | movnti(dword_ptr[zdi], eax); 183 | 184 | add(zsi, 4); 185 | add(zbx, 4); 186 | add(zdi, 4); 187 | dec(ecx); 188 | jnz("LoopX"); 189 | } 190 | 191 | add(zsi, ptr[src1Skip]); 192 | add(zbx, ptr[src2Skip]); 193 | add(zdi, ptr[dstSkip]); 194 | dec(dword_ptr[height]); 195 | jnz("LoopY"); 196 | } 197 | } 198 | 199 | private: 200 | wchar_t *expr_; 201 | std::vector variableStack_; 202 | XmmReg zero_; 203 | }; 204 | 205 | bool RenderJIT(wchar_t *expr, void *dst, size_t 
dstStride, void *src1, size_t src1Stride, void *src2, size_t src2Stride, int width, int height) 206 | { 207 | try { 208 | RenderExpr renderExpr(expr); 209 | renderExpr(dst, dstStride - width * 4, src1, src1Stride - width * 4, src2, src2Stride - width * 4, width, height); 210 | 211 | //FILE *file = fopen("render.dmp", "wb"); 212 | //fwrite(renderExpr.GetCode(), 1, renderExpr.GetCodeSize(), file); 213 | //fclose(file); 214 | 215 | return true; 216 | } 217 | catch (...) { 218 | return false; 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /samples/pixel_calc/pixel_calc_jit.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009, Hikaru Inoue, Akihiro Yamasaki, 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following 12 | // disclaimer in the documentation and/or other materials provided 13 | // with the distribution. 14 | // * The names of the contributors may not be used to endorse or promote 15 | // products derived from this software without specific prior written 16 | // permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | bool RenderJIT(wchar_t *expr, void *dst, size_t dstStride, void *src1, size_t src1Stride, void *src2, size_t src2Stride, int width, int height); 31 | -------------------------------------------------------------------------------- /samples/samples.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 9.00 3 | # Visual Studio 2005 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example1", "example1\example1.vcproj", "{76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}" 5 | EndProject 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pixel_calc", "pixel_calc\pixel_calc.vcproj", "{3F1917AE-2DBD-426E-B86E-CD0CC50812AB}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fibonacci", "fibonacci\fibonacci.vcproj", "{39CA288F-4BF8-43F4-B34D-3268508B18D8}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tutorial1", "tutorial1\tutorial1.vcproj", "{D6F94489-9A36-4319-8A67-C8DA8E7AB33E}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug|Win32 = Debug|Win32 15 | Debug|x64 = Debug|x64 16 | Release|Win32 = Release|Win32 17 | Release|x64 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 20 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Debug|Win32.ActiveCfg = Debug|Win32 21 
| {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Debug|Win32.Build.0 = Debug|Win32 22 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Debug|x64.ActiveCfg = Debug|x64 23 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Debug|x64.Build.0 = Debug|x64 24 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Release|Win32.ActiveCfg = Release|Win32 25 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Release|Win32.Build.0 = Release|Win32 26 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Release|x64.ActiveCfg = Release|x64 27 | {76D53BF5-EE20-4BA5-BB3B-0210EFD1857A}.Release|x64.Build.0 = Release|x64 28 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Debug|Win32.ActiveCfg = Debug|Win32 29 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Debug|Win32.Build.0 = Debug|Win32 30 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Debug|x64.ActiveCfg = Debug|x64 31 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Debug|x64.Build.0 = Debug|x64 32 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Release|Win32.ActiveCfg = Release|Win32 33 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Release|Win32.Build.0 = Release|Win32 34 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Release|x64.ActiveCfg = Release|x64 35 | {3F1917AE-2DBD-426E-B86E-CD0CC50812AB}.Release|x64.Build.0 = Release|x64 36 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Debug|Win32.ActiveCfg = Debug|Win32 37 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Debug|Win32.Build.0 = Debug|Win32 38 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Debug|x64.ActiveCfg = Debug|x64 39 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Debug|x64.Build.0 = Debug|x64 40 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Release|Win32.ActiveCfg = Release|Win32 41 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Release|Win32.Build.0 = Release|Win32 42 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Release|x64.ActiveCfg = Release|x64 43 | {39CA288F-4BF8-43F4-B34D-3268508B18D8}.Release|x64.Build.0 = Release|x64 44 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Debug|Win32.ActiveCfg = Debug|Win32 45 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Debug|Win32.Build.0 = Debug|Win32 46 | 
{D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Debug|x64.ActiveCfg = Debug|x64 47 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Debug|x64.Build.0 = Debug|x64 48 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Release|Win32.ActiveCfg = Release|Win32 49 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Release|Win32.Build.0 = Release|Win32 50 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Release|x64.ActiveCfg = Release|x64 51 | {D6F94489-9A36-4319-8A67-C8DA8E7AB33E}.Release|x64.Build.0 = Release|x64 52 | EndGlobalSection 53 | GlobalSection(SolutionProperties) = preSolution 54 | HideSolutionNode = FALSE 55 | EndGlobalSection 56 | EndGlobal 57 | -------------------------------------------------------------------------------- /samples/tutorial1/Makefile: -------------------------------------------------------------------------------- 1 | MODE := 32 2 | ifeq ($(shell uname -m), x86_64) 3 | MODE := 64 4 | endif 5 | ifeq ($(shell uname -s), Darwin) 6 | MODE := 64 7 | endif 8 | 9 | JITASM_INCLUDE := ../../ 10 | JITASM_H := $(JITASM_INCLUDE)jitasm.h 11 | TARGET := tutorial1 12 | OBJS := tutorial1.o 13 | CXX := g++ 14 | CXXFLAGS := -fno-operator-names -Wall -I$(JITASM_INCLUDE) 15 | ifeq ($(MODE), 64) 16 | CXXFLAGS += -m64 17 | LDFLAGS += -m64 18 | else 19 | CXXFLAGS += -m32 -march=i686 -mmmx -msse -msse2 20 | LDFLAGS += -m32 21 | endif 22 | ifeq ($(DEBUG), 1) 23 | CXXFLAGS += -g 24 | endif 25 | 26 | .PHONY : all 27 | all: $(TARGET) 28 | ./$(TARGET) 29 | 30 | $(TARGET): $(OBJS) 31 | $(CXX) $(LDFLAGS) -o $@ $^ 32 | 33 | tutorial1.o: tutorial1.cpp $(JITASM_H) 34 | $(CXX) $(CXXFLAGS) -o $@ -c $< 35 | 36 | .PHONY : clean 37 | clean: 38 | $(RM) $(TARGET) $(OBJS) 39 | -------------------------------------------------------------------------------- /samples/tutorial1/tutorial1.cpp: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "jitasm.h" 3 | 4 | // int add1(int arg1) 5 | // { 6 | // return arg1 + 1; 7 | // } 8 | struct add1 : jitasm::function 9 | 
{ 10 | Result main(Addr a) 11 | { 12 | mov(ecx, dword_ptr[a]); 13 | add(ecx, 1); 14 | return ecx; 15 | } 16 | }; 17 | 18 | int main() 19 | { 20 | // Make function instance 21 | add1 f; 22 | 23 | // Runtime code generation and run 24 | int result = f(99); 25 | 26 | printf("Result : %d\n", result); 27 | 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /samples/tutorial1/tutorial1.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 14 | 17 | 18 | 19 | 20 | 21 | 28 | 31 | 34 | 37 | 40 | 43 | 56 | 59 | 62 | 65 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 93 | 96 | 97 | 105 | 108 | 111 | 114 | 117 | 120 | 130 | 133 | 136 | 139 | 148 | 151 | 154 | 157 | 160 | 163 | 166 | 169 | 172 | 173 | 180 | 183 | 186 | 189 | 192 | 196 | 209 | 212 | 215 | 218 | 225 | 228 | 231 | 234 | 237 | 240 | 243 | 246 | 249 | 250 | 258 | 261 | 264 | 267 | 270 | 274 | 284 | 287 | 290 | 293 | 302 | 305 | 308 | 311 | 314 | 317 | 320 | 323 | 326 | 327 | 328 | 329 | 330 | 331 | 336 | 339 | 340 | 341 | 346 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | MODE := 32 2 | ifneq ($(filter x86_64 amd64,$(shell uname -m)),) 3 | MODE := 64 4 | endif 5 | ifeq ($(shell uname -s), Darwin) 6 | MODE := 64 7 | endif 8 | # MODE (set above) selects the -m32/-m64 flags below: uname -m is x86_64 on Linux and amd64 on FreeBSD; Darwin is always built 64-bit. 9 | JITASM_INCLUDE := ../ 10 | JITASM_H := $(JITASM_INCLUDE)jitasm.h 11 | TARGET := test 12 | OBJS := test.o test_backend.o 13 | CXX := g++ 14 | CXXFLAGS := -fno-operator-names -Wall -I$(JITASM_INCLUDE) 15 | ifeq ($(MODE), 64) 16 | CXXFLAGS += -m64 17 | LDFLAGS += -m64 18 | else 19 | CXXFLAGS += -m32 -march=i686 -mmmx -msse -msse2 20 | LDFLAGS += -m32 21 | endif 22 | ifeq ($(DEBUG), 1) 23 | CXXFLAGS += -g 24 | endif 25 | 26 | .PHONY : all 27 | all: $(TARGET) 28 | ./$(TARGET) 29 | 30 | $(TARGET): $(OBJS) 31 | $(CXX) $(LDFLAGS) -o $@ $^ 32 | 
33 | test.o: test.cpp $(JITASM_H) 34 | $(CXX) $(CXXFLAGS) -o $@ -c $< 35 | 36 | test_backend.o: test_backend.cpp $(JITASM_H) 37 | $(CXX) $(CXXFLAGS) -o $@ -c $< 38 | 39 | .PHONY : clean 40 | clean: 41 | $(RM) $(TARGET) $(OBJS) 42 | -------------------------------------------------------------------------------- /test/nasm_x64.nas: -------------------------------------------------------------------------------- 1 | section .text 2 | align 16 3 | 4 | global nasm_test_mov_disp 5 | nasm_test_mov_disp: 6 | mov al, [1] 7 | mov cl, [1] 8 | mov ax, [1] 9 | mov cx, [1] 10 | mov eax, [1] 11 | mov ecx, [1] 12 | mov [1], al 13 | mov [1], cl 14 | mov [1], ax 15 | mov [1], cx 16 | mov [1], eax 17 | mov [1], ecx 18 | 19 | mov rax, [1] 20 | mov [1], rax 21 | mov rax, [qword 100000000h] 22 | mov [qword 100000000h], rax 23 | 24 | global nasm_test_fma 25 | nasm_test_fma: 26 | vfmadd132pd xmm1, xmm3, xmm5 27 | vfmadd132pd xmm1, xmm3, oword [edi] 28 | vfmadd132pd ymm1, ymm3, ymm5 29 | vfmadd132pd ymm1, ymm3, yword [edi] 30 | vfmadd213pd xmm1, xmm3, xmm5 31 | vfmadd213pd xmm1, xmm3, oword [edi] 32 | vfmadd213pd ymm1, ymm3, ymm5 33 | vfmadd213pd ymm1, ymm3, yword [edi] 34 | vfmadd231pd xmm1, xmm3, xmm5 35 | vfmadd231pd xmm1, xmm3, oword [edi] 36 | vfmadd231pd ymm1, ymm3, ymm5 37 | vfmadd231pd ymm1, ymm3, yword [edi] 38 | vfmadd132ps xmm1, xmm3, xmm5 39 | vfmadd132ps xmm1, xmm3, oword [edi] 40 | vfmadd132ps ymm1, ymm3, ymm5 41 | vfmadd132ps ymm1, ymm3, yword [edi] 42 | vfmadd213ps xmm1, xmm3, xmm5 43 | vfmadd213ps xmm1, xmm3, oword [edi] 44 | vfmadd213ps ymm1, ymm3, ymm5 45 | vfmadd213ps ymm1, ymm3, yword [edi] 46 | vfmadd231ps xmm1, xmm3, xmm5 47 | vfmadd231ps xmm1, xmm3, oword [edi] 48 | vfmadd231ps ymm1, ymm3, ymm5 49 | vfmadd231ps ymm1, ymm3, yword [edi] 50 | vfmadd132sd xmm1, xmm3, xmm5 51 | vfmadd132sd xmm1, xmm3, [edi] 52 | vfmadd213sd xmm1, xmm3, xmm5 53 | vfmadd213sd xmm1, xmm3, [edi] 54 | vfmadd231sd xmm1, xmm3, xmm5 55 | vfmadd231sd xmm1, xmm3, [edi] 56 | vfmadd132ss xmm1, 
xmm3, xmm5 57 | vfmadd132ss xmm1, xmm3, [edi] 58 | vfmadd213ss xmm1, xmm3, xmm5 59 | vfmadd213ss xmm1, xmm3, [edi] 60 | vfmadd231ss xmm1, xmm3, xmm5 61 | vfmadd231ss xmm1, xmm3, [edi] 62 | vfmaddsub132pd xmm1, xmm3, xmm5 63 | vfmaddsub132pd xmm1, xmm3, oword [edi] 64 | vfmaddsub132pd ymm1, ymm3, ymm5 65 | vfmaddsub132pd ymm1, ymm3, yword [edi] 66 | vfmaddsub213pd xmm1, xmm3, xmm5 67 | vfmaddsub213pd xmm1, xmm3, oword [edi] 68 | vfmaddsub213pd ymm1, ymm3, ymm5 69 | vfmaddsub213pd ymm1, ymm3, yword [edi] 70 | vfmaddsub231pd xmm1, xmm3, xmm5 71 | vfmaddsub231pd xmm1, xmm3, oword [edi] 72 | vfmaddsub231pd ymm1, ymm3, ymm5 73 | vfmaddsub231pd ymm1, ymm3, yword [edi] 74 | vfmaddsub132ps xmm1, xmm3, xmm5 75 | vfmaddsub132ps xmm1, xmm3, oword [edi] 76 | vfmaddsub132ps ymm1, ymm3, ymm5 77 | vfmaddsub132ps ymm1, ymm3, yword [edi] 78 | vfmaddsub213ps xmm1, xmm3, xmm5 79 | vfmaddsub213ps xmm1, xmm3, oword [edi] 80 | vfmaddsub213ps ymm1, ymm3, ymm5 81 | vfmaddsub213ps ymm1, ymm3, yword [edi] 82 | vfmaddsub231ps xmm1, xmm3, xmm5 83 | vfmaddsub231ps xmm1, xmm3, oword [edi] 84 | vfmaddsub231ps ymm1, ymm3, ymm5 85 | vfmaddsub231ps ymm1, ymm3, yword [edi] 86 | vfmsubadd132pd xmm1, xmm3, xmm5 87 | vfmsubadd132pd xmm1, xmm3, oword [edi] 88 | vfmsubadd132pd ymm1, ymm3, ymm5 89 | vfmsubadd132pd ymm1, ymm3, yword [edi] 90 | vfmsubadd213pd xmm1, xmm3, xmm5 91 | vfmsubadd213pd xmm1, xmm3, oword [edi] 92 | vfmsubadd213pd ymm1, ymm3, ymm5 93 | vfmsubadd213pd ymm1, ymm3, yword [edi] 94 | vfmsubadd231pd xmm1, xmm3, xmm5 95 | vfmsubadd231pd xmm1, xmm3, oword [edi] 96 | vfmsubadd231pd ymm1, ymm3, ymm5 97 | vfmsubadd231pd ymm1, ymm3, yword [edi] 98 | vfmsubadd132ps xmm1, xmm3, xmm5 99 | vfmsubadd132ps xmm1, xmm3, oword [edi] 100 | vfmsubadd132ps ymm1, ymm3, ymm5 101 | vfmsubadd132ps ymm1, ymm3, yword [edi] 102 | vfmsubadd213ps xmm1, xmm3, xmm5 103 | vfmsubadd213ps xmm1, xmm3, oword [edi] 104 | vfmsubadd213ps ymm1, ymm3, ymm5 105 | vfmsubadd213ps ymm1, ymm3, yword [edi] 106 | vfmsubadd231ps xmm1, 
xmm3, xmm5 107 | vfmsubadd231ps xmm1, xmm3, oword [edi] 108 | vfmsubadd231ps ymm1, ymm3, ymm5 109 | vfmsubadd231ps ymm1, ymm3, yword [edi] 110 | vfmsub132pd xmm1, xmm3, xmm5 111 | vfmsub132pd xmm1, xmm3, oword [edi] 112 | vfmsub132pd ymm1, ymm3, ymm5 113 | vfmsub132pd ymm1, ymm3, yword [edi] 114 | vfmsub213pd xmm1, xmm3, xmm5 115 | vfmsub213pd xmm1, xmm3, oword [edi] 116 | vfmsub213pd ymm1, ymm3, ymm5 117 | vfmsub213pd ymm1, ymm3, yword [edi] 118 | vfmsub231pd xmm1, xmm3, xmm5 119 | vfmsub231pd xmm1, xmm3, oword [edi] 120 | vfmsub231pd ymm1, ymm3, ymm5 121 | vfmsub231pd ymm1, ymm3, yword [edi] 122 | vfmsub132ps xmm1, xmm3, xmm5 123 | vfmsub132ps xmm1, xmm3, oword [edi] 124 | vfmsub132ps ymm1, ymm3, ymm5 125 | vfmsub132ps ymm1, ymm3, yword [edi] 126 | vfmsub213ps xmm1, xmm3, xmm5 127 | vfmsub213ps xmm1, xmm3, oword [edi] 128 | vfmsub213ps ymm1, ymm3, ymm5 129 | vfmsub213ps ymm1, ymm3, yword [edi] 130 | vfmsub231ps xmm1, xmm3, xmm5 131 | vfmsub231ps xmm1, xmm3, oword [edi] 132 | vfmsub231ps ymm1, ymm3, ymm5 133 | vfmsub231ps ymm1, ymm3, yword [edi] 134 | vfmsub132sd xmm1, xmm3, xmm5 135 | vfmsub132sd xmm1, xmm3, [edi] 136 | vfmsub213sd xmm1, xmm3, xmm5 137 | vfmsub213sd xmm1, xmm3, [edi] 138 | vfmsub231sd xmm1, xmm3, xmm5 139 | vfmsub231sd xmm1, xmm3, [edi] 140 | vfmsub132ss xmm1, xmm3, xmm5 141 | vfmsub132ss xmm1, xmm3, [edi] 142 | vfmsub213ss xmm1, xmm3, xmm5 143 | vfmsub213ss xmm1, xmm3, [edi] 144 | vfmsub231ss xmm1, xmm3, xmm5 145 | vfmsub231ss xmm1, xmm3, [edi] 146 | vfnmadd132pd xmm1, xmm3, xmm5 147 | vfnmadd132pd xmm1, xmm3, oword [edi] 148 | vfnmadd132pd ymm1, ymm3, ymm5 149 | vfnmadd132pd ymm1, ymm3, yword [edi] 150 | vfnmadd213pd xmm1, xmm3, xmm5 151 | vfnmadd213pd xmm1, xmm3, oword [edi] 152 | vfnmadd213pd ymm1, ymm3, ymm5 153 | vfnmadd213pd ymm1, ymm3, yword [edi] 154 | vfnmadd231pd xmm1, xmm3, xmm5 155 | vfnmadd231pd xmm1, xmm3, oword [edi] 156 | vfnmadd231pd ymm1, ymm3, ymm5 157 | vfnmadd231pd ymm1, ymm3, yword [edi] 158 | vfnmadd132ps xmm1, xmm3, xmm5 
159 | vfnmadd132ps xmm1, xmm3, oword [edi] 160 | vfnmadd132ps ymm1, ymm3, ymm5 161 | vfnmadd132ps ymm1, ymm3, yword [edi] 162 | vfnmadd213ps xmm1, xmm3, xmm5 163 | vfnmadd213ps xmm1, xmm3, oword [edi] 164 | vfnmadd213ps ymm1, ymm3, ymm5 165 | vfnmadd213ps ymm1, ymm3, yword [edi] 166 | vfnmadd231ps xmm1, xmm3, xmm5 167 | vfnmadd231ps xmm1, xmm3, oword [edi] 168 | vfnmadd231ps ymm1, ymm3, ymm5 169 | vfnmadd231ps ymm1, ymm3, yword [edi] 170 | vfnmadd132sd xmm1, xmm3, xmm5 171 | vfnmadd132sd xmm1, xmm3, [edi] 172 | vfnmadd213sd xmm1, xmm3, xmm5 173 | vfnmadd213sd xmm1, xmm3, [edi] 174 | vfnmadd231sd xmm1, xmm3, xmm5 175 | vfnmadd231sd xmm1, xmm3, [edi] 176 | vfnmadd132ss xmm1, xmm3, xmm5 177 | vfnmadd132ss xmm1, xmm3, [edi] 178 | vfnmadd213ss xmm1, xmm3, xmm5 179 | vfnmadd213ss xmm1, xmm3, [edi] 180 | vfnmadd231ss xmm1, xmm3, xmm5 181 | vfnmadd231ss xmm1, xmm3, [edi] 182 | vfnmsub132pd xmm1, xmm3, xmm5 183 | vfnmsub132pd xmm1, xmm3, oword [edi] 184 | vfnmsub132pd ymm1, ymm3, ymm5 185 | vfnmsub132pd ymm1, ymm3, yword [edi] 186 | vfnmsub213pd xmm1, xmm3, xmm5 187 | vfnmsub213pd xmm1, xmm3, oword [edi] 188 | vfnmsub213pd ymm1, ymm3, ymm5 189 | vfnmsub213pd ymm1, ymm3, yword [edi] 190 | vfnmsub231pd xmm1, xmm3, xmm5 191 | vfnmsub231pd xmm1, xmm3, oword [edi] 192 | vfnmsub231pd ymm1, ymm3, ymm5 193 | vfnmsub231pd ymm1, ymm3, yword [edi] 194 | vfnmsub132ps xmm1, xmm3, xmm5 195 | vfnmsub132ps xmm1, xmm3, oword [edi] 196 | vfnmsub132ps ymm1, ymm3, ymm5 197 | vfnmsub132ps ymm1, ymm3, yword [edi] 198 | vfnmsub213ps xmm1, xmm3, xmm5 199 | vfnmsub213ps xmm1, xmm3, oword [edi] 200 | vfnmsub213ps ymm1, ymm3, ymm5 201 | vfnmsub213ps ymm1, ymm3, yword [edi] 202 | vfnmsub231ps xmm1, xmm3, xmm5 203 | vfnmsub231ps xmm1, xmm3, oword [edi] 204 | vfnmsub231ps ymm1, ymm3, ymm5 205 | vfnmsub231ps ymm1, ymm3, yword [edi] 206 | vfnmsub132sd xmm1, xmm3, xmm5 207 | vfnmsub132sd xmm1, xmm3, [edi] 208 | vfnmsub213sd xmm1, xmm3, xmm5 209 | vfnmsub213sd xmm1, xmm3, [edi] 210 | vfnmsub231sd xmm1, 
xmm3, xmm5 211 | vfnmsub231sd xmm1, xmm3, [edi] 212 | vfnmsub132ss xmm1, xmm3, xmm5 213 | vfnmsub132ss xmm1, xmm3, [edi] 214 | vfnmsub213ss xmm1, xmm3, xmm5 215 | vfnmsub213ss xmm1, xmm3, [edi] 216 | vfnmsub231ss xmm1, xmm3, xmm5 217 | vfnmsub231ss xmm1, xmm3, [edi] 218 | 219 | global nasm_test_f16c 220 | nasm_test_f16c: 221 | rdfsbase ecx 222 | rdfsbase rcx 223 | rdgsbase ecx 224 | rdgsbase rcx 225 | rdrand cx 226 | rdrand ecx 227 | rdrand rcx 228 | wrfsbase ecx 229 | wrfsbase rcx 230 | wrgsbase ecx 231 | wrgsbase rcx 232 | vcvtph2ps ymm1, xmm3 233 | vcvtph2ps ymm1, oword[edx] 234 | vcvtph2ps xmm1, xmm3 235 | vcvtph2ps xmm1, qword[edx] 236 | vcvtps2ph xmm1, ymm3, 5 237 | vcvtps2ph oword[edx], ymm3, 5 238 | vcvtps2ph xmm1, xmm3, 5 239 | vcvtps2ph qword[edx], xmm3, 5 240 | 241 | global nasm_test_xop 242 | nasm_test_xop: 243 | vfrczpd xmm1, xmm3 244 | vfrczpd xmm1, oword [edx] 245 | vfrczpd ymm1, ymm3 246 | vfrczpd ymm1, yword [edx] 247 | vfrczps xmm1, xmm3 248 | vfrczps xmm1, oword [edx] 249 | vfrczps ymm1, ymm3 250 | vfrczps ymm1, yword [edx] 251 | vfrczsd xmm1, xmm3 252 | vfrczsd xmm1, qword [edx] 253 | vfrczss xmm1, xmm3 254 | vfrczss xmm1, dword [edx] 255 | vpcmov xmm1, xmm3, xmm5, xmm7 256 | vpcmov xmm1, xmm3, oword [esp], xmm7 257 | vpcmov xmm1, xmm3, xmm5, oword [esi] 258 | vpcmov ymm1, ymm3, ymm5, ymm7 259 | vpcmov ymm1, ymm3, yword [esp], ymm7 260 | vpcmov ymm1, ymm3, ymm5, yword [esi] 261 | vpcomb xmm1, xmm3, xmm5, 2 262 | vpcomb xmm1, xmm3, oword [esp], 2 263 | vpcomd xmm1, xmm3, xmm5, 2 264 | vpcomd xmm1, xmm3, oword [esp], 2 265 | vpcomq xmm1, xmm3, xmm5, 2 266 | vpcomq xmm1, xmm3, oword [esp], 2 267 | ;vpcomub xmm1, xmm3, xmm5, 2 268 | ;vpcomub xmm1, xmm3, oword [esp], 2 269 | ;vpcomud xmm1, xmm3, xmm5, 2 270 | ;vpcomud xmm1, xmm3, oword [esp], 2 271 | ;vpcomuq xmm1, xmm3, xmm5, 2 272 | ;vpcomuq xmm1, xmm3, oword [esp], 2 273 | ;vpcomuw xmm1, xmm3, xmm5, 2 274 | ;vpcomuw xmm1, xmm3, oword [esp], 2 275 | ;vpcomw xmm1, xmm3, xmm5, 2 276 | ;vpcomw xmm1, 
xmm3, oword [esp], 2 277 | ;vpermil2pd xmm1, xmm3, xmm5, xmm7 278 | ;vpermil2pd xmm1, xmm3, oword [esp], xmm7 279 | ;vpermil2pd xmm1, xmm3, xmm5, oword [esi] 280 | ;vpermil2pd ymm1, ymm3, ymm5, ymm7 281 | ;vpermil2pd ymm1, ymm3, yword [esp], ymm7 282 | ;vpermil2pd ymm1, ymm3, ymm5, yword [esi] 283 | ;vpermil2ps xmm1, xmm3, xmm5, xmm7 284 | ;vpermil2ps xmm1, xmm3, oword [esp], xmm7 285 | ;vpermil2ps xmm1, xmm3, xmm5, oword [esi] 286 | ;vpermil2ps ymm1, ymm3, ymm5, ymm7 287 | ;vpermil2ps ymm1, ymm3, yword [esp], ymm7 288 | ;vpermil2ps ymm1, ymm3, ymm5, yword [esi] 289 | vphaddbd xmm1, xmm3 290 | vphaddbd xmm1, oword [edx] 291 | vphaddbq xmm1, xmm3 292 | vphaddbq xmm1, oword [edx] 293 | vphaddbw xmm1, xmm3 294 | vphaddbw xmm1, oword [edx] 295 | vphadddq xmm1, xmm3 296 | vphadddq xmm1, oword [edx] 297 | vphaddubd xmm1, xmm3 298 | vphaddubd xmm1, oword [edx] 299 | vphaddubq xmm1, xmm3 300 | vphaddubq xmm1, oword [edx] 301 | vphaddubw xmm1, xmm3 302 | vphaddubw xmm1, oword [edx] 303 | ;vphaddudq xmm1, xmm3 304 | ;vphaddudq xmm1, oword [edx] 305 | vphadduwd xmm1, xmm3 306 | vphadduwd xmm1, oword [edx] 307 | vphadduwq xmm1, xmm3 308 | vphadduwq xmm1, oword [edx] 309 | vphaddwd xmm1, xmm3 310 | vphaddwd xmm1, oword [edx] 311 | vphaddwq xmm1, xmm3 312 | vphaddwq xmm1, oword [edx] 313 | vphsubbw xmm1, xmm3 314 | vphsubbw xmm1, oword [edx] 315 | ;vphsubdq xmm1, xmm3 316 | ;vphsubdq xmm1, oword [edx] 317 | vphsubwd xmm1, xmm3 318 | vphsubwd xmm1, oword [edx] 319 | vpmacsdd xmm1, xmm3, xmm5, xmm7 320 | vpmacsdd xmm1, xmm3, oword [esp], xmm7 321 | vpmacsdqh xmm1, xmm3, xmm5, xmm7 322 | vpmacsdqh xmm1, xmm3, oword [esp], xmm7 323 | vpmacsdql xmm1, xmm3, xmm5, xmm7 324 | vpmacsdql xmm1, xmm3, oword [esp], xmm7 325 | vpmacssdd xmm1, xmm3, xmm5, xmm7 326 | vpmacssdd xmm1, xmm3, oword [esp], xmm7 327 | vpmacssdqh xmm1, xmm3, xmm5, xmm7 328 | vpmacssdqh xmm1, xmm3, oword [esp], xmm7 329 | vpmacssdql xmm1, xmm3, xmm5, xmm7 330 | vpmacssdql xmm1, xmm3, oword [esp], xmm7 331 | vpmacsswd 
xmm1, xmm3, xmm5, xmm7 332 | vpmacsswd xmm1, xmm3, oword [esp], xmm7 333 | vpmacssww xmm1, xmm3, xmm5, xmm7 334 | vpmacssww xmm1, xmm3, oword [esp], xmm7 335 | vpmacswd xmm1, xmm3, xmm5, xmm7 336 | vpmacswd xmm1, xmm3, oword [esp], xmm7 337 | vpmacsww xmm1, xmm3, xmm5, xmm7 338 | vpmacsww xmm1, xmm3, oword [esp], xmm7 339 | vpmadcsswd xmm1, xmm3, xmm5, xmm7 340 | vpmadcsswd xmm1, xmm3, oword [esp], xmm7 341 | vpmadcswd xmm1, xmm3, xmm5, xmm7 342 | vpmadcswd xmm1, xmm3, oword [esp], xmm7 343 | vpperm xmm1, xmm3, xmm5, xmm7 344 | vpperm xmm1, xmm3, oword [esp], xmm7 345 | vpperm xmm1, xmm3, xmm5, oword [esi] 346 | vprotb xmm1, xmm3, xmm5 347 | vprotb xmm1, oword [edx], xmm5 348 | vprotb xmm1, xmm3, oword [esp] 349 | vprotb xmm1, xmm3, 2 350 | vprotb xmm1, oword [edx], 2 351 | vprotd xmm1, xmm3, xmm5 352 | vprotd xmm1, oword [edx], xmm5 353 | vprotd xmm1, xmm3, oword [esp] 354 | vprotd xmm1, xmm3, 2 355 | vprotd xmm1, oword [edx], 2 356 | vprotq xmm1, xmm3, xmm5 357 | vprotq xmm1, oword [edx], xmm5 358 | vprotq xmm1, xmm3, oword [esp] 359 | vprotq xmm1, xmm3, 2 360 | vprotq xmm1, oword [edx], 2 361 | vprotw xmm1, xmm3, xmm5 362 | vprotw xmm1, oword [edx], xmm5 363 | vprotw xmm1, xmm3, oword [esp] 364 | vprotw xmm1, xmm3, 2 365 | vprotw xmm1, oword [edx], 2 366 | vpshab xmm1, xmm3, xmm5 367 | vpshab xmm1, oword [edx], xmm5 368 | vpshab xmm1, xmm3, oword [esp] 369 | vpshad xmm1, xmm3, xmm5 370 | vpshad xmm1, oword [edx], xmm5 371 | vpshad xmm1, xmm3, oword [esp] 372 | vpshaq xmm1, xmm3, xmm5 373 | vpshaq xmm1, oword [edx], xmm5 374 | vpshaq xmm1, xmm3, oword [esp] 375 | vpshaw xmm1, xmm3, xmm5 376 | vpshaw xmm1, oword [edx], xmm5 377 | vpshaw xmm1, xmm3, oword [esp] 378 | vpshlb xmm1, xmm3, xmm5 379 | vpshlb xmm1, oword [edx], xmm5 380 | vpshlb xmm1, xmm3, oword [esp] 381 | vpshld xmm1, xmm3, xmm5 382 | vpshld xmm1, oword [edx], xmm5 383 | vpshld xmm1, xmm3, oword [esp] 384 | vpshlq xmm1, xmm3, xmm5 385 | vpshlq xmm1, oword [edx], xmm5 386 | vpshlq xmm1, xmm3, oword 
[esp] 387 | vpshlw xmm1, xmm3, xmm5 388 | vpshlw xmm1, oword [edx], xmm5 389 | vpshlw xmm1, xmm3, oword [esp] 390 | 391 | global nasm_test_fma4 392 | nasm_test_fma4: 393 | vfmaddpd xmm1, xmm3, xmm5, xmm7 394 | vfmaddpd xmm1, xmm3, oword [esp], xmm7 395 | vfmaddpd xmm1, xmm3, xmm5, oword [esi] 396 | vfmaddpd ymm1, ymm3, ymm5, ymm7 397 | vfmaddpd ymm1, ymm3, yword [esp], ymm7 398 | vfmaddpd ymm1, ymm3, ymm5, yword [esi] 399 | vfmaddps xmm1, xmm3, xmm5, xmm7 400 | vfmaddps xmm1, xmm3, oword [esp], xmm7 401 | vfmaddps xmm1, xmm3, xmm5, oword [esi] 402 | vfmaddps ymm1, ymm3, ymm5, ymm7 403 | vfmaddps ymm1, ymm3, yword [esp], ymm7 404 | vfmaddps ymm1, ymm3, ymm5, yword [esi] 405 | vfmaddsd xmm1, xmm3, xmm5, xmm7 406 | vfmaddsd xmm1, xmm3, qword [esp], xmm7 407 | vfmaddsd xmm1, xmm3, xmm5, qword [esi] 408 | vfmaddss xmm1, xmm3, xmm5, xmm7 409 | vfmaddss xmm1, xmm3, dword [esp], xmm7 410 | vfmaddss xmm1, xmm3, xmm5, dword [esi] 411 | vfmaddsubpd xmm1, xmm3, xmm5, xmm7 412 | vfmaddsubpd xmm1, xmm3, oword [esp], xmm7 413 | vfmaddsubpd xmm1, xmm3, xmm5, oword [esi] 414 | vfmaddsubpd ymm1, ymm3, ymm5, ymm7 415 | vfmaddsubpd ymm1, ymm3, yword [esp], ymm7 416 | vfmaddsubpd ymm1, ymm3, ymm5, yword [esi] 417 | vfmaddsubps xmm1, xmm3, xmm5, xmm7 418 | vfmaddsubps xmm1, xmm3, oword [esp], xmm7 419 | vfmaddsubps xmm1, xmm3, xmm5, oword [esi] 420 | vfmaddsubps ymm1, ymm3, ymm5, ymm7 421 | vfmaddsubps ymm1, ymm3, yword [esp], ymm7 422 | vfmaddsubps ymm1, ymm3, ymm5, yword [esi] 423 | vfmsubaddpd xmm1, xmm3, xmm5, xmm7 424 | vfmsubaddpd xmm1, xmm3, oword [esp], xmm7 425 | vfmsubaddpd xmm1, xmm3, xmm5, oword [esi] 426 | vfmsubaddpd ymm1, ymm3, ymm5, ymm7 427 | vfmsubaddpd ymm1, ymm3, yword [esp], ymm7 428 | vfmsubaddpd ymm1, ymm3, ymm5, yword [esi] 429 | vfmsubaddps xmm1, xmm3, xmm5, xmm7 430 | vfmsubaddps xmm1, xmm3, oword [esp], xmm7 431 | vfmsubaddps xmm1, xmm3, xmm5, oword [esi] 432 | vfmsubaddps ymm1, ymm3, ymm5, ymm7 433 | vfmsubaddps ymm1, ymm3, yword [esp], ymm7 434 | vfmsubaddps 
ymm1, ymm3, ymm5, yword [esi] 435 | vfmsubpd xmm1, xmm3, xmm5, xmm7 436 | vfmsubpd xmm1, xmm3, oword [esp], xmm7 437 | vfmsubpd xmm1, xmm3, xmm5, oword [esi] 438 | vfmsubpd ymm1, ymm3, ymm5, ymm7 439 | vfmsubpd ymm1, ymm3, yword [esp], ymm7 440 | vfmsubpd ymm1, ymm3, ymm5, yword [esi] 441 | vfmsubps xmm1, xmm3, xmm5, xmm7 442 | vfmsubps xmm1, xmm3, oword [esp], xmm7 443 | vfmsubps xmm1, xmm3, xmm5, oword [esi] 444 | vfmsubps ymm1, ymm3, ymm5, ymm7 445 | vfmsubps ymm1, ymm3, yword [esp], ymm7 446 | vfmsubps ymm1, ymm3, ymm5, yword [esi] 447 | vfmsubsd xmm1, xmm3, xmm5, xmm7 448 | vfmsubsd xmm1, xmm3, qword [esp], xmm7 449 | vfmsubsd xmm1, xmm3, xmm5, qword [esi] 450 | vfmsubss xmm1, xmm3, xmm5, xmm7 451 | vfmsubss xmm1, xmm3, dword [esp], xmm7 452 | vfmsubss xmm1, xmm3, xmm5, dword [esi] 453 | vfnmaddpd xmm1, xmm3, xmm5, xmm7 454 | vfnmaddpd xmm1, xmm3, oword [esp], xmm7 455 | vfnmaddpd xmm1, xmm3, xmm5, oword [esi] 456 | vfnmaddpd ymm1, ymm3, ymm5, ymm7 457 | vfnmaddpd ymm1, ymm3, yword [esp], ymm7 458 | vfnmaddpd ymm1, ymm3, ymm5, yword [esi] 459 | vfnmaddps xmm1, xmm3, xmm5, xmm7 460 | vfnmaddps xmm1, xmm3, oword [esp], xmm7 461 | vfnmaddps xmm1, xmm3, xmm5, oword [esi] 462 | vfnmaddps ymm1, ymm3, ymm5, ymm7 463 | vfnmaddps ymm1, ymm3, yword [esp], ymm7 464 | vfnmaddps ymm1, ymm3, ymm5, yword [esi] 465 | vfnmaddsd xmm1, xmm3, xmm5, xmm7 466 | vfnmaddsd xmm1, xmm3, qword [esp], xmm7 467 | vfnmaddsd xmm1, xmm3, xmm5, qword [esi] 468 | vfnmaddss xmm1, xmm3, xmm5, xmm7 469 | vfnmaddss xmm1, xmm3, dword [esp], xmm7 470 | vfnmaddss xmm1, xmm3, xmm5, dword [esi] 471 | vfnmsubpd xmm1, xmm3, xmm5, xmm7 472 | vfnmsubpd xmm1, xmm3, oword [esp], xmm7 473 | vfnmsubpd xmm1, xmm3, xmm5, oword [esi] 474 | vfnmsubpd ymm1, ymm3, ymm5, ymm7 475 | vfnmsubpd ymm1, ymm3, yword [esp], ymm7 476 | vfnmsubpd ymm1, ymm3, ymm5, yword [esi] 477 | vfnmsubps xmm1, xmm3, xmm5, xmm7 478 | vfnmsubps xmm1, xmm3, oword [esp], xmm7 479 | vfnmsubps xmm1, xmm3, xmm5, oword [esi] 480 | vfnmsubps ymm1, 
ymm3, ymm5, ymm7 481 | vfnmsubps ymm1, ymm3, yword [esp], ymm7 482 | vfnmsubps ymm1, ymm3, ymm5, yword [esi] 483 | vfnmsubsd xmm1, xmm3, xmm5, xmm7 484 | vfnmsubsd xmm1, xmm3, qword [esp], xmm7 485 | vfnmsubsd xmm1, xmm3, xmm5, qword [esi] 486 | vfnmsubss xmm1, xmm3, xmm5, xmm7 487 | vfnmsubss xmm1, xmm3, dword [esp], xmm7 488 | vfnmsubss xmm1, xmm3, xmm5, dword [esi] 489 | 490 | global nasm_test_bmi 491 | nasm_test_bmi: 492 | andn edi, ecx, eax 493 | andn edi, ecx, dword [eax] 494 | andn rdi, rcx, rax 495 | andn rdi, rcx, qword [rax] 496 | bextr edi, ecx, eax 497 | bextr edi, dword [ecx], eax 498 | bextr rdi, rcx, rax 499 | bextr rdi, qword [rcx], rax 500 | blsi edi, ecx 501 | blsi edi, dword [ecx] 502 | blsi rdi, rcx 503 | blsi rdi, qword [rcx] 504 | blsmsk edi, ecx 505 | blsmsk edi, dword [ecx] 506 | blsmsk rdi, rcx 507 | blsmsk rdi, qword [rcx] 508 | blsr edi, ecx 509 | blsr edi, dword [ecx] 510 | blsr rdi, rcx 511 | blsr rdi, qword [rcx] 512 | bzhi edi, ecx, eax 513 | bzhi edi, dword [ecx], eax 514 | bzhi rdi, rcx, rax 515 | bzhi rdi, qword [rcx], rax 516 | ; lzcnt di, cx 517 | ; lzcnt di, word [ecx] 518 | ; lzcnt edi, ecx 519 | ; lzcnt edi, dword [ecx] 520 | lzcnt rdi, rcx 521 | lzcnt rdi, qword [rcx] 522 | mulx edi, ecx, eax 523 | mulx edi, ecx, dword [eax] 524 | mulx rdi, rcx, rax 525 | mulx rdi, rcx, qword [rax] 526 | pdep edi, ecx, eax 527 | pdep edi, ecx, dword [eax] 528 | pdep rdi, rcx, rax 529 | pdep rdi, rcx, qword [rax] 530 | pext edi, ecx, eax 531 | pext edi, ecx, dword [eax] 532 | pext rdi, rcx, rax 533 | pext rdi, rcx, qword [rax] 534 | rorx edi, ecx, 1 535 | rorx edi, dword [ecx], 1 536 | rorx rdi, rcx, 1 537 | rorx rdi, qword [rcx], 1 538 | sarx edi, ecx, eax 539 | sarx edi, dword [ecx], eax 540 | sarx rdi, rcx, rax 541 | sarx rdi, qword [rcx], rax 542 | shlx edi, ecx, eax 543 | shlx edi, dword [ecx], eax 544 | shlx rdi, rcx, rax 545 | shlx rdi, qword [rcx], rax 546 | shrx edi, ecx, eax 547 | shrx edi, dword [ecx], eax 548 | shrx rdi, rcx, rax 
549 | shrx rdi, qword [rcx], rax 550 | ; tzcnt di, cx 551 | ; tzcnt di, word [ecx] 552 | ; tzcnt edi, ecx 553 | ; tzcnt edi, dword [ecx] 554 | tzcnt rdi, rcx 555 | tzcnt rdi, qword [rcx] 556 | ;invpcid edi, oword [ecx] 557 | invpcid rdi, oword [rcx] 558 | 559 | global nasm_test_avx2_gather 560 | nasm_test_avx2_gather: 561 | vgatherdps xmm2, dword [ebp + xmm5 * 2 + 1], xmm3 562 | vgatherdps ymm5, dword [ebp + ymm5 * 2 + 1], ymm4 563 | vgatherqps xmm2, dword [ebp + xmm6 * 2 + 1], xmm3 564 | vgatherqps xmm2, dword [ebp + ymm6 * 2 + 1], xmm3 565 | vgatherdpd xmm2, qword [ebp + xmm5 * 2 + 1], xmm3 566 | vgatherdpd ymm5, qword [ebp + xmm5 * 2 + 1], ymm4 567 | vgatherqpd xmm2, qword [ebp + xmm6 * 2 + 1], xmm3 568 | vgatherqpd ymm5, qword [ebp + ymm6 * 2 + 1], ymm4 569 | vpgatherdd xmm2, dword [ebp + xmm5 * 2 + 1], xmm3 570 | vpgatherdd ymm5, dword [ebp + ymm5 * 2 + 1], ymm4 571 | vpgatherqd xmm2, dword [ebp + xmm6 * 2 + 1], xmm3 572 | vpgatherqd xmm2, dword [ebp + ymm6 * 2 + 1], xmm3 573 | vpgatherdq xmm2, qword [ebp + xmm5 * 2 + 1], xmm3 574 | vpgatherdq ymm5, qword [ebp + xmm5 * 2 + 1], ymm4 575 | vpgatherqq xmm2, qword [ebp + xmm6 * 2 + 1], xmm3 576 | vpgatherqq ymm5, qword [ebp + ymm6 * 2 + 1], ymm4 577 | 578 | global nasm_test_regalloc_vsib 579 | nasm_test_regalloc_vsib: 580 | vpxor xmm3, xmm3, xmm3 581 | vgatherdps xmm0, dword [ebp + xmm3 * 2 + 1], xmm2 582 | vgatherqps xmm0, dword [ebp + xmm3 * 2 + 1], xmm2 583 | vxorps ymm2, ymm2, ymm2 584 | vgatherdps ymm1, dword [ebp + ymm2 * 2 + 1], ymm0 585 | vgatherqps xmm1, dword [ebp + ymm2 * 2 + 1], xmm0 586 | -------------------------------------------------------------------------------- /test/nasm_x86.nas: -------------------------------------------------------------------------------- 1 | section .text 2 | ;align 16 3 | 4 | global _nasm_test_mov_disp 5 | _nasm_test_mov_disp: 6 | mov al, [1] 7 | mov cl, [1] 8 | mov ax, [1] 9 | mov cx, [1] 10 | mov eax, [1] 11 | mov ecx, [1] 12 | mov [1], al 13 | mov [1], cl 14 | mov 
[1], ax 15 | mov [1], cx 16 | mov [1], eax 17 | mov [1], ecx 18 | 19 | global _nasm_test_fma 20 | _nasm_test_fma: 21 | vfmadd132pd xmm1, xmm3, xmm5 22 | vfmadd132pd xmm1, xmm3, oword [edi] 23 | vfmadd132pd ymm1, ymm3, ymm5 24 | vfmadd132pd ymm1, ymm3, yword [edi] 25 | vfmadd213pd xmm1, xmm3, xmm5 26 | vfmadd213pd xmm1, xmm3, oword [edi] 27 | vfmadd213pd ymm1, ymm3, ymm5 28 | vfmadd213pd ymm1, ymm3, yword [edi] 29 | vfmadd231pd xmm1, xmm3, xmm5 30 | vfmadd231pd xmm1, xmm3, oword [edi] 31 | vfmadd231pd ymm1, ymm3, ymm5 32 | vfmadd231pd ymm1, ymm3, yword [edi] 33 | vfmadd132ps xmm1, xmm3, xmm5 34 | vfmadd132ps xmm1, xmm3, oword [edi] 35 | vfmadd132ps ymm1, ymm3, ymm5 36 | vfmadd132ps ymm1, ymm3, yword [edi] 37 | vfmadd213ps xmm1, xmm3, xmm5 38 | vfmadd213ps xmm1, xmm3, oword [edi] 39 | vfmadd213ps ymm1, ymm3, ymm5 40 | vfmadd213ps ymm1, ymm3, yword [edi] 41 | vfmadd231ps xmm1, xmm3, xmm5 42 | vfmadd231ps xmm1, xmm3, oword [edi] 43 | vfmadd231ps ymm1, ymm3, ymm5 44 | vfmadd231ps ymm1, ymm3, yword [edi] 45 | vfmadd132sd xmm1, xmm3, xmm5 46 | vfmadd132sd xmm1, xmm3, [edi] 47 | vfmadd213sd xmm1, xmm3, xmm5 48 | vfmadd213sd xmm1, xmm3, [edi] 49 | vfmadd231sd xmm1, xmm3, xmm5 50 | vfmadd231sd xmm1, xmm3, [edi] 51 | vfmadd132ss xmm1, xmm3, xmm5 52 | vfmadd132ss xmm1, xmm3, [edi] 53 | vfmadd213ss xmm1, xmm3, xmm5 54 | vfmadd213ss xmm1, xmm3, [edi] 55 | vfmadd231ss xmm1, xmm3, xmm5 56 | vfmadd231ss xmm1, xmm3, [edi] 57 | vfmaddsub132pd xmm1, xmm3, xmm5 58 | vfmaddsub132pd xmm1, xmm3, oword [edi] 59 | vfmaddsub132pd ymm1, ymm3, ymm5 60 | vfmaddsub132pd ymm1, ymm3, yword [edi] 61 | vfmaddsub213pd xmm1, xmm3, xmm5 62 | vfmaddsub213pd xmm1, xmm3, oword [edi] 63 | vfmaddsub213pd ymm1, ymm3, ymm5 64 | vfmaddsub213pd ymm1, ymm3, yword [edi] 65 | vfmaddsub231pd xmm1, xmm3, xmm5 66 | vfmaddsub231pd xmm1, xmm3, oword [edi] 67 | vfmaddsub231pd ymm1, ymm3, ymm5 68 | vfmaddsub231pd ymm1, ymm3, yword [edi] 69 | vfmaddsub132ps xmm1, xmm3, xmm5 70 | vfmaddsub132ps xmm1, xmm3, oword [edi] 71 | 
vfmaddsub132ps ymm1, ymm3, ymm5 72 | vfmaddsub132ps ymm1, ymm3, yword [edi] 73 | vfmaddsub213ps xmm1, xmm3, xmm5 74 | vfmaddsub213ps xmm1, xmm3, oword [edi] 75 | vfmaddsub213ps ymm1, ymm3, ymm5 76 | vfmaddsub213ps ymm1, ymm3, yword [edi] 77 | vfmaddsub231ps xmm1, xmm3, xmm5 78 | vfmaddsub231ps xmm1, xmm3, oword [edi] 79 | vfmaddsub231ps ymm1, ymm3, ymm5 80 | vfmaddsub231ps ymm1, ymm3, yword [edi] 81 | vfmsubadd132pd xmm1, xmm3, xmm5 82 | vfmsubadd132pd xmm1, xmm3, oword [edi] 83 | vfmsubadd132pd ymm1, ymm3, ymm5 84 | vfmsubadd132pd ymm1, ymm3, yword [edi] 85 | vfmsubadd213pd xmm1, xmm3, xmm5 86 | vfmsubadd213pd xmm1, xmm3, oword [edi] 87 | vfmsubadd213pd ymm1, ymm3, ymm5 88 | vfmsubadd213pd ymm1, ymm3, yword [edi] 89 | vfmsubadd231pd xmm1, xmm3, xmm5 90 | vfmsubadd231pd xmm1, xmm3, oword [edi] 91 | vfmsubadd231pd ymm1, ymm3, ymm5 92 | vfmsubadd231pd ymm1, ymm3, yword [edi] 93 | vfmsubadd132ps xmm1, xmm3, xmm5 94 | vfmsubadd132ps xmm1, xmm3, oword [edi] 95 | vfmsubadd132ps ymm1, ymm3, ymm5 96 | vfmsubadd132ps ymm1, ymm3, yword [edi] 97 | vfmsubadd213ps xmm1, xmm3, xmm5 98 | vfmsubadd213ps xmm1, xmm3, oword [edi] 99 | vfmsubadd213ps ymm1, ymm3, ymm5 100 | vfmsubadd213ps ymm1, ymm3, yword [edi] 101 | vfmsubadd231ps xmm1, xmm3, xmm5 102 | vfmsubadd231ps xmm1, xmm3, oword [edi] 103 | vfmsubadd231ps ymm1, ymm3, ymm5 104 | vfmsubadd231ps ymm1, ymm3, yword [edi] 105 | vfmsub132pd xmm1, xmm3, xmm5 106 | vfmsub132pd xmm1, xmm3, oword [edi] 107 | vfmsub132pd ymm1, ymm3, ymm5 108 | vfmsub132pd ymm1, ymm3, yword [edi] 109 | vfmsub213pd xmm1, xmm3, xmm5 110 | vfmsub213pd xmm1, xmm3, oword [edi] 111 | vfmsub213pd ymm1, ymm3, ymm5 112 | vfmsub213pd ymm1, ymm3, yword [edi] 113 | vfmsub231pd xmm1, xmm3, xmm5 114 | vfmsub231pd xmm1, xmm3, oword [edi] 115 | vfmsub231pd ymm1, ymm3, ymm5 116 | vfmsub231pd ymm1, ymm3, yword [edi] 117 | vfmsub132ps xmm1, xmm3, xmm5 118 | vfmsub132ps xmm1, xmm3, oword [edi] 119 | vfmsub132ps ymm1, ymm3, ymm5 120 | vfmsub132ps ymm1, ymm3, yword [edi] 121 | 
vfmsub213ps xmm1, xmm3, xmm5 122 | vfmsub213ps xmm1, xmm3, oword [edi] 123 | vfmsub213ps ymm1, ymm3, ymm5 124 | vfmsub213ps ymm1, ymm3, yword [edi] 125 | vfmsub231ps xmm1, xmm3, xmm5 126 | vfmsub231ps xmm1, xmm3, oword [edi] 127 | vfmsub231ps ymm1, ymm3, ymm5 128 | vfmsub231ps ymm1, ymm3, yword [edi] 129 | vfmsub132sd xmm1, xmm3, xmm5 130 | vfmsub132sd xmm1, xmm3, [edi] 131 | vfmsub213sd xmm1, xmm3, xmm5 132 | vfmsub213sd xmm1, xmm3, [edi] 133 | vfmsub231sd xmm1, xmm3, xmm5 134 | vfmsub231sd xmm1, xmm3, [edi] 135 | vfmsub132ss xmm1, xmm3, xmm5 136 | vfmsub132ss xmm1, xmm3, [edi] 137 | vfmsub213ss xmm1, xmm3, xmm5 138 | vfmsub213ss xmm1, xmm3, [edi] 139 | vfmsub231ss xmm1, xmm3, xmm5 140 | vfmsub231ss xmm1, xmm3, [edi] 141 | vfnmadd132pd xmm1, xmm3, xmm5 142 | vfnmadd132pd xmm1, xmm3, oword [edi] 143 | vfnmadd132pd ymm1, ymm3, ymm5 144 | vfnmadd132pd ymm1, ymm3, yword [edi] 145 | vfnmadd213pd xmm1, xmm3, xmm5 146 | vfnmadd213pd xmm1, xmm3, oword [edi] 147 | vfnmadd213pd ymm1, ymm3, ymm5 148 | vfnmadd213pd ymm1, ymm3, yword [edi] 149 | vfnmadd231pd xmm1, xmm3, xmm5 150 | vfnmadd231pd xmm1, xmm3, oword [edi] 151 | vfnmadd231pd ymm1, ymm3, ymm5 152 | vfnmadd231pd ymm1, ymm3, yword [edi] 153 | vfnmadd132ps xmm1, xmm3, xmm5 154 | vfnmadd132ps xmm1, xmm3, oword [edi] 155 | vfnmadd132ps ymm1, ymm3, ymm5 156 | vfnmadd132ps ymm1, ymm3, yword [edi] 157 | vfnmadd213ps xmm1, xmm3, xmm5 158 | vfnmadd213ps xmm1, xmm3, oword [edi] 159 | vfnmadd213ps ymm1, ymm3, ymm5 160 | vfnmadd213ps ymm1, ymm3, yword [edi] 161 | vfnmadd231ps xmm1, xmm3, xmm5 162 | vfnmadd231ps xmm1, xmm3, oword [edi] 163 | vfnmadd231ps ymm1, ymm3, ymm5 164 | vfnmadd231ps ymm1, ymm3, yword [edi] 165 | vfnmadd132sd xmm1, xmm3, xmm5 166 | vfnmadd132sd xmm1, xmm3, [edi] 167 | vfnmadd213sd xmm1, xmm3, xmm5 168 | vfnmadd213sd xmm1, xmm3, [edi] 169 | vfnmadd231sd xmm1, xmm3, xmm5 170 | vfnmadd231sd xmm1, xmm3, [edi] 171 | vfnmadd132ss xmm1, xmm3, xmm5 172 | vfnmadd132ss xmm1, xmm3, [edi] 173 | vfnmadd213ss xmm1, xmm3, 
xmm5 174 | vfnmadd213ss xmm1, xmm3, [edi] 175 | vfnmadd231ss xmm1, xmm3, xmm5 176 | vfnmadd231ss xmm1, xmm3, [edi] 177 | vfnmsub132pd xmm1, xmm3, xmm5 178 | vfnmsub132pd xmm1, xmm3, oword [edi] 179 | vfnmsub132pd ymm1, ymm3, ymm5 180 | vfnmsub132pd ymm1, ymm3, yword [edi] 181 | vfnmsub213pd xmm1, xmm3, xmm5 182 | vfnmsub213pd xmm1, xmm3, oword [edi] 183 | vfnmsub213pd ymm1, ymm3, ymm5 184 | vfnmsub213pd ymm1, ymm3, yword [edi] 185 | vfnmsub231pd xmm1, xmm3, xmm5 186 | vfnmsub231pd xmm1, xmm3, oword [edi] 187 | vfnmsub231pd ymm1, ymm3, ymm5 188 | vfnmsub231pd ymm1, ymm3, yword [edi] 189 | vfnmsub132ps xmm1, xmm3, xmm5 190 | vfnmsub132ps xmm1, xmm3, oword [edi] 191 | vfnmsub132ps ymm1, ymm3, ymm5 192 | vfnmsub132ps ymm1, ymm3, yword [edi] 193 | vfnmsub213ps xmm1, xmm3, xmm5 194 | vfnmsub213ps xmm1, xmm3, oword [edi] 195 | vfnmsub213ps ymm1, ymm3, ymm5 196 | vfnmsub213ps ymm1, ymm3, yword [edi] 197 | vfnmsub231ps xmm1, xmm3, xmm5 198 | vfnmsub231ps xmm1, xmm3, oword [edi] 199 | vfnmsub231ps ymm1, ymm3, ymm5 200 | vfnmsub231ps ymm1, ymm3, yword [edi] 201 | vfnmsub132sd xmm1, xmm3, xmm5 202 | vfnmsub132sd xmm1, xmm3, [edi] 203 | vfnmsub213sd xmm1, xmm3, xmm5 204 | vfnmsub213sd xmm1, xmm3, [edi] 205 | vfnmsub231sd xmm1, xmm3, xmm5 206 | vfnmsub231sd xmm1, xmm3, [edi] 207 | vfnmsub132ss xmm1, xmm3, xmm5 208 | vfnmsub132ss xmm1, xmm3, [edi] 209 | vfnmsub213ss xmm1, xmm3, xmm5 210 | vfnmsub213ss xmm1, xmm3, [edi] 211 | vfnmsub231ss xmm1, xmm3, xmm5 212 | vfnmsub231ss xmm1, xmm3, [edi] 213 | 214 | global _nasm_test_f16c 215 | _nasm_test_f16c: 216 | ;rdfsbase ecx 217 | ;rdfsbase rcx 218 | ;rdgsbase ecx 219 | ;rdgsbase rcx 220 | rdrand cx 221 | rdrand ecx 222 | ;rdrand rcx 223 | ;wrfsbase ecx 224 | ;wrfsbase rcx 225 | ;wrgsbase ecx 226 | ;wrgsbase rcx 227 | vcvtph2ps ymm1, xmm3 228 | vcvtph2ps ymm1, oword[edx] 229 | vcvtph2ps xmm1, xmm3 230 | vcvtph2ps xmm1, qword[edx] 231 | vcvtps2ph xmm1, ymm3, 5 232 | vcvtps2ph oword[edx], ymm3, 5 233 | vcvtps2ph xmm1, xmm3, 5 234 | 
vcvtps2ph qword[edx], xmm3, 5

; ----------------------------------------------------------------------------
; XOP instruction listing, exported as _nasm_test_xop.
; Only 32-bit general registers are used for addressing (edx, esp, esi).
; NOTE(review): presumably the NASM-assembled reference that the C++ harness
; compares byte-for-byte with jitasm's output (TEST_N / test_impl in
; test/test.h); the matching C++ test is not visible in this part of the dump.
; ----------------------------------------------------------------------------
global _nasm_test_xop
_nasm_test_xop:
; vfrcz*: register and memory source forms
vfrczpd xmm1, xmm3
vfrczpd xmm1, oword [edx]
vfrczpd ymm1, ymm3
vfrczpd ymm1, yword [edx]
vfrczps xmm1, xmm3
vfrczps xmm1, oword [edx]
vfrczps ymm1, ymm3
vfrczps ymm1, yword [edx]
vfrczsd xmm1, xmm3
vfrczsd xmm1, qword [edx]
vfrczss xmm1, xmm3
vfrczss xmm1, dword [edx]
; vpcmov: the memory operand is tried in the 3rd and in the 4th position
vpcmov xmm1, xmm3, xmm5, xmm7
vpcmov xmm1, xmm3, oword [esp], xmm7
vpcmov xmm1, xmm3, xmm5, oword [esi]
vpcmov ymm1, ymm3, ymm5, ymm7
vpcmov ymm1, ymm3, yword [esp], ymm7
vpcmov ymm1, ymm3, ymm5, yword [esi]
; vpcom*: immediate 2 selects the comparison
vpcomb xmm1, xmm3, xmm5, 2
vpcomb xmm1, xmm3, oword [esp], 2
vpcomd xmm1, xmm3, xmm5, 2
vpcomd xmm1, xmm3, oword [esp], 2
vpcomq xmm1, xmm3, xmm5, 2
vpcomq xmm1, xmm3, oword [esp], 2
; The following vpcomu*/vpcomw/vpermil2* lines are disabled in the source;
; the reason is not recorded here.
;vpcomub xmm1, xmm3, xmm5, 2
;vpcomub xmm1, xmm3, oword [esp], 2
;vpcomud xmm1, xmm3, xmm5, 2
;vpcomud xmm1, xmm3, oword [esp], 2
;vpcomuq xmm1, xmm3, xmm5, 2
;vpcomuq xmm1, xmm3, oword [esp], 2
;vpcomuw xmm1, xmm3, xmm5, 2
;vpcomuw xmm1, xmm3, oword [esp], 2
;vpcomw xmm1, xmm3, xmm5, 2
;vpcomw xmm1, xmm3, oword [esp], 2
;vpermil2pd xmm1, xmm3, xmm5, xmm7
;vpermil2pd xmm1, xmm3, oword [esp], xmm7
;vpermil2pd xmm1, xmm3, xmm5, oword [esi]
;vpermil2pd ymm1, ymm3, ymm5, ymm7
;vpermil2pd ymm1, ymm3, yword [esp], ymm7
;vpermil2pd ymm1, ymm3, ymm5, yword [esi]
;vpermil2ps xmm1, xmm3, xmm5, xmm7
;vpermil2ps xmm1, xmm3, oword [esp], xmm7
;vpermil2ps xmm1, xmm3, xmm5, oword [esi]
;vpermil2ps ymm1, ymm3, ymm5, ymm7
;vpermil2ps ymm1, ymm3, yword [esp], ymm7
;vpermil2ps ymm1, ymm3, ymm5, yword [esi]
; vphadd* / vphsub*: two-operand forms (vphaddudq and vphsubdq are disabled)
vphaddbd xmm1, xmm3
vphaddbd xmm1, oword [edx]
vphaddbq xmm1, xmm3
vphaddbq xmm1, oword [edx]
vphaddbw xmm1, xmm3
vphaddbw xmm1, oword [edx]
vphadddq xmm1, xmm3
vphadddq xmm1, oword [edx]
vphaddubd xmm1, xmm3
vphaddubd xmm1, oword [edx]
vphaddubq xmm1, xmm3
vphaddubq xmm1, oword [edx]
vphaddubw xmm1, xmm3
vphaddubw xmm1, oword [edx]
;vphaddudq xmm1, xmm3
;vphaddudq xmm1, oword [edx]
vphadduwd xmm1, xmm3
vphadduwd xmm1, oword [edx]
vphadduwq xmm1, xmm3
vphadduwq xmm1, oword [edx]
vphaddwd xmm1, xmm3
vphaddwd xmm1, oword [edx]
vphaddwq xmm1, xmm3
vphaddwq xmm1, oword [edx]
vphsubbw xmm1, xmm3
vphsubbw xmm1, oword [edx]
;vphsubdq xmm1, xmm3
;vphsubdq xmm1, oword [edx]
vphsubwd xmm1, xmm3
vphsubwd xmm1, oword [edx]
; vpmacs* / vpmadcs*: four-operand forms, memory operand in the 3rd position
vpmacsdd xmm1, xmm3, xmm5, xmm7
vpmacsdd xmm1, xmm3, oword [esp], xmm7
vpmacsdqh xmm1, xmm3, xmm5, xmm7
vpmacsdqh xmm1, xmm3, oword [esp], xmm7
vpmacsdql xmm1, xmm3, xmm5, xmm7
vpmacsdql xmm1, xmm3, oword [esp], xmm7
vpmacssdd xmm1, xmm3, xmm5, xmm7
vpmacssdd xmm1, xmm3, oword [esp], xmm7
vpmacssdqh xmm1, xmm3, xmm5, xmm7
vpmacssdqh xmm1, xmm3, oword [esp], xmm7
vpmacssdql xmm1, xmm3, xmm5, xmm7
vpmacssdql xmm1, xmm3, oword [esp], xmm7
vpmacsswd xmm1, xmm3, xmm5, xmm7
vpmacsswd xmm1, xmm3, oword [esp], xmm7
vpmacssww xmm1, xmm3, xmm5, xmm7
vpmacssww xmm1, xmm3, oword [esp], xmm7
vpmacswd xmm1, xmm3, xmm5, xmm7
vpmacswd xmm1, xmm3, oword [esp], xmm7
vpmacsww xmm1, xmm3, xmm5, xmm7
vpmacsww xmm1, xmm3, oword [esp], xmm7
vpmadcsswd xmm1, xmm3, xmm5, xmm7
vpmadcsswd xmm1, xmm3, oword [esp], xmm7
vpmadcswd xmm1, xmm3, xmm5, xmm7
vpmadcswd xmm1, xmm3, oword [esp], xmm7
vpperm xmm1, xmm3, xmm5, xmm7
vpperm xmm1, xmm3, oword [esp], xmm7
vpperm xmm1, xmm3, xmm5, oword [esi]
; vprot*: register count, memory source, memory count, and immediate forms
vprotb xmm1, xmm3, xmm5
vprotb xmm1, oword [edx], xmm5
vprotb xmm1, xmm3, oword [esp]
vprotb xmm1, xmm3, 2
vprotb xmm1, oword [edx], 2
vprotd xmm1, xmm3, xmm5
vprotd xmm1, oword [edx], xmm5
vprotd xmm1, xmm3, oword [esp]
vprotd xmm1, xmm3, 2
vprotd xmm1, oword [edx], 2
vprotq xmm1, xmm3, xmm5
vprotq xmm1, oword [edx], xmm5
vprotq xmm1, xmm3, oword [esp]
vprotq xmm1, xmm3, 2
vprotq xmm1, oword [edx], 2
vprotw xmm1, xmm3, xmm5
vprotw xmm1, oword [edx], xmm5
vprotw xmm1, xmm3, oword [esp]
vprotw xmm1, xmm3, 2
vprotw xmm1, oword [edx], 2
; vpsha* / vpshl*: register count, memory source, memory count forms
vpshab xmm1, xmm3, xmm5
vpshab xmm1, oword [edx], xmm5
vpshab xmm1, xmm3, oword [esp]
vpshad xmm1, xmm3, xmm5
vpshad xmm1, oword [edx], xmm5
vpshad xmm1, xmm3, oword [esp]
vpshaq xmm1, xmm3, xmm5
vpshaq xmm1, oword [edx], xmm5
vpshaq xmm1, xmm3, oword [esp]
vpshaw xmm1, xmm3, xmm5
vpshaw xmm1, oword [edx], xmm5
vpshaw xmm1, xmm3, oword [esp]
vpshlb xmm1, xmm3, xmm5
vpshlb xmm1, oword [edx], xmm5
vpshlb xmm1, xmm3, oword [esp]
vpshld xmm1, xmm3, xmm5
vpshld xmm1, oword [edx], xmm5
vpshld xmm1, xmm3, oword [esp]
vpshlq xmm1, xmm3, xmm5
vpshlq xmm1, oword [edx], xmm5
vpshlq xmm1, xmm3, oword [esp]
vpshlw xmm1, xmm3, xmm5
vpshlw xmm1, oword [edx], xmm5
vpshlw xmm1, xmm3, oword [esp]

; ----------------------------------------------------------------------------
; FMA4 instruction listing, exported as _nasm_test_fma4.
; Packed mnemonics (pd/ps) are listed in six forms: xmm and ymm, each with all
; register operands, a memory 3rd operand ([esp]) and a memory 4th operand
; ([esi]). Scalar mnemonics (sd/ss) are listed in the three xmm forms with a
; qword / dword memory operand.
; ----------------------------------------------------------------------------
global _nasm_test_fma4
_nasm_test_fma4:
vfmaddpd xmm1, xmm3, xmm5, xmm7
vfmaddpd xmm1, xmm3, oword [esp], xmm7
vfmaddpd xmm1, xmm3, xmm5, oword [esi]
vfmaddpd ymm1, ymm3, ymm5, ymm7
vfmaddpd ymm1, ymm3, yword [esp], ymm7
vfmaddpd ymm1, ymm3, ymm5, yword [esi]
vfmaddps xmm1, xmm3, xmm5, xmm7
vfmaddps xmm1, xmm3, oword [esp], xmm7
vfmaddps xmm1, xmm3, xmm5, oword [esi]
vfmaddps ymm1, ymm3, ymm5, ymm7
vfmaddps ymm1, ymm3, yword [esp], ymm7
vfmaddps ymm1, ymm3, ymm5, yword [esi]
vfmaddsd xmm1, xmm3, xmm5, xmm7
vfmaddsd xmm1, xmm3, qword [esp], xmm7
vfmaddsd xmm1, xmm3, xmm5, qword [esi]
vfmaddss xmm1, xmm3, xmm5, xmm7
vfmaddss xmm1, xmm3, dword [esp], xmm7
vfmaddss xmm1, xmm3, xmm5, dword [esi]
vfmaddsubpd xmm1, xmm3, xmm5, xmm7
vfmaddsubpd xmm1, xmm3, oword [esp], xmm7
vfmaddsubpd xmm1, xmm3, xmm5, oword [esi]
vfmaddsubpd ymm1, ymm3, ymm5, ymm7
vfmaddsubpd ymm1, ymm3, yword [esp], ymm7
vfmaddsubpd ymm1, ymm3, ymm5, yword [esi]
vfmaddsubps xmm1, xmm3, xmm5, xmm7
vfmaddsubps xmm1, xmm3, oword [esp], xmm7
vfmaddsubps xmm1, xmm3, xmm5, oword [esi]
vfmaddsubps ymm1, ymm3, ymm5, ymm7
vfmaddsubps ymm1, ymm3, yword [esp], ymm7
vfmaddsubps ymm1, ymm3, ymm5, yword [esi]
vfmsubaddpd xmm1, xmm3, xmm5, xmm7
vfmsubaddpd xmm1, xmm3, oword [esp], xmm7
vfmsubaddpd xmm1, xmm3, xmm5, oword [esi]
vfmsubaddpd ymm1, ymm3, ymm5, ymm7
vfmsubaddpd ymm1, ymm3, yword [esp], ymm7
vfmsubaddpd ymm1, ymm3, ymm5, yword [esi]
vfmsubaddps xmm1, xmm3, xmm5, xmm7
vfmsubaddps xmm1, xmm3, oword [esp], xmm7
vfmsubaddps xmm1, xmm3, xmm5, oword [esi]
vfmsubaddps ymm1, ymm3, ymm5, ymm7
vfmsubaddps ymm1, ymm3, yword [esp], ymm7
vfmsubaddps ymm1, ymm3, ymm5, yword [esi]
vfmsubpd xmm1, xmm3, xmm5, xmm7
vfmsubpd xmm1, xmm3, oword [esp], xmm7
vfmsubpd xmm1, xmm3, xmm5, oword [esi]
vfmsubpd ymm1, ymm3, ymm5, ymm7
vfmsubpd ymm1, ymm3, yword [esp], ymm7
vfmsubpd ymm1, ymm3, ymm5, yword [esi]
vfmsubps xmm1, xmm3, xmm5, xmm7
vfmsubps xmm1, xmm3, oword [esp], xmm7
vfmsubps xmm1, xmm3, xmm5, oword [esi]
vfmsubps ymm1, ymm3, ymm5, ymm7
vfmsubps ymm1, ymm3, yword [esp], ymm7
vfmsubps ymm1, ymm3, ymm5, yword [esi]
vfmsubsd xmm1, xmm3, xmm5, xmm7
vfmsubsd xmm1, xmm3, qword [esp], xmm7
vfmsubsd xmm1, xmm3, xmm5, qword [esi]
vfmsubss xmm1, xmm3, xmm5, xmm7
vfmsubss xmm1, xmm3, dword [esp], xmm7
vfmsubss xmm1, xmm3, xmm5, dword [esi]
vfnmaddpd xmm1, xmm3, xmm5, xmm7
vfnmaddpd xmm1, xmm3, oword [esp], xmm7
vfnmaddpd xmm1, xmm3, xmm5, oword [esi]
vfnmaddpd ymm1, ymm3, ymm5, ymm7
vfnmaddpd ymm1, ymm3, yword [esp], ymm7
vfnmaddpd ymm1, ymm3, ymm5, yword [esi]
vfnmaddps xmm1, xmm3, xmm5, xmm7
vfnmaddps xmm1, xmm3, oword [esp], xmm7
vfnmaddps xmm1, xmm3, xmm5, oword [esi]
vfnmaddps ymm1, ymm3, ymm5, ymm7
vfnmaddps ymm1, ymm3, yword [esp], ymm7
vfnmaddps ymm1, ymm3, ymm5, yword [esi]
vfnmaddsd xmm1, xmm3, xmm5, xmm7
vfnmaddsd xmm1, xmm3, qword [esp], xmm7
vfnmaddsd xmm1, xmm3, xmm5, qword [esi]
vfnmaddss xmm1, xmm3, xmm5, xmm7
vfnmaddss xmm1, xmm3, dword [esp], xmm7
vfnmaddss xmm1, xmm3, xmm5, dword [esi]
vfnmsubpd xmm1, xmm3, xmm5, xmm7
vfnmsubpd xmm1, xmm3, oword [esp], xmm7
vfnmsubpd xmm1, xmm3, xmm5, oword [esi]
vfnmsubpd ymm1, ymm3, ymm5, ymm7
vfnmsubpd ymm1, ymm3, yword [esp], ymm7
vfnmsubpd ymm1, ymm3, ymm5, yword [esi]
vfnmsubps xmm1, xmm3, xmm5, xmm7
vfnmsubps xmm1, xmm3, oword [esp], xmm7
vfnmsubps xmm1, xmm3, xmm5, oword [esi]
vfnmsubps ymm1, ymm3, ymm5, ymm7
vfnmsubps ymm1, ymm3, yword [esp], ymm7
vfnmsubps ymm1, ymm3, ymm5, yword [esi]
vfnmsubsd xmm1, xmm3, xmm5, xmm7
vfnmsubsd xmm1, xmm3, qword [esp], xmm7
vfnmsubsd xmm1, xmm3, xmm5, qword [esi]
vfnmsubss xmm1, xmm3, xmm5, xmm7
vfnmsubss xmm1, xmm3, dword [esp], xmm7
vfnmsubss xmm1, xmm3, xmm5, dword [esi]

; ----------------------------------------------------------------------------
; Bit-manipulation instruction listing, exported as _nasm_test_bmi.
; Only the 32-bit forms are assembled; every 64-bit (rdi/rcx/rax) form and the
; 16-bit lzcnt/tzcnt forms are kept as commented-out lines.
; ----------------------------------------------------------------------------
global _nasm_test_bmi
_nasm_test_bmi:
andn edi, ecx, eax
andn edi, ecx, dword [eax]
;andn rdi, rcx, rax
;andn rdi, rcx, qword [rax]
bextr edi, ecx, eax
bextr edi, dword [ecx], eax
;bextr rdi, rcx, rax
;bextr rdi, qword [rcx], rax
blsi edi, ecx
blsi edi, dword [ecx]
;blsi rdi, rcx
;blsi rdi, qword [rcx]
blsmsk edi, ecx
blsmsk edi, dword [ecx]
;blsmsk rdi, rcx
;blsmsk rdi, qword [rcx]
blsr edi, ecx
blsr edi, dword [ecx]
;blsr rdi, rcx
;blsr rdi, qword [rcx]
bzhi edi, ecx, eax
bzhi edi, dword [ecx], eax
;bzhi rdi, rcx, rax
;bzhi rdi, qword [rcx], rax
; lzcnt di, cx
; lzcnt di, word [ecx]
lzcnt edi, ecx
lzcnt edi, dword [ecx]
;lzcnt rdi, rcx
;lzcnt rdi, qword [rcx]
mulx edi, ecx, eax
mulx edi, ecx, dword [eax]
;mulx rdi, rcx, rax
;mulx rdi, rcx, qword [rax]
pdep edi, ecx, eax
pdep edi, ecx, dword [eax]
;pdep rdi, rcx, rax
;pdep rdi, rcx, qword [rax]
pext edi, ecx, eax
pext edi, ecx, dword [eax]
;pext rdi, rcx, rax
;pext rdi, rcx, qword [rax]
rorx edi, ecx, 1
rorx edi, dword [ecx], 1
;rorx rdi, rcx, 1
;rorx rdi, qword [rcx], 1
sarx edi, ecx, eax
sarx edi, dword [ecx], eax
;sarx rdi, rcx, rax
;sarx rdi, qword [rcx], rax
shlx edi, ecx, eax
shlx edi, dword [ecx], eax
;shlx rdi, rcx, rax
;shlx rdi, qword [rcx], rax
shrx edi, ecx, eax
shrx edi, dword [ecx], eax
;shrx rdi, rcx, rax
;shrx rdi, qword [rcx], rax
; tzcnt di, cx
; tzcnt di, word [ecx]
tzcnt edi, ecx
tzcnt edi, dword [ecx]
;tzcnt rdi, rcx
;tzcnt rdi, qword [rcx]
invpcid edi, oword [ecx]
;invpcid rdi, oword [rcx]

; ----------------------------------------------------------------------------
; AVX2 gather listing, exported as _nasm_test_avx2_gather.
; Every address has the shape [ebp + <vector index> * 2 + 1]; the index is an
; xmm or ymm register. The listing continues past this point.
; ----------------------------------------------------------------------------
global _nasm_test_avx2_gather
_nasm_test_avx2_gather:
vgatherdps xmm2, dword [ebp + xmm5 * 2 + 1], xmm3
vgatherdps ymm5, dword [ebp + ymm5 * 2 + 1], ymm4
vgatherqps xmm2, dword [ebp + xmm6 * 2 + 1], xmm3
vgatherqps xmm2, dword [ebp + ymm6 * 2 + 1], xmm3
vgatherdpd xmm2, qword [ebp + xmm5 * 2 + 1], xmm3
vgatherdpd ymm5, qword [ebp + xmm5 * 2 + 1], ymm4
vgatherqpd xmm2, qword [ebp + xmm6 * 2 + 1], xmm3
vgatherqpd ymm5, qword [ebp + ymm6 * 2 + 1], ymm4
vpgatherdd xmm2, dword [ebp + xmm5 * 2 + 1], xmm3
vpgatherdd ymm5, dword [ebp + ymm5 * 2 + 1], ymm4
vpgatherqd xmm2, dword [ebp + xmm6 * 2 + 1], xmm3
vpgatherqd xmm2, dword [ebp + ymm6 * 2 + 1], xmm3
vpgatherdq xmm2, qword [ebp + xmm5 * 2 + 1], xmm3
vpgatherdq ymm5, qword [ebp + xmm5 * 2 + 1], ymm4
vpgatherqq xmm2, qword [ebp + xmm6 * 2 + 1], xmm3
vpgatherqq ymm5, qword [ebp + ymm6 * 2 + 1], ymm4

; Reference listing for the C++ test_regalloc_vsib (test/test.cpp), which
; declares nasm_test_regalloc_vsib() and checks it with TEST_N. Where the C++
; test uses the virtual registers xmm_index / ymm_index, this listing uses
; xmm3 / ymm2.
global _nasm_test_regalloc_vsib
_nasm_test_regalloc_vsib:
vpxor xmm3, xmm3, xmm3
vgatherdps xmm0, dword [ebp + xmm3 * 2 + 1], xmm2
vgatherqps xmm0, dword [ebp + xmm3 * 2 + 1], xmm2
vxorps ymm2, ymm2, ymm2
vgatherdps ymm1, dword [ebp + ymm2 * 2 + 1], ymm0
vgatherqps xmm1, dword [ebp + ymm2 * 2 + 1], xmm0

--------------------------------------------------------------------------------
/test/test.cpp:
--------------------------------------------------------------------------------
// Workaround for issue #8
#if defined(__APPLE__) && defined(__MACH__) // Mac OS X
#define JITASM_MMINTRIN 0
#endif

// NOTE(review): the header name of the next #include is missing in this copy
// of the file; restore it from the upstream test.cpp.
#include
#include "jitasm.h"
#include "test.h"

// Global counters updated by the TEST_* helpers declared in test.h and
// printed by main().
int g_test_succeeded = 0;
int g_test_failed = 0;
int g_assemble_time = 0; // us


// Emits movdqa / movdqu and a set of packed-integer instructions in their
// register and memory forms.
// With JITASM64 defined, the xmm8/xmm9 registers and both ecx/eax- and
// rcx/rax-based addresses are used; otherwise the 32-bit branch also covers
// the MMX (mm0/mm1, mmword_ptr) forms of pabs*, pack*, padd* and pxor.
// No use of this struct is visible in this part of the file.
struct test_mmx_sse2 : jitasm::function
{
    void main()
    {
#ifdef JITASM64
        // Aligned moves: low/high xmm registers, 32- and 64-bit base registers.
        movdqa(xmm0, xmm1);
        movdqa(xmm8, xmm1);
        movdqa(xmm0, xmm9);
        movdqa(xmm0, xmmword_ptr[ecx]);
        movdqa(xmm0, xmmword_ptr[rcx]);
        movdqa(xmm8, xmmword_ptr[ecx]);
        movdqa(xmm8, xmmword_ptr[rcx]);
        movdqa(xmmword_ptr[eax], xmm1);
        movdqa(xmmword_ptr[rax], xmm1);
        movdqa(xmmword_ptr[eax], xmm9);
        movdqa(xmmword_ptr[rax], xmm9);

        // Unaligned moves: same operand matrix as above.
        movdqu(xmm0, xmm1);
        movdqu(xmm8, xmm1);
        movdqu(xmm0, xmm9);
        movdqu(xmm0, xmmword_ptr[ecx]);
        movdqu(xmm0, xmmword_ptr[rcx]);
        movdqu(xmm8, xmmword_ptr[ecx]);
        movdqu(xmm8, xmmword_ptr[rcx]);
        movdqu(xmmword_ptr[eax], xmm1);
        movdqu(xmmword_ptr[rax], xmm1);
        movdqu(xmmword_ptr[eax], xmm9);
        movdqu(xmmword_ptr[rax], xmm9);

        pxor(xmm0, xmm1);
        pxor(xmm8, xmm1);
        pxor(xmm0, xmm9);
        pxor(xmm0, xmmword_ptr[ecx]);
        pxor(xmm0, xmmword_ptr[rcx]);
        pxor(xmm8, xmmword_ptr[ecx]);
        pxor(xmm8, xmmword_ptr[rcx]);
#else
        movdqa(xmm0, xmm1);
        movdqa(xmm0, xmmword_ptr[ecx]);
        movdqa(xmmword_ptr[eax], xmm1);

        movdqu(xmm0, xmm1);
        movdqu(xmm0, xmmword_ptr[ecx]);
        movdqu(xmmword_ptr[eax], xmm1);

        // Each of the following is emitted as mm,mm / mm,mem / xmm,xmm /
        // xmm,mem (packusdw has only the xmm forms here).
        pabsb(mm0, mm1);
        pabsb(mm0, mmword_ptr[ecx]);
        pabsb(xmm0, xmm1);
        pabsb(xmm0, xmmword_ptr[ecx]);
        pabsw(mm0, mm1);
        pabsw(mm0, mmword_ptr[ecx]);
        pabsw(xmm0, xmm1);
        pabsw(xmm0, xmmword_ptr[ecx]);
        pabsd(mm0, mm1);
        pabsd(mm0, mmword_ptr[ecx]);
        pabsd(xmm0, xmm1);
        pabsd(xmm0, xmmword_ptr[ecx]);

        packsswb(mm0, mm1);
        packsswb(mm0, mmword_ptr[ecx]);
        packsswb(xmm0, xmm1);
        packsswb(xmm0, xmmword_ptr[ecx]);
        packssdw(mm0, mm1);
        packssdw(mm0, mmword_ptr[ecx]);
        packssdw(xmm0, xmm1);
        packssdw(xmm0, xmmword_ptr[ecx]);
        packuswb(mm0, mm1);
        packuswb(mm0, mmword_ptr[ecx]);
        packuswb(xmm0, xmm1);
        packuswb(xmm0, xmmword_ptr[ecx]);
        packusdw(xmm0, xmm1);
        packusdw(xmm0, xmmword_ptr[ecx]);

        paddb(mm0, mm1);
        paddb(mm0, mmword_ptr[ecx]);
        paddb(xmm0, xmm1);
        paddb(xmm0, xmmword_ptr[ecx]);
        paddw(mm0, mm1);
        paddw(mm0, mmword_ptr[ecx]);
        paddw(xmm0, xmm1);
        paddw(xmm0, xmmword_ptr[ecx]);
        paddd(mm0, mm1);
        paddd(mm0, mmword_ptr[ecx]);
        paddd(xmm0, xmm1);
        paddd(xmm0, xmmword_ptr[ecx]);

        pxor(mm0, mm1);
        pxor(mm0, mmword_ptr[ecx]);
        pxor(xmm0, xmm1);
        pxor(xmm0, xmmword_ptr[ecx]);
#endif
    }
};

//----------------------------------------
// Call graph
//----------------------------------------
// Reference implementation, declared here and supplied by a separately
// assembled object; test_register_allocation() compares it with the code
// generated below via TEST_M(test_cfg1).
extern "C" void masm_test_cfg1();
// Small loop with one forward conditional branch ("1") and one backward
// branch ("0"). `a` is a default-constructed Reg32 (no physical register is
// named), while edx is used explicitly.
// NOTE(review): naked_main presumably means no prolog/epilog is generated -
// confirm in jitasm.h.
struct test_cfg1 : jitasm::function_cdecl
{
    void naked_main()
    {
        jitasm::Reg32 a;
        L("0");
        mov(a, 2);
        mov(edx, 1);
        cmp(a, 0);
        jle("1");          // skips the dec(edx) below
        dec(edx);
        L("1");
        dec(a);
        jne("0");          // back edge to the top label
    }
};

//----------------------------------------
// Register allocation
//----------------------------------------
// Reference implementation compared by TEST_M(test_register_allocation1).
extern "C" void masm_test_register_allocation1();
// Uses ten default-constructed Reg32 variables (v1..v8, a, i) across two
// nested loops, so that several of them are live at the same time.
// NOTE(review): presumably intended to put the register allocator under
// pressure - the expected assignment lives in the MASM reference, which is
// not visible here.
struct test_register_allocation1 : jitasm::function_cdecl
{
    void main()
    {
        jitasm::Reg32 v1, v2, v3, v4, v5, v6, v7, v8;
        // Initialise in reverse order: v8 = 2, v7 = 1, v6..v1 = 0.
        mov(v8, 2);
        mov(v7, 1);
        xor(v6, v6);
        xor(v5, v5);
        xor(v4, v4);
        xor(v3, v3);
        xor(v2, v2);
        xor(v1, v1);

        jitasm::Reg32 a;
        mov(a, 10);
        {
            L("LoopHeadA");

            cmp(v1, 5);
            jg("L1");

            jitasm::Reg32 i;
            mov(i, 10);
            jmp("LoopB");
            {
                // Inner loop: chains v1 -> v2 -> v3 -> v4 -> v5.
                L("LoopB");
                inc(v1);
                add(v2, v1);
                add(v3, v2);
                add(v4, v3);
                add(v5, v4);
                dec(i);
                jnz("LoopB");
            }
            dec(a);
            jnz("L1");
            jmp("LoopEndA");

            // Second half of the outer loop: chains v5 -> v6 -> v7 -> v8.
            L("L1");
            dec(v1);
            add(v6, v5);
            add(v7, v6);
            add(v8, v7);
            dec(a);
            jnz("LoopHeadA");

            L("LoopEndA");
        }
    }
};

//----------------------------------------
// Reassign physical register by register allocator
//----------------------------------------
// Reference implementation compared by
// TEST_M(test_regalloc_reassign_physical_reg).
extern "C" void masm_test_regalloc_reassign_physical_reg();
// Emits maskmovdqu twice with a different third operand (zdi, then zsi).
// NOTE(review): zdi/zsi look like pointer-width aliases of (e/r)di and
// (e/r)si - confirm in jitasm.h.
struct test_regalloc_reassign_physical_reg : jitasm::function
{
    void naked_main()
    {
        maskmovdqu(xmm0, xmm1, zdi);
        maskmovdqu(xmm0, xmm1, zsi);
    }
};

//----------------------------------------
// VSIB register allocation
//----------------------------------------
// Reference implementation (the NASM routine _nasm_test_regalloc_vsib),
// compared by TEST_N(test_regalloc_vsib) in test_register_allocation().
extern "C" void nasm_test_regalloc_vsib();
// Gathers whose VSIB index register is a default-constructed XmmReg / YmmReg
// rather than a named physical register; the data and mask operands are
// physical registers.
struct test_regalloc_vsib : jitasm::function
{
    void naked_main()
    {
        jitasm::XmmReg xmm_index;
        vpxor(xmm_index, xmm_index, xmm_index);
        vgatherdps(xmm0, dword_ptr[ebp + xmm_index * 2 + 1], xmm2);
        vgatherqps(xmm0, dword_ptr[ebp + xmm_index * 2 + 1], xmm2);

        jitasm::YmmReg ymm_index;
        vxorps(ymm_index, ymm_index, ymm_index);
        vgatherdps(ymm1, dword_ptr[ebp + ymm_index * 2 + 1], ymm0);
        vgatherqps(xmm1, dword_ptr[ebp + ymm_index * 2 + 1], xmm0);
    }
};

// The structs below are executed by test_calling_convention(), which
// compares the value each one returns with the expected value.
// NOTE(review): the template argument lists of the jitasm::function_cdecl
// bases (and of the banner comments) are missing in this copy of the file.

//----------------------------------------
// function_cdecl
//----------------------------------------
// Returns its 8-bit register argument unchanged.
struct test_function_return_char : jitasm::function_cdecl
{
    Result main(Reg8 a1)
    {
        return a1;
    }
};

//----------------------------------------
// function_cdecl
//----------------------------------------
// Returns its 16-bit register argument unchanged.
struct test_function_return_short : jitasm::function_cdecl
{
    Result main(Reg16 a1)
    {
        return a1;
    }
};

//----------------------------------------
// function_cdecl (return immediate)
//----------------------------------------
// Returns an integer constant.
struct test_function_return_int_imm : jitasm::function_cdecl
{
    Result main()
    {
        return 0x4AC396D7; // mov eax, 0x4AC396D7
    }
};

//----------------------------------------
// function_cdecl (return eax)
//----------------------------------------
// Zeroes eax and returns it.
struct test_function_return_int_zero : jitasm::function_cdecl
{
    Result main()
    {
        xor(eax, eax);
        return eax; // no instruction. (because mov eax, eax)
    }
};

//----------------------------------------
// function_cdecl (return immediate)
//----------------------------------------
// Returns a float constant.
struct test_function_return_float_imm : jitasm::function_cdecl
{
    Result main()
    {
        return 11.0f;
    }
};

//----------------------------------------
// function_cdecl (return xmm)
//----------------------------------------
#if JITASM_XMMINTRIN
// Loads the float argument from its address into xmm7 and returns xmm7.
struct test_function_return_float_xmm : jitasm::function_cdecl
{
    Result main(Addr a1)
    {
        movss(xmm7, dword_ptr[a1]);
        return xmm7;
    }
};
#endif

//----------------------------------------
// function_cdecl (return ptr)
//----------------------------------------
// Returns the float argument directly from memory (result_ptr[a1]).
struct test_function_return_float_ptr : jitasm::function_cdecl
{
    Result main(Addr a1)
    {
        return result_ptr[a1];
    }
};

//----------------------------------------
// function_cdecl (return st(0))
//----------------------------------------
// Pushes the float argument onto the x87 stack and returns st0.
struct test_function_return_float_st0 : jitasm::function_cdecl
{
    Result main(Addr a1)
    {
        fld(real4_ptr[a1]);
        return st0;
    }
};

//----------------------------------------
// function_cdecl (return immediate)
//----------------------------------------
// Returns a double constant.
struct test_function_return_double_imm : jitasm::function_cdecl
{
    Result main()
    {
        return 11.0;
    }
};

//----------------------------------------
// function_cdecl (return xmm)
//----------------------------------------
#if JITASM_EMMINTRIN
// Loads the double argument from its address into xmm7 and returns xmm7.
struct test_function_return_double_xmm : jitasm::function_cdecl
{
    Result main(Addr a1)
    {
        movsd(xmm7, qword_ptr[a1]);
        return xmm7;
    }
};
#endif

//----------------------------------------
// function_cdecl (return ptr)
//----------------------------------------
// Returns the double argument directly from memory (result_ptr[a1]).
struct test_function_return_double_ptr : jitasm::function_cdecl
{
    Result main(Addr a1)
    {
        return result_ptr[a1];
    }
};

//----------------------------------------
// function_cdecl (return st(0))
//----------------------------------------
// Pushes the double argument onto the x87 stack and returns st0.
struct test_function_return_double_st0 : jitasm::function_cdecl
{
    Result main(Addr a1)
    {
        fld(real8_ptr[a1]);
        return st0;
    }
};

#if JITASM_MMINTRIN
//----------------------------------------
// function_cdecl<__m64, int> (return mm1)
//----------------------------------------
// Broadcasts the int argument into both 32-bit lanes of mm1 and doubles each
// lane (test_calling_convention expects {4, 4} for an input of 2).
struct test_function_return_m64_mm1 : jitasm::function_cdecl<__m64, test_function_return_m64_mm1, int>
{
    Result main(Addr a1)
    {
        movd(mm1, dword_ptr[a1]);
        punpckldq(mm1, mm1);
        paddd(mm1, mm1);
        return mm1;
    }
};

//----------------------------------------
// function_cdecl<__m64> (return ptr)
//----------------------------------------
// Returns the __m64 argument directly from memory.
struct test_function_return_m64_ptr : jitasm::function_cdecl<__m64, test_function_return_m64_ptr, __m64>
{
    Result main(Addr a1)
    {
        return result_ptr[a1];
    }
};
#endif // JITASM_MMINTRIN

#if JITASM_XMMINTRIN
//----------------------------------------
// function_cdecl<__m128> (return xmm1)
//----------------------------------------
// Returns an all-zero __m128 built in xmm1.
struct test_function_return_m128_zero : jitasm::function_cdecl<__m128, test_function_return_m128_zero>
{
    Result main()
    {
        xorps(xmm1, xmm1);
        return xmm1;
    }
};

//----------------------------------------
// function_cdecl<__m128> (return ptr)
//----------------------------------------
// Returns the __m128 argument directly from memory.
struct test_function_return_m128_ptr : jitasm::function_cdecl<__m128, test_function_return_m128_ptr, __m128>
{
    Result main(Addr a1)
    {
        return xmmword_ptr[a1];
    }
};
#endif

#if JITASM_EMMINTRIN
//----------------------------------------
// function_cdecl<__m128d> (return xmm1)
//----------------------------------------
// Returns an all-zero __m128d built in xmm1.
struct test_function_return_m128d_zero : jitasm::function_cdecl<__m128d, test_function_return_m128d_zero>
{
    Result main()
    {
        xorpd(xmm1, xmm1);
        return xmm1;
    }
};

//----------------------------------------
// function_cdecl<__m128d> (return ptr)
//----------------------------------------
// Returns the __m128d argument directly from memory.
struct test_function_return_m128d_ptr : jitasm::function_cdecl<__m128d, test_function_return_m128d_ptr, __m128d>
{
    Result main(Addr a1)
    {
        return xmmword_ptr[a1];
    }
};

//----------------------------------------
// function_cdecl<__m128i> (return xmm1)
//----------------------------------------
// Returns an all-zero __m128i built in xmm1.
struct test_function_return_m128i_zero : jitasm::function_cdecl<__m128i, test_function_return_m128i_zero>
{
    Result main()
    {
        pxor(xmm1, xmm1);
        return xmm1;
    }
};

//----------------------------------------
// function_cdecl<__m128i> (return ptr)
//----------------------------------------
// Returns the __m128i argument directly from memory.
struct test_function_return_m128i_ptr : jitasm::function_cdecl<__m128i, test_function_return_m128i_ptr, __m128i>
{
    Result main(Addr a1)
    {
        return xmmword_ptr[a1];
    }
};
#endif

// Byte-for-byte comparisons of generated code against externally assembled
// references. TEST_M / TEST_N expand to nothing unless MASM_TEST / NASM_TEST
// is defined (see test.h).
void test_register_allocation()
{
    TEST_M(test_cfg1)
    TEST_M(test_register_allocation1)
    TEST_M(test_regalloc_reassign_physical_reg);
    TEST_N(test_regalloc_vsib);
}

// Runs each test_function_return_* object and compares the returned value
// with the expected one. Vector results are compared lane-wise and reduced to
// a bit mask, which must have all lanes set.
void test_calling_convention()
{
    TEST_EQUAL((int)test_function_return_char()(0x78), (int)0x78);
    TEST_EQUAL(test_function_return_short()(0x7A52), (short)0x7A52);

    test_function_return_int_imm test_function_return_int_imm_obj;
    TEST_EQUAL(test_function_return_int_imm_obj(), (int)0x4AC396D7);

    test_function_return_int_zero test_function_return_int_zero_obj;
    TEST_EQUAL(test_function_return_int_zero_obj(), 0);

    test_function_return_float_imm test_function_return_float_imm_obj;
    TEST_EQUAL(test_function_return_float_imm_obj(), 11.0f);
#if JITASM_XMMINTRIN
    TEST_EQUAL(test_function_return_float_xmm()(2.0f), 2.0f);
#endif
    TEST_EQUAL(test_function_return_float_ptr()(3.0f), 3.0f);
    TEST_EQUAL(test_function_return_float_st0()(4.0f), 4.0f);
    test_function_return_double_imm test_function_return_double_imm_obj;
    TEST_EQUAL(test_function_return_double_imm_obj(), 11.0);
#if JITASM_EMMINTRIN
    TEST_EQUAL(test_function_return_double_xmm()(5.0), 5.0);
#endif
    TEST_EQUAL(test_function_return_double_ptr()(6.0), 6.0);
    TEST_EQUAL(test_function_return_double_st0()(7.0), 7.0);
#if JITASM_MMINTRIN && !defined(_WIN64)
    TEST_EQUAL(_mm_movemask_pi8(_mm_cmpeq_pi32(test_function_return_m64_mm1()(2), _mm_set_pi32(4, 4))), 0xFF);
    TEST_EQUAL(_mm_movemask_pi8(_mm_cmpeq_pi32(test_function_return_m64_ptr()(_mm_set_pi32(0x12345678, 0xFEDCBA98)), _mm_set_pi32(0x12345678, 0xFEDCBA98))), 0xFF);
    _mm_empty();    // leave MMX state before any further floating-point work
#endif

#if JITASM_XMMINTRIN
    test_function_return_m128_zero test_function_return_m128_zero_obj;
    TEST_EQUAL(_mm_movemask_ps(_mm_cmpeq_ps(test_function_return_m128_zero_obj(), _mm_setzero_ps())), 0x0F);
    TEST_EQUAL(_mm_movemask_ps(_mm_cmpeq_ps(test_function_return_m128_ptr()(_mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f)), _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f))), 0x0F);
#endif

#if JITASM_EMMINTRIN
    test_function_return_m128d_zero test_function_return_m128d_zero_obj;
    TEST_EQUAL(_mm_movemask_pd(_mm_cmpeq_pd(test_function_return_m128d_zero_obj(), _mm_setzero_pd())), 0x03);
    TEST_EQUAL(_mm_movemask_pd(_mm_cmpeq_pd(test_function_return_m128d_ptr()(_mm_set_pd(1.0f, 2.0f)), _mm_set_pd(1.0f, 2.0f))), 0x03);

    test_function_return_m128i_zero test_function_return_m128i_zero_obj;
    TEST_EQUAL(_mm_movemask_epi8(_mm_cmpeq_epi32(test_function_return_m128i_zero_obj(), _mm_setzero_si128())), 0xFFFF);
    TEST_EQUAL(_mm_movemask_epi8(_mm_cmpeq_epi32(test_function_return_m128i_ptr()(_mm_set_epi32(1, 2, 3, 4)), _mm_set_epi32(1, 2, 3, 4))), 0xFFFF);
#endif
}

// Structured control flow (While / If / ElseIf / Else) combined with a jump
// out of the loop to a label. Not referenced by any test in this file.
struct test_cfg : jitasm::function_cdecl
{
    void main()
    {
        jitasm::Reg32 i;
        mov(i, 10);
        While(i != 0);
            If(ecx == 1 || ecx == 2);
                mov(ecx, i);
            ElseIf(edx == 0);
                mov(edx, 1);
            Else();
                jmp("Exit");
            EndIf();
            dec(i);
        EndW();

        L("Exit");
    }
};

#if JITASM_MMINTRIN
// Lane-wise 32-bit sum of three __m64 arguments; the second argument is read
// through its address.
struct test_m64_args3 : jitasm::function_cdecl<__m64, test_m64_args3, __m64, __m64, __m64>
{
    Result main(MmxReg v1, Addr v2, MmxReg v3)
    {
        MmxReg res;
        movq(res, v1);
        paddd(res, qword_ptr[v2]);
        paddd(res, v3);
        return res;
    }
};
#endif

#if JITASM_XMMINTRIN
// Lane-wise float sum of three __m128 arguments; the second argument is read
// through its address.
struct test_m128_args3 : jitasm::function_cdecl<__m128, test_m128_args3, __m128, __m128, __m128>
{
    Result main(XmmReg v1, Addr v2, XmmReg v3)
    {
        XmmReg res;
        movaps(res, v1);
        addps(res, xmmword_ptr[v2]);
        addps(res, v3);
        return res;
    }
};
#endif

// Five-argument variants, compiled only when _WIN32 is not defined. They are
// not called anywhere in this file.
#if !defined(_WIN32)
#if JITASM_MMINTRIN
// Lane-wise 32-bit sum of five __m64 arguments (2nd and 5th via address).
struct test_m64_args5 : jitasm::function_cdecl<__m64, test_m64_args5, __m64, __m64, __m64, __m64, __m64>
{
    Result main(MmxReg v1, Addr v2, MmxReg v3, MmxReg v4, Addr v5)
    {
        MmxReg res;
        movq(res, v1);
        paddd(res, qword_ptr[v2]);
        paddd(res, v3);
        paddd(res, v4);
        paddd(res, qword_ptr[v5]);
        return res;
    }
};
#endif

#if JITASM_XMMINTRIN
// Lane-wise float sum of five __m128 arguments (2nd and 5th via address).
struct test_m128_args5 : jitasm::function_cdecl<__m128, test_m128_args5, __m128, __m128, __m128, __m128, __m128>
{
    Result main(XmmReg v1, Addr v2, XmmReg v3, XmmReg v4, Addr v5)
    {
        XmmReg res;
        movaps(res, v1);
        addps(res, xmmword_ptr[v2]);
        addps(res, v3);
        addps(res, v4);
        addps(res, xmmword_ptr[v5]);
        return res;
    }
};
#endif
#endif

#if JITASM_MMINTRIN && JITASM_XMMINTRIN && JITASM_EMMINTRIN
// Mixed int / __m64 / __m128i / __m128 argument list. `n` selects which
// vector argument is converted to packed floats and returned; any other
// value of n returns all zeros.
struct test_mix_args : jitasm::function_cdecl<__m128, test_mix_args, int, __m64, __m128i, __m64, __m128>
{
    Result main(Reg32 n, MmxReg v1, XmmReg v2, MmxReg v3, XmmReg v4)
    {
        XmmReg res;
        xorps(res, res);
        If(n == 0);
            cvtpi2ps(res, v1);
        ElseIf(n == 1);
            cvtdq2ps(res, v2);
        ElseIf(n == 2);
            cvtpi2ps(res, v3);
        ElseIf(n == 3);
            movaps(res, v4);
        EndIf();
        return res;
    }
};
#endif

// Integer power by repeated multiplication: returns a multiplied by itself
// b times, starting from 1. Both arguments are in registers.
struct test_ipow1 : jitasm::function_cdecl
{
    Result main(Reg32 a, Reg32 b)
    {
        Reg32 i;
        Reg32 c;
        mov(c, 1);
        xor(i, i);
        While(i < b);
            imul(c, a);
            inc(i);
        EndW();
        return c;
    }
};

// Same as test_ipow1, but the base is multiplied straight from its memory
// location.
struct test_ipow2 : jitasm::function_cdecl
{
    Result main(Addr a, Reg32 b)
    {
        Reg32 i;
        Reg32 c;
        mov(c, 1);
        xor(i, i);
        While(i < b);
            imul(c, dword_ptr[a]);
            inc(i);
        EndW();
        return c;
    }
};

// Iterative Fibonacci: n == 0 gives 0, n == 1 or 2 gives 1; larger n runs the
// loop n - 2 times (i counts from 2 up to n).
struct test_fibonacci : jitasm::function_cdecl
{
    Result main(Reg32 n)
    {
        Reg32 r;
        If(n == 0);
            mov(r, 0);
        ElseIf(n == 1 || n == 2);
            mov(r, 1);
        Else();
            Reg32 a;
            Reg32 b;
            Reg32 i;
            mov(a, 1);
            mov(b, 1);
            mov(i, 2);
            While(i < n);
                mov(r, a);      // r = a + b
                add(r, b);
                mov(b, a);      // shift the pair forward
                mov(a, r);
                inc(i);
            EndW();
        EndIf();
        return r;
    }
};

// Executes generated functions and checks their results.
void test_execute()
{
    // MMX test
#if JITASM_MMINTRIN && !defined(_WIN64) // VC does not support MMX intrinsics on x64.
    {
        __m64 v1 = _mm_set_pi32(1, 2);
        __m64 v2 = _mm_set_pi32(3, 4);
        __m64 v3 = _mm_set_pi32(7, 8);
        TEST_EQUAL(_mm_movemask_pi8(_mm_cmpeq_pi32(test_m64_args3()(v1, v2, v3), _mm_set_pi32(11, 14))), 0xFF);
        _mm_empty();
    }

    // NOTE(review): this guard enables the block when _WIN32 IS defined,
    // while the comment on it says VC does not support this parameter
    // passing - confirm which condition is intended.
#if defined(_WIN32) && JITASM_XMMINTRIN && JITASM_EMMINTRIN // VC does not support following parameter passing.
    {
        __m64 v1 = _mm_set_pi32(1, 2);
        __m128i v2 = _mm_set_epi32(3, 4, 5, 6);
        __m64 v3 = _mm_set_pi32(7, 8);
        __m128 v4 = _mm_set_ps(9.0f, 10.0f, 11.0f, 12.0f);
        TEST_EQUAL(_mm_movemask_ps(_mm_cmpeq_ps(test_mix_args()(0, v1, v2, v3, v4), _mm_set_ps(0.0f, 0.0f, 1.0f, 2.0f))), 0x0F);
        _mm_empty();
    }
#endif // defined(_WIN32)
#endif // JITASM_MMINTRIN && !defined(_WIN64)

#if JITASM_XMMINTRIN
    {
        __m128 v1 = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f);
        __m128 v2 = _mm_set_ps(5.0f, 6.0f, 7.0f, 8.0f);
        __m128 v3 = _mm_set_ps(9.0f, 10.0f, 11.0f, 12.0f);
        TEST_EQUAL(_mm_movemask_ps(_mm_cmpeq_ps(test_m128_args3()(v1, v2, v3), _mm_set_ps(15.0f, 18.0f, 21.0f, 24.0f))), 0x0F)
    }
#endif

    TEST_EQUAL(test_ipow1()(2, 0), 1);
    TEST_EQUAL(test_ipow1()(2, 3), 8);
    TEST_EQUAL(test_ipow2()(2, 0), 1);
    TEST_EQUAL(test_ipow2()(2, 3), 8);
    TEST_EQUAL(test_fibonacci()(0), 0U);
    TEST_EQUAL(test_fibonacci()(10), 55U);
    TEST_EQUAL(test_fibonacci()(47), 2971215073U);
}

// Defined in another translation unit.
void test_backend();

// Runs every test group, then prints the pass/fail counters and the
// accumulated assemble time.
int main()
{
    test_backend();
    test_register_allocation();
    test_calling_convention();
    test_execute();

    printf("TEST RESULT - %d
passed, %d failed\n", g_test_succeeded, g_test_failed); 732 | printf("Assemble time - %d us\n", g_assemble_time); 733 | } 734 | -------------------------------------------------------------------------------- /test/test.h: -------------------------------------------------------------------------------- 1 | #include 2 | #if defined(_WIN32) 3 | #else 4 | #include 5 | #include 6 | #endif 7 | 8 | #define _TOSTR(s) #s 9 | #define TOSTR(s) _TOSTR(s) 10 | #if defined(MASM_TEST) 11 | #define TEST_M(func_name) {test_impl(TOSTR(func_name), masm_ ## func_name);} 12 | #else 13 | #define TEST_M(func_name) 14 | #endif 15 | #if defined(NASM_TEST) 16 | #define TEST_N(func_name) {test_impl(TOSTR(func_name), nasm_ ## func_name);} 17 | #else 18 | #define TEST_N(func_name) 19 | #endif 20 | #define TEST_EQUAL(actual, expected) {long long int beg_time = get_time(); test_equal_impl(TOSTR(actual), (actual), (expected), beg_time);} 21 | 22 | extern int g_test_succeeded; 23 | extern int g_test_failed; 24 | extern int g_assemble_time; // us 25 | 26 | inline long long int get_time() 27 | { 28 | #if defined(_WIN32) 29 | LARGE_INTEGER t, f; 30 | ::QueryPerformanceCounter(&t); 31 | ::QueryPerformanceFrequency(&f); 32 | return t.QuadPart * 1000 * 1000 / f.QuadPart; 33 | #else 34 | rusage t; 35 | getrusage(RUSAGE_SELF, &t); 36 | return t.ru_utime.tv_sec * 1000 * 1000 + t.ru_utime.tv_usec; 37 | #endif 38 | } 39 | 40 | template 41 | void test_impl(const char* func_name, Fn2 fn2) 42 | { 43 | Fn1 fn1; 44 | 45 | const long long int beg_time = get_time(); 46 | fn1.Assemble(); 47 | const long long int end_time = get_time(); 48 | g_assemble_time += static_cast(end_time - beg_time); 49 | 50 | size_t size = fn1.GetCodeSize(); 51 | 52 | unsigned char* p1 = (unsigned char*) fn1.GetCode(); 53 | unsigned char* p2 = (unsigned char*) fn2; 54 | if (*p2 == 0xE9) p2 = (unsigned char*) (p2 + (unsigned long&) *(p2 + 1) + 5); 55 | 56 | for (size_t i = 0; i < size; i++) { 57 | if (p1[i] != p2[i]) { 58 | size_t min_i = i 
> 10 ? i - 10 : 0; 59 | size_t max_i = min_i + 20 < size ? min_i + 20 : size; 60 | 61 | printf("<%s> ... failed (dump %d-%d)\n", func_name, min_i, max_i); 62 | 63 | printf(" expected -> "); 64 | for (size_t j = min_i; j < max_i; j++) printf("%02X,", p2[j]); 65 | printf("\n"); 66 | 67 | printf(" actual -> "); 68 | for (size_t j = min_i; j < max_i; j++) printf("%02X,", p1[j]); 69 | printf("\n"); 70 | 71 | printf(" %*c^^\n", (i - min_i) * 3 + 1, ' '); 72 | 73 | g_test_failed++; 74 | return; 75 | } 76 | } 77 | g_test_succeeded++; 78 | } 79 | 80 | template 81 | void test_equal_impl(const char* func_name, T actual, T expected, long long int beg_time) 82 | { 83 | const long long int end_time = get_time(); 84 | g_assemble_time += static_cast(end_time - beg_time); 85 | 86 | if (actual == expected) { 87 | g_test_succeeded++; 88 | } else { 89 | std::cout << "<" << func_name << "> ... failed."; 90 | std::cout << " expected: " << expected; 91 | std::cout << " actual: " << actual << std::endl; 92 | g_test_failed++; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /test/test.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "test.vcxproj", "{3476677A-5842-4A3C-BCC0-A5C695C2A4C9}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug - No NASM|Win32 = Debug - No NASM|Win32 9 | Debug - No NASM|x64 = Debug - No NASM|x64 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release - No NASM|Win32 = Release - No NASM|Win32 13 | Release - No NASM|x64 = Release - No NASM|x64 14 | Release|Win32 = Release|Win32 15 | Release|x64 = Release|x64 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug - No 
NASM|Win32.ActiveCfg = Debug - No NASM|Win32 19 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug - No NASM|Win32.Build.0 = Debug - No NASM|Win32 20 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug - No NASM|x64.ActiveCfg = Debug - No NASM|x64 21 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug - No NASM|x64.Build.0 = Debug - No NASM|x64 22 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug|Win32.ActiveCfg = Debug|Win32 23 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug|Win32.Build.0 = Debug|Win32 24 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug|x64.ActiveCfg = Debug|x64 25 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Debug|x64.Build.0 = Debug|x64 26 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release - No NASM|Win32.ActiveCfg = Release - No NASM|Win32 27 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release - No NASM|Win32.Build.0 = Release - No NASM|Win32 28 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release - No NASM|x64.ActiveCfg = Release - No NASM|x64 29 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release - No NASM|x64.Build.0 = Release - No NASM|x64 30 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release|Win32.ActiveCfg = Release|Win32 31 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release|Win32.Build.0 = Release|Win32 32 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release|x64.ActiveCfg = Release|x64 33 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9}.Release|x64.Build.0 = Release|x64 34 | EndGlobalSection 35 | GlobalSection(SolutionProperties) = preSolution 36 | HideSolutionNode = FALSE 37 | EndGlobalSection 38 | EndGlobal 39 | -------------------------------------------------------------------------------- /test/test.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug - No NASM 6 | Win32 7 | 8 | 9 | Debug - No NASM 10 | x64 11 | 12 | 13 | Debug 14 | Win32 15 | 16 | 17 | Debug 18 | x64 19 | 20 | 21 | Release - No NASM 22 | Win32 23 | 24 | 25 | Release - No NASM 26 | x64 27 | 28 | 29 | Release 30 | Win32 31 | 32 | 33 | Release 34 | x64 35 
| 36 | 37 | 38 | {3476677A-5842-4A3C-BCC0-A5C695C2A4C9} 39 | test 40 | Win32Proj 41 | 42 | 43 | 44 | Application 45 | Unicode 46 | true 47 | v110 48 | 49 | 50 | Application 51 | Unicode 52 | v110 53 | 54 | 55 | Application 56 | Unicode 57 | true 58 | v110 59 | 60 | 61 | Application 62 | Unicode 63 | v110 64 | 65 | 66 | Application 67 | Unicode 68 | true 69 | v110 70 | 71 | 72 | Application 73 | Unicode 74 | v110 75 | 76 | 77 | Application 78 | Unicode 79 | true 80 | v110 81 | 82 | 83 | Application 84 | Unicode 85 | v110 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | <_ProjectFileVersion>10.0.30319.1 118 | $(SolutionDir)$(Configuration)\ 119 | $(Configuration)\ 120 | true 121 | $(SolutionDir)$(Platform)\$(Configuration)\ 122 | $(Platform)\$(Configuration)\ 123 | true 124 | $(SolutionDir)$(Configuration)\ 125 | $(Configuration)\ 126 | false 127 | $(SolutionDir)$(Platform)\$(Configuration)\ 128 | $(Platform)\$(Configuration)\ 129 | false 130 | $(SolutionDir)$(Configuration)\ 131 | $(Configuration)\ 132 | true 133 | $(SolutionDir)$(Platform)\$(Configuration)\ 134 | $(Platform)\$(Configuration)\ 135 | true 136 | $(SolutionDir)$(Configuration)\ 137 | $(Configuration)\ 138 | false 139 | $(SolutionDir)$(Platform)\$(Configuration)\ 140 | $(Platform)\$(Configuration)\ 141 | false 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | Disabled 152 | ..\;%(AdditionalIncludeDirectories) 153 | WIN32;_DEBUG;_CONSOLE;MASM_TEST;NASM_TEST;%(PreprocessorDefinitions) 154 | true 155 | Default 156 | MultiThreadedDebugDLL 157 | false 158 | 159 | 160 | Level3 161 | EditAndContinue 162 | 163 | 164 | true 165 | Console 166 | 167 | 168 | MachineX86 169 | false 170 | 171 | 172 | Testing... 
173 | "$(OutDir)$(ProjectName).exe" 174 | 175 | 176 | 177 | 178 | X64 179 | 180 | 181 | Disabled 182 | ..\;%(AdditionalIncludeDirectories) 183 | WIN32;_DEBUG;_CONSOLE;MASM_TEST;NASM_TEST;%(PreprocessorDefinitions) 184 | true 185 | Default 186 | MultiThreadedDebugDLL 187 | false 188 | 189 | 190 | Level3 191 | ProgramDatabase 192 | 193 | 194 | true 195 | Console 196 | 197 | 198 | MachineX64 199 | 200 | 201 | Testing... 202 | IF EXIST %windir%\SysWow64 ("$(OutDir)$(ProjectName).exe") ELSE echo Skipped. 203 | 204 | 205 | 206 | 207 | MaxSpeed 208 | true 209 | ..\;%(AdditionalIncludeDirectories) 210 | WIN32;NDEBUG;_CONSOLE;MASM_TEST;NASM_TEST;%(PreprocessorDefinitions) 211 | Default 212 | MultiThreadedDLL 213 | false 214 | true 215 | 216 | 217 | Level4 218 | ProgramDatabase 219 | 220 | 221 | true 222 | Console 223 | true 224 | true 225 | UseLinkTimeCodeGeneration 226 | MachineX86 227 | false 228 | 229 | 230 | Testing... 231 | "$(OutDir)$(ProjectName).exe" 232 | 233 | 234 | 235 | 236 | X64 237 | 238 | 239 | MaxSpeed 240 | true 241 | ..\;%(AdditionalIncludeDirectories) 242 | WIN32;NDEBUG;_CONSOLE;MASM_TEST;NASM_TEST;%(PreprocessorDefinitions) 243 | Default 244 | MultiThreadedDLL 245 | false 246 | true 247 | 248 | 249 | Level4 250 | ProgramDatabase 251 | 252 | 253 | true 254 | Console 255 | true 256 | true 257 | UseLinkTimeCodeGeneration 258 | MachineX64 259 | 260 | 261 | Testing... 262 | IF EXIST %windir%\SysWow64 ("$(OutDir)$(ProjectName).exe") ELSE echo Skipped. 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | Disabled 274 | ..\;%(AdditionalIncludeDirectories) 275 | WIN32;_DEBUG;_CONSOLE;MASM_TEST;%(PreprocessorDefinitions) 276 | true 277 | Default 278 | MultiThreadedDebugDLL 279 | false 280 | 281 | 282 | Level3 283 | EditAndContinue 284 | 285 | 286 | true 287 | Console 288 | 289 | 290 | MachineX86 291 | false 292 | 293 | 294 | Testing... 
295 | "$(OutDir)$(ProjectName).exe" 296 | 297 | 298 | 299 | 300 | X64 301 | 302 | 303 | Disabled 304 | ..\;%(AdditionalIncludeDirectories) 305 | WIN32;_DEBUG;_CONSOLE;MASM_TEST;%(PreprocessorDefinitions) 306 | true 307 | Default 308 | MultiThreadedDebugDLL 309 | false 310 | 311 | 312 | Level3 313 | ProgramDatabase 314 | 315 | 316 | true 317 | Console 318 | 319 | 320 | MachineX64 321 | 322 | 323 | Testing... 324 | IF EXIST %windir%\SysWow64 ("$(OutDir)$(ProjectName).exe") ELSE echo Skipped. 325 | 326 | 327 | 328 | 329 | MaxSpeed 330 | true 331 | ..\;%(AdditionalIncludeDirectories) 332 | WIN32;NDEBUG;_CONSOLE;MASM_TEST;%(PreprocessorDefinitions) 333 | Default 334 | MultiThreadedDLL 335 | false 336 | true 337 | 338 | 339 | Level4 340 | ProgramDatabase 341 | 342 | 343 | true 344 | Console 345 | true 346 | true 347 | 348 | 349 | MachineX86 350 | false 351 | 352 | 353 | Testing... 354 | "$(OutDir)$(ProjectName).exe" 355 | 356 | 357 | 358 | 359 | X64 360 | 361 | 362 | MaxSpeed 363 | true 364 | ..\;%(AdditionalIncludeDirectories) 365 | WIN32;NDEBUG;_CONSOLE;MASM_TEST;%(PreprocessorDefinitions) 366 | Default 367 | MultiThreadedDLL 368 | false 369 | true 370 | 371 | 372 | Level4 373 | ProgramDatabase 374 | 375 | 376 | true 377 | Console 378 | true 379 | true 380 | 381 | 382 | MachineX64 383 | 384 | 385 | Testing... 386 | IF EXIST %windir%\SysWow64 ("$(OutDir)$(ProjectName).exe") ELSE echo Skipped. 387 | 388 | 389 | 390 | 391 | true 392 | true 393 | [yasm] assembling... 394 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win64 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=amd64 "%(FullPath)" 395 | $(IntDir)%(Filename).obj;%(Outputs) 396 | true 397 | [yasm] assembling... 
398 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win64 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=amd64 "%(FullPath)" 399 | $(IntDir)%(Filename).obj;%(Outputs) 400 | true 401 | true 402 | [yasm] assembling... 403 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win64 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=amd64 "%(FullPath)" 404 | $(IntDir)%(Filename).obj;%(Outputs) 405 | true 406 | [yasm] assembling... 407 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win64 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=amd64 "%(FullPath)" 408 | $(IntDir)%(Filename).obj;%(Outputs) 409 | 410 | 411 | true 412 | [yasm] assembling... 413 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win32 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=x86 "%(FullPath)" 414 | $(IntDir)%(Filename).obj;%(Outputs) 415 | true 416 | [yasm] assembling... 417 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win32 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=x86 "%(FullPath)" 418 | $(IntDir)%(Filename).obj;%(Outputs) 419 | true 420 | true 421 | [yasm] assembling... 422 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win32 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=x86 "%(FullPath)" 423 | $(IntDir)%(Filename).obj;%(Outputs) 424 | true 425 | [yasm] assembling... 
426 | yasm-1.2.0-win32.exe --arch=x86 --parser=nasm --preproc=nasm --oformat=win32 --lformat=nasm --list="$(IntDir)%(Filename).lst" --objfile="$(IntDir)%(Filename).obj" --machine=x86 "%(FullPath)" 427 | $(IntDir)%(Filename).obj;%(Outputs) 428 | true 429 | 430 | 431 | true 432 | [masm] assembling... 433 | "$(VSInstallDir)VC\bin\x86_amd64\ml64" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 434 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 435 | true 436 | [masm] assembling... 437 | "$(VSInstallDir)VC\bin\x86_amd64\ml64" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 438 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 439 | true 440 | [masm] assembling... 441 | "$(VSInstallDir)VC\bin\x86_amd64\ml64" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 442 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 443 | true 444 | [masm] assembling... 445 | "$(VSInstallDir)VC\bin\x86_amd64\ml64" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 446 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 447 | Document 448 | true 449 | $(IntDir)%(FileName).lst 450 | true 451 | $(IntDir)%(FileName).lst 452 | true 453 | $(IntDir)%(FileName).lst 454 | true 455 | $(IntDir)%(FileName).lst 456 | 457 | 458 | [masm] assembling... 459 | "$(VSInstallDir)VC\bin\ml" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 460 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 461 | true 462 | [masm] assembling... 463 | "$(VSInstallDir)VC\bin\ml" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 464 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 465 | true 466 | [masm] assembling... 
467 | "$(VSInstallDir)VC\bin\ml" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 468 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 469 | true 470 | [masm] assembling... 471 | "$(VSInstallDir)VC\bin\ml" /c /Zi /Sa "/Fl$(IntDir)%(Filename).lst" "/Fo$(IntDir)%(Filename).obj" "%(FullPath)" 472 | $(IntDir)%(Filename).obj;$(IntDir)%(Filename).lst;%(Outputs) 473 | true 474 | Document 475 | $(IntDir)%(FileName).lst 476 | $(IntDir)%(FileName).lst 477 | $(IntDir)%(FileName).lst 478 | $(IntDir)%(FileName).lst 479 | true 480 | true 481 | true 482 | true 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | -------------------------------------------------------------------------------- /test/test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | 14 | 15 | Source Files 16 | 17 | 18 | Source Files 19 | 20 | 21 | 22 | 23 | Header Files 24 | 25 | 26 | Header Files 27 | 28 | 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | 38 | 39 | Source Files 40 | 41 | 42 | Source Files 43 | 44 | 45 | --------------------------------------------------------------------------------