├── elftoc
├── sstrip
├── calc-hash.sh
├── selfmd5-test
├── trim-asm.sh
├── .github
│   └── workflows
│       └── ccpp.yml
├── .gitignore
├── change-asm.sh
├── LICENSE
├── main-src.s
├── main-src.c
├── selfmd5.h
├── trim-src.c
├── calc-hash.c
└── README.md


/elftoc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fengjixuchui/selfmd5/HEAD/elftoc


--------------------------------------------------------------------------------
/sstrip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fengjixuchui/selfmd5/HEAD/sstrip


--------------------------------------------------------------------------------
/calc-hash.sh:
--------------------------------------------------------------------------------
1 | gcc -o calc-hash calc-hash.c && ./calc-hash selfmd5-test  # build calc-hash.c, then run it with selfmd5-test as the file to hash (its argv[1])
2 | 


--------------------------------------------------------------------------------
/selfmd5-test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fengjixuchui/selfmd5/HEAD/selfmd5-test


--------------------------------------------------------------------------------
/trim-asm.sh:
--------------------------------------------------------------------------------
1 | ./elftoc selfmd5 > selfmd5.h  # write elftoc's output for the selfmd5 binary to selfmd5.h (the header trim-src.c includes)
2 | g++ -g -o trim-elf trim-src.c && ./trim-elf  # build the trimmer and run it; trim-src.c writes the result to selfmd5-test
3 | 


--------------------------------------------------------------------------------
/.github/workflows/ccpp.yml:
--------------------------------------------------------------------------------
 1 | name: C/C++ CI
 2 | 
 3 | on:  # run on pushes to master and on pull requests targeting master
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v2
16 |     - name: build  # despite the name nothing is compiled: the committed binary is executed as-is
17 |       run: |
18 |         ./selfmd5-test
19 |         md5sum selfmd5-test  # printed next to the program's own output so the two digests can be compared in the log
20 |     
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 


--------------------------------------------------------------------------------
/change-asm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Rewrite the gcc-generated main-src.s in place so that it can be assembled
 3 | # without libc: main becomes _start, libc calls become raw syscalls, and
 4 | # directives/instructions that are not wanted in the final binary are removed.
 5 | # Needs GNU sed (-i, plus the \t and \n escapes used below).
 6 | set -eu
 7 | 
 8 | sed -i '
 9 | # Drop section switches, the compiler ident (any GCC version), .size and alignment.
10 | s/\.section.*//g
11 | s/\.ident\t"GCC: [^"]*"//g
12 | s/\.size\tmain, \.-main//g
13 | s/\.align 4//g
14 | s/\.align 16//g
15 | # Enter at _start instead of main.
16 | s/\.globl\tmain/.globl\t_start/g
17 | s/\.type\tmain, @function/.type\t_start, @function/g
18 | s/main:/_start:/g
19 | # libc wrappers -> syscall number in %al followed by syscall.
20 | s/call\topen/mov\t$2, %al\n  syscall/g
21 | s/call\tread/xor\t%al, %al\n  syscall/g
22 | s/call\twrite/mov\t$1, %al\n  syscall/g
23 | s/call\texit/mov\t$60, %al\n  syscall/g
24 | # Instruction removals/substitutions; vmovdqu because the .align lines are gone.
25 | s/pushq.*//g
26 | s/movl\t\$1, %edx/mov $1, %dl/g
27 | s/vmovdqa/vmovdqu/g
28 | s/movsbq\t%r11b, %r11//g
29 | s/movslq\t%ecx, %rcx//g
30 | ' main-src.s
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 zhao xin
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/main-src.s:
--------------------------------------------------------------------------------
  1 | 	.file	"main-src.c"
  2 | 	.text
  3 | 	# x86-64 Linux, AT&T syntax. Entered directly at _start; output and exit go through raw syscalls.
  4 | 	.globl	_start
  5 | 	.type	_start, @function
  6 | _start:
  7 | # Runs the 64 MD5 rounds of main-src.c over one 64-byte block of the program's own image at 0x400000, prints the four state words as 32 hex digits, then exits.
  8 | 	subq	$64, %rsp	# scratch: 8(%rsp) x87 temp, 31(%rsp) output char, 32..47 hash state, 48..63 shift table
  9 | 	xorl	%edx, %edx	# edx = i = 0 (round counter)
 10 | 	movb	$-128, 4194778	# data[len] = 0x80 at 0x400000 + 474 (padding marker byte)
 11 | 	vmovdqu	.LC0(%rip), %xmm0	# starting hash state (four 32-bit words)
 12 | 	vmovaps	%xmm0, 32(%rsp)	# hash[0..3] -> 32..47(%rsp)
 13 | 	vmovdqu	.LC2(%rip), %xmm0	# ss[] rotate-count table
 14 | 	movl	36(%rsp), %edi	# edi = B = hash[1]
 15 | 	movl	32(%rsp), %r9d	# r9d = A = hash[0]
 16 | 	movl	40(%rsp), %esi	# esi = C = hash[2]
 17 | 	movq	$3792, 4194808	# message length in bits (474 * 8) stored as 64 bits at 0x400000 + 504 (new_len)
 18 | 	movl	44(%rsp), %r8d	# r8d = D = hash[3]
 19 | 	vmovaps	%xmm0, 48(%rsp)	# ss[0..15] -> 48..63(%rsp)
 20 | .L6:	# --- round loop, i = 0..63 ---
 21 | 	movl	%edx, %eax
 22 | 	movl	%edx, %ecx	# ecx keeps i for the i % 4 taken further down
 23 | 	sarb	$4, %al	# al = i / 16 (round group 0..3)
 24 | 	movsbl	%al, %r10d	# r10d = i / 16, used for the ss[] index
 25 | 	cmpb	$2, %al
 26 | 	je	.L2
 27 | 	cmpb	$3, %al
 28 | 	je	.L3
 29 | 	decb	%al
 30 | 	je	.L4	# group 1
 31 | 	movl	%esi, %eax	# group 0: F = ((C ^ D) & B) ^ D, same value as FF(B, C, D)
 32 | 	movl	%edx, %r11d	# g = i (already < 16, so the mask at .L15 is skipped)
 33 | 	xorl	%r8d, %eax
 34 | 	andl	%edi, %eax
 35 | 	xorl	%r8d, %eax
 36 | 	jmp	.L5
 37 | .L4:	# group 1: F = ((B ^ C) & D) ^ C, same value as GG(B, C, D)
 38 | 	movl	%edi, %eax
 39 | 	leal	(%rdx,%rdx,4), %r11d	# r11d = 5 * i
 40 | 	xorl	%esi, %eax
 41 | 	incl	%r11d	# r11d = 5 * i + 1
 42 | 	andl	%r8d, %eax
 43 | 	xorl	%esi, %eax
 44 | 	jmp	.L15
 45 | .L2:	# group 2: F = B ^ C ^ D (HH)
 46 | 	movl	%edi, %eax
 47 | 	leal	(%rdx,%rdx,2), %r11d	# r11d = 3 * i
 48 | 	xorl	%esi, %eax
 49 | 	addl	$5, %r11d	# r11d = 3 * i + 5
 50 | 	xorl	%r8d, %eax
 51 | 	jmp	.L15
 52 | .L3:	# group 3: F = (B | ~D) ^ C (II)
 53 | 	movl	%r8d, %eax
 54 | 	imull	$7, %edx, %r11d	# r11d = 7 * i
 55 | 	notl	%eax
 56 | 	orl	%edi, %eax
 57 | 	xorl	%esi, %eax
 58 | .L15:
 59 | 	andl	$15, %r11d	# g = r11d % 16
 60 | .L5:
 61 | 	incl	%edx	# i + 1: argument of the sine constant and next round index
 62 | 	movl	%edx, 8(%rsp)
 63 | 	fildl	8(%rsp)	# push i + 1 on the x87 stack
 64 | #APP
 65 | # 21 "main-src.c" 1
 66 | 	fsin
 67 | 	
 68 | # 0 "" 2
 69 | #NO_APP
 70 | 	fabs	# |sin(i + 1)|
 71 | 	fmuls	.LC1(%rip)	# * 2^32 (.LC1 holds the float 4294967296.0)
 72 | 	andl	$3, %ecx	# i % 4
 73 | 	
 74 | 	leal	(%rcx,%r10,4), %ecx	# (i / 16) * 4 + i % 4
 75 | 	
 76 | 	movsbl	48(%rsp,%rcx), %ecx	# cl = ss[...], the rotate count
 77 | 	fisttpq	8(%rsp)	# store truncated to a 64-bit integer and pop
 78 | 	movq	8(%rsp), %rbx	# ebx = K (low 32 bits are used)
 79 | 	addl	%ebx, %eax	# F += K
 80 | 	addl	4194752(,%r11,4), %eax	# F += m[g]; m = 0x400000 + 448 (off), the last 64-byte block
 81 | 	addl	%r9d, %eax	# F += A
 82 | 	movl	%r8d, %r9d	# A = D
 83 | 	roll	%cl, %eax	# ROTLEFT(F, s)
 84 | 	addl	%edi, %eax	# eax = new B = B + ROTLEFT(F, s)
 85 | 	cmpl	$64, %edx
 86 | 	je	.L17	# all 64 rounds done
 87 | 	movl	%esi, %r8d	# D = C
 88 | 	movl	%edi, %esi	# C = B
 89 | 	movl	%eax, %edi	# B = new B
 90 | 	jmp	.L6
 91 | .L17:	# the final register shuffle was skipped: A is in r8d, B in eax, C in edi, D in esi
 92 | 	addl	%r8d, 32(%rsp)	# hash[0] += A
 93 | 	xorl	%ebx, %ebx	# ebx = output digit index, 0..31
 94 | 	addl	%eax, 36(%rsp)	# hash[1] += B
 95 | 	addl	%edi, 40(%rsp)	# hash[2] += C
 96 | 	addl	%esi, 44(%rsp)	# hash[3] += D
 97 | .L10:	# --- print loop: one hex digit per iteration ---
 98 | 	movl	%ebx, %eax
 99 | 	movl	%ebx, %edx
100 | 	shrb	%al	# al = index / 2 (which hash byte)
101 | 	andl	$1, %edx	# index % 2
102 | 	cmpb	$1, %dl	# carry is set when the index is even
103 | 	movzbl	%al, %eax
104 | 	sbbl	%ecx, %ecx	# ecx = -1 for an even index, 0 for an odd one
105 | 	movzbl	32(%rsp,%rax), %eax	# eax = hash byte
106 | 	andl	$4, %ecx	# shift 4 (high nibble) for an even index, 0 (low nibble) for an odd one
107 | 	sarl	%cl, %eax
108 | 	andl	$15, %eax	# nibble value 0..15
109 | 	leal	48(%rax), %edx	# '0' + nibble
110 | 	cmpb	$9, %al
111 | 	jle	.L9
112 | 	leal	87(%rax), %edx	# 'a' - 10 + nibble
113 | .L9:
114 | 	movb	%dl, 31(%rsp)	# the character to print
115 | 	leaq	31(%rsp), %rsi	# rsi = buffer address
116 | 	mov $1, %dl	# count = 1; only DL is written, the rest of edx comes from the leal above
117 | 	incl	%ebx
118 | 	movl	$1, %edi	# fd 1
119 | 	mov	$1, %al	# syscall 1 (write); only AL is written, the rest of rax is what the andl $15 / the previous syscall left
120 |   syscall
121 | 	cmpb	$32, %bl
122 | 	jne	.L10
123 | 	xorl	%edi, %edi	# exit status 0
124 | 	mov	$60, %al	# syscall 60 (exit); again only AL is written
125 |   syscall
126 | 	
127 | 	
128 | 
129 | 	
130 | .LC1:	# float constant 2^32 (bit pattern 0x4F800000)
131 | 	.long	1333788672
132 | 
133 | .LC2:	# ss[] as 16 bytes: 7,12,17,22, 5,9,14,20, 4,11,16,23, 6,10,15,21
134 | 	.quad	1445102447882210311
135 | 	.quad	1517442620720155396
136 | 
137 | .LC0:	# starting hash state. NOTE(review): not the 0x67452301... values of main-src.c; presumably the state before the last block as printed by calc-hash - confirm
138 | 	.long	1552271408
139 | 	.long	1380605251
140 | 	.long	293175058
141 | 	.long	-760172391
142 | 


--------------------------------------------------------------------------------
/main-src.c:
--------------------------------------------------------------------------------
  1 | #include <fcntl.h>
  2 | #include <unistd.h>
  3 | #include <stdlib.h>
  4 | 
  5 | #define BLOCK_LEN 64  // In bytes
  6 | 
  7 | /****************************** MACROS ******************************/
  8 | #define ROTLEFT(a, b)  ((a << b) | (a >> (32 - b)))  // rotate a left by b bits; arguments are not parenthesised, pass simple expressions only
  9 | 
 10 | #define FF(x, y, z)  ((x & y) | (~x & z))  // bitwise select: bits of y where x is 1, bits of z where x is 0
 11 | 
 12 | #define GG(x, y, z)  ((x & z) | (y & ~z))  // bitwise select: bits of x where z is 1, bits of y where z is 0
 13 | 
 14 | #define HH(x, y, z) (x ^ y ^ z)  // three-way xor
 15 | 
 16 | #define II(x, y, z)  (y ^ (x | ~z))
 17 | 
 18 | static long double fsin_my(long double a) {  // returns |sin(a)| computed with the x87 fsin instruction
 19 |     long double res;
 20 |     // operate in place on the x87 top-of-stack register
 21 |     asm __volatile__("fsin\n\t"
 22 |     :"=t"(res)  // output: taken from the top of the x87 stack
 23 |     :"0"(a)  // input: placed in the same location as operand 0
 24 |     :"memory");
 25 | 
 26 |     return (res) > 0 ? res : -res;  // absolute value
 27 | }
 28 | 
 29 | typedef unsigned int v4si __attribute__ ((vector_size (16)));
 30 | 
 31 | #define START 0x400000
 32 | 
 33 | int main(int argc, char *argv[]) {  // runs the MD5 rounds over the last 64-byte block of the program's own image and prints the state as 32 hex characters
 34 | 
 35 |     char *data = (char *) START;  // message buffer = the program image itself, accessed in place at 0x400000
 36 | 
 37 |     v4si hash = {(unsigned int) (0x67452301), (unsigned int) (0xEFCDAB89), (unsigned int) (0x98BADCFE),
 38 |                    (unsigned int) (0x10325476)};  // starting state words A, B, C, D
 39 | 
 40 |     const short len = 474;  // size of the final executable in bytes, hard-coded
 41 |     const short new_len = ((((len + 8) / 64) + 1) * 64) - 8;  // offset of the 8-byte length field (504 for len = 474)
 42 |     data[len] = 0x80;  // padding marker byte right after the message; NOTE(review): the bytes between it and new_len are assumed to be zero already
 43 |     *(unsigned long long *) (data + new_len) = len << 3;  // message length in bits
 44 |     const short off = new_len - (new_len % BLOCK_LEN);  // start of the last 64-byte block (448)
 45 | 
 46 |     unsigned int *m = (unsigned int *) &data[off];  // the sixteen 32-bit words of that block; no earlier block is processed in this function
 47 | 
 48 |     unsigned int A = hash[0];
 49 |     unsigned int B = hash[1];
 50 |     unsigned int C = hash[2];
 51 |     unsigned int D = hash[3];
 52 | 
 53 |     const char ss[] = {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};  // rotate counts: four per group of 16 rounds, cycled by i % 4
 54 | 
 55 |     for (char i = 0; i < 64; ++i) {  // the 64 rounds
 56 | 
 57 |         unsigned int F;
 58 |         char g;  // index of the message word used in this round
 59 |         switch (i / 16) {  // i / 16 is always 0..3 here, so F and g are set on every pass
 60 |             case 0:
 61 |                 F = FF(B, C, D);
 62 |                 g = i;
 63 |                 break;
 64 |             case 1:
 65 |                 F = GG(B, C, D);
 66 |                 g = (5 * i + 1) % 16;
 67 |                 break;
 68 |             case 2:
 69 |                 F = HH(B, C, D);
 70 |                 g = (3 * i + 5) % 16;
 71 |                 break;
 72 |             case 3:
 73 |                 F = II(B, C, D);
 74 |                 g = (7 * i) % 16;
 75 |                 break;
 76 |         }
 77 | 
 78 |         unsigned int K = (unsigned int) (((unsigned long long) 1 << 32) * fsin_my(i + 1));  // round constant computed on the fly: 2^32 * |sin(i + 1)|, truncated
 79 | 
 80 |         F += A + K + m[g];
 81 | 
 82 |         A = D;  // rotate the four working words
 83 |         D = C;
 84 |         C = B;
 85 |         B = B + ROTLEFT(F, ss[(i / 16) * 4 + (i % 4)]);
 86 |     }
 87 | 
 88 |     hash[0] += A;  // fold the block result into the state
 89 |     hash[1] += B;
 90 |     hash[2] += C;
 91 |     hash[3] += D;
 92 | 
 93 |     unsigned char *buf = (unsigned char *) &hash[0];  // the 16 state bytes in memory order
 94 |     for (
 95 |             unsigned char i = 0;
 96 |             i < 32; i++) {  // one hex digit per iteration
 97 |         char a = (buf[i / 2] >> (4 * (1 - i % 2))) & 0xF;  // high nibble for even i, low nibble for odd i
 98 |         char c = a >= 10 ? a + ('a' - 10) : a + '0';  // lower-case hex digit
 99 |         write(1, &c, 1);  // one byte to fd 1; no trailing newline is printed
100 |     }
101 | 
102 |     exit(0);
103 | }
104 | 


--------------------------------------------------------------------------------
/selfmd5.h:
--------------------------------------------------------------------------------
 1 | #include <stddef.h>
 2 | #include <elf.h>
 3 | 
 4 | #define ADDR_TEXT 0x00400000
 5 | 
 6 | typedef struct elf  /* layout of the whole executable image, in file order */
 7 | {
 8 |   Elf64_Ehdr      ehdr;  /* ELF file header */
 9 |   Elf64_Phdr      phdrs[1];  /* the single program header */
10 |   unsigned char   text[368];  /* machine code followed by its constant data */
11 | } elf;
12 | 
13 | elf foo =  /* the executable image as data; trim-src.c patches it and writes it out */
14 | {
15 |   /* ehdr */
16 |   {
17 |     { 0x7F, 'E', 'L', 'F', ELFCLASS64, ELFDATA2LSB, EV_CURRENT, ELFOSABI_SYSV,
18 |       0, 0, 0, 0, 0, 0, 0, 0 },
19 |     ET_EXEC, EM_X86_64, EV_CURRENT, ADDR_TEXT + offsetof(elf, text),  /* e_type, e_machine, e_version, e_entry = first byte of text */
20 |     offsetof(elf, phdrs), 0, 0, sizeof(Elf64_Ehdr), sizeof(Elf64_Phdr), 1,  /* e_phoff, e_shoff, e_flags, e_ehsize, e_phentsize, e_phnum */
21 |     sizeof(Elf64_Shdr), 0, SHN_UNDEF  /* e_shentsize, e_shnum = 0 (no section headers), e_shstrndx */
22 |   },
23 |   /* phdrs */
24 |   {
25 |     { PT_LOAD, PF_R | PF_W | PF_X, offsetof(elf, text),  /* p_type, p_flags (read/write/execute), p_offset */
26 |       ADDR_TEXT + offsetof(elf, text), ADDR_TEXT + offsetof(elf, text),  /* p_vaddr, p_paddr */
27 |       sizeof foo.text, sizeof foo.text, 4 }  /* p_filesz, p_memsz, p_align */
28 |   },
29 |   /* text */
30 |   {
31 |     0x48, 0x83, 0xEC, 0x40, 0x31, 0xD2, 0xC6, 0x04, 0x25, 0xDA, 0x01, 0x40,  /* starts with 48 83 EC 40 = subq $64, %rsp, the first instruction of _start in main-src.s */
32 |     0x00, 0x80, 0xC5, 0xFA, 0x6F, 0x05, 0x4A, 0x01, 0x00, 0x00, 0xC5, 0xF8,
33 |     0x29, 0x44, 0x24, 0x20, 0xC5, 0xFA, 0x6F, 0x05, 0x2C, 0x01, 0x00, 0x00,
34 |     0x8B, 0x7C, 0x24, 0x24, 0x44, 0x8B, 0x4C, 0x24, 0x20, 0x8B, 0x74, 0x24,
35 |     0x28, 0x48, 0xC7, 0x04, 0x25, 0xF8, 0x01, 0x40, 0x00, 0xD0, 0x0E, 0x00,
36 |     0x00, 0x44, 0x8B, 0x44, 0x24, 0x2C, 0xC5, 0xF8, 0x29, 0x44, 0x24, 0x30,
37 |     0x89, 0xD0, 0x89, 0xD1, 0xC0, 0xF8, 0x04, 0x44, 0x0F, 0xBE, 0xD0, 0x3C,
38 |     0x02, 0x74, 0x29, 0x3C, 0x03, 0x74, 0x36, 0xFE, 0xC8, 0x74, 0x0F, 0x89,
39 |     0xF0, 0x41, 0x89, 0xD3, 0x44, 0x31, 0xC0, 0x21, 0xF8, 0x44, 0x31, 0xC0,
40 |     0xEB, 0x34, 0x89, 0xF8, 0x44, 0x8D, 0x1C, 0x92, 0x31, 0xF0, 0x41, 0xFF,
41 |     0xC3, 0x44, 0x21, 0xC0, 0x31, 0xF0, 0xEB, 0x1E, 0x89, 0xF8, 0x44, 0x8D,
42 |     0x1C, 0x52, 0x31, 0xF0, 0x41, 0x83, 0xC3, 0x05, 0x44, 0x31, 0xC0, 0xEB,
43 |     0x0D, 0x44, 0x89, 0xC0, 0x44, 0x6B, 0xDA, 0x07, 0xF7, 0xD0, 0x09, 0xF8,
44 |     0x31, 0xF0, 0x41, 0x83, 0xE3, 0x0F, 0xFF, 0xC2, 0x89, 0x54, 0x24, 0x08,
45 |     0xDB, 0x44, 0x24, 0x08, 0xD9, 0xFE, 0xD9, 0xE1, 0xD8, 0x0D, 0x96, 0x00,
46 |     0x00, 0x00, 0x83, 0xE1, 0x03, 0x42, 0x8D, 0x0C, 0x91, 0x0F, 0xBE, 0x4C,
47 |     0x0C, 0x30, 0xDD, 0x4C, 0x24, 0x08, 0x48, 0x8B, 0x5C, 0x24, 0x08, 0x01,
48 |     0xD8, 0x42, 0x03, 0x04, 0x9D, 0xC0, 0x01, 0x40, 0x00, 0x44, 0x01, 0xC8,
49 |     0x45, 0x89, 0xC1, 0xD3, 0xC0, 0x01, 0xF8, 0x83, 0xFA, 0x40, 0x74, 0x0C,
50 |     0x41, 0x89, 0xF0, 0x89, 0xFE, 0x89, 0xC7, 0xE9, 0x58, 0xFF, 0xFF, 0xFF,
51 |     0x44, 0x01, 0x44, 0x24, 0x20, 0x31, 0xDB, 0x01, 0x44, 0x24, 0x24, 0x01,
52 |     0x7C, 0x24, 0x28, 0x01, 0x74, 0x24, 0x2C, 0x89, 0xD8, 0x89, 0xDA, 0xD0,
53 |     0xE8, 0x83, 0xE2, 0x01, 0x80, 0xFA, 0x01, 0x0F, 0xB6, 0xC0, 0x19, 0xC9,
54 |     0x0F, 0xB6, 0x44, 0x04, 0x20, 0x83, 0xE1, 0x04, 0xD3, 0xF8, 0x83, 0xE0,
55 |     0x0F, 0x8D, 0x50, 0x30, 0x3C, 0x09, 0x7E, 0x03, 0x8D, 0x50, 0x57, 0x88,
56 |     0x54, 0x24, 0x1F, 0x48, 0x8D, 0x74, 0x24, 0x1F, 0xB2, 0x01, 0xFF, 0xC3,
57 |     0xBF, 0x01, 0x00, 0x00, 0x00, 0xB0, 0x01, 0x0F, 0x05, 0x80, 0xFB, 0x20,
58 |     0x75, 0xBD, 0x31, 0xFF, 0xB0, 0x3C, 0x0F, 0x05, 0x00, 0x00, 0x80, 0x4F,  /* last 4 bytes: 0x4F800000, the .LC1 constant of main-src.s */
59 |     0x07, 0x0C, 0x11, 0x16, 0x05, 0x09, 0x0E, 0x14, 0x04, 0x0B, 0x10, 0x17,  /* rotate-count table (.LC2), continued on the next line */
60 |     0x06, 0x0A, 0x0F, 0x15, 0x30, 0xC8, 0x85, 0x5C, 0x43, 0x5D, 0x4A, 0x52,  /* from 0x30 on: the four .LC0 state words, little-endian */
61 |     0x12, 0x7F, 0x79, 0x11, 0x99, 0xB0, 0xB0, 0xD2
62 |   }
63 | };
64 | 


--------------------------------------------------------------------------------
/trim-src.c:
--------------------------------------------------------------------------------
 1 | #include "selfmd5.h"
 2 | #include <fcntl.h>
 3 | #include <unistd.h>
 4 | #include <stdlib.h>
 5 | #include <stdio.h>
 6 | #include <string.h>
 7 | 
 8 | //typedef struct {
 9 | //    unsigned char e_ident[EI_NIDENT];     /* Magic number and other info */
10 | //    Elf64_Half e_type;                 /* Object file type */
11 | //    Elf64_Half e_machine;              /* Architecture */
12 | //    Elf64_Word e_version;              /* Object file version */
13 | //    Elf64_Addr e_entry;                /* Entry point virtual address */
14 | //    Elf64_Off e_phoff;                /* Program header table file offset */
15 | //    Elf64_Off e_shoff;                /* Section header table file offset */
16 | //    Elf64_Word e_flags;                /* Processor-specific flags */
17 | //    Elf64_Half e_ehsize;               /* ELF header size in bytes */
18 | //    Elf64_Half e_phentsize;            /* Program header table entry size */
19 | //    Elf64_Half e_phnum;                /* Program header table entry count */
20 | //    Elf64_Half e_shentsize;            /* Section header table entry size */
21 | //    Elf64_Half e_shnum;                /* Section header table entry count */
22 | //    Elf64_Half e_shstrndx;             /* Section header string table index */
23 | //} Elf64_Ehdr;
24 | 
25 | //typedef struct
26 | //{
27 | //    Elf64_Word    p_type;                 /* Segment type */
28 | //    Elf64_Word    p_flags;                /* Segment flags */
29 | //    Elf64_Off     p_offset;               /* Segment file offset */
30 | //    Elf64_Addr    p_vaddr;                /* Segment virtual address */
31 | //    Elf64_Addr    p_paddr;                /* Segment physical address */
32 | //    Elf64_Xword   p_filesz;               /* Segment size in file */
33 | //    Elf64_Xword   p_memsz;                /* Segment size in memory */
34 | //    Elf64_Xword   p_align;                /* Segment alignment */
35 | //} Elf64_Phdr;
36 | 
37 | /*
38 |  * Build the trimmed executable from the `foo` image in selfmd5.h. The first
39 |  * 4 + 2 + 8 bytes of code are moved into ELF-header fields (e_ident[8..11],
40 |  * e_version, e_shoff), each piece followed by a short jump (0xEB rel8) to the
41 |  * next, and e_entry is pointed at e_ident[8]. The image, now 14 bytes
42 |  * shorter, is written to "selfmd5-test".
43 |  * Returns 0 on success, 1 if the output file cannot be written.
44 |  */
45 | int main(int argc, char *argv[]) {
46 |     const size_t version_off = offsetof(Elf64_Ehdr, e_version);
47 |     const size_t shoff_off = offsetof(Elf64_Ehdr, e_shoff);
48 |     unsigned char *hdr = (unsigned char *) &foo.ehdr;  /* byte view of the ELF header */
49 |     size_t size = sizeof(foo);
50 | 
51 |     /* Filler for the header bytes that are not overwritten below. */
52 |     memset(&foo.ehdr.e_ident[8], 0xFF, 8);
53 |     foo.ehdr.e_version = 0xFFFFFFFF;
54 |     foo.ehdr.e_shoff = 0xFFFFFFFFFFFFFFFF;
55 |     foo.ehdr.e_flags = 0xFFFFFFFF;
56 | 
57 |     printf("text offset %zu\n", offsetof(elf, text));  /* size_t needs %zu, not %d */
58 |     foo.ehdr.e_entry = ADDR_TEXT + offsetof(elf, ehdr) + 8;
59 |     printf("new entry %zu\n", offsetof(elf, ehdr) + 8);
60 | 
61 |     /* 4 code bytes -> e_ident[8..11], then a jump to e_version. */
62 |     memcpy(&hdr[8], foo.text, 4);
63 |     hdr[12] = 0xEB;
64 |     hdr[13] = (unsigned char) (version_off - 14);
65 |     printf("jmp %d\n", hdr[13]);
66 |     memmove(foo.text, foo.text + 4, sizeof(foo.text) - 4);
67 |     size -= 4;
68 | 
69 |     /* 2 code bytes -> e_version, then a jump to e_shoff. */
70 |     memcpy(&hdr[version_off], foo.text, 2);
71 |     hdr[version_off + 2] = 0xEB;
72 |     hdr[version_off + 3] = (unsigned char) (shoff_off - (version_off + 4));
73 |     memmove(foo.text, foo.text + 2, sizeof(foo.text) - 2);
74 |     size -= 2;
75 | 
76 |     /* 8 code bytes -> e_shoff; the jump to text spills into e_flags. */
77 |     memcpy(&hdr[shoff_off], foo.text, 8);
78 |     hdr[shoff_off + 8] = 0xEB;
79 |     hdr[shoff_off + 9] = (unsigned char) (offsetof(elf, text) - (offsetof(elf, ehdr) + shoff_off + 10));
80 |     printf("jmp %d\n", hdr[shoff_off + 9]);
81 |     memmove(foo.text, foo.text + 8, sizeof(foo.text) - 8);
82 |     size -= 8;
83 | 
84 |     /* Write the first `size` bytes of the image. */
85 |     FILE *out = fopen("selfmd5-test", "wb");
86 |     if (out == NULL) {
87 |         perror("selfmd5-test");
88 |         return 1;
89 |     }
90 |     size_t n = fwrite(&foo, size, 1, out);
91 |     printf("%zu %zu\n", size, n);
92 |     if (fclose(out) != 0 || n != 1) {
93 |         fprintf(stderr, "selfmd5-test: write failed\n");
94 |         return 1;
95 |     }
96 |     return 0;
97 | }
98 | 


--------------------------------------------------------------------------------
/calc-hash.c:
--------------------------------------------------------------------------------
  1 | #include <fcntl.h>
  2 | #include <unistd.h>
  3 | #include <stdlib.h>
  4 | #include <stdio.h>
  5 | 
  6 | #define BLOCK_LEN 64  // In bytes
  7 | #define STATE_LEN 4  // In words
  8 | 
  9 | #define BYTE unsigned char
 10 | 
 11 | /****************************** MACROS ******************************/
 12 | #define ROTLEFT(a, b)  ((a << b) | (a >> (32 - b)))  // rotate a left by b bits; arguments are not parenthesised, pass simple expressions only
 13 | 
 14 | #define F(x, y, z)  ((x & y) | (~x & z))  // bitwise select: bits of y where x is 1, bits of z where x is 0
 15 | 
 16 | #define G(x, y, z)  ((x & z) | (y & ~z))  // bitwise select: bits of x where z is 1, bits of y where z is 0
 17 | 
 18 | #define H(x, y, z) (x ^ y ^ z)  // three-way xor
 19 | 
 20 | #define I(x, y, z)  (y ^ (x | ~z))
 21 | 
 22 | static long double fsin_my(long double a) {  // returns |sin(a)| computed with the x87 fsin instruction
 23 |     long double res;
 24 |     // operate in place on the x87 top-of-stack register
 25 |     asm __volatile__("fsin\n\t"
 26 |     :"=t"(res)  // output: taken from the top of the x87 stack
 27 |     :"0"(a)  // input: placed in the same location as operand 0
 28 |     :"memory");
 29 | 
 30 |     return (res) > 0 ? res : -res;  // absolute value
 31 | }
 32 | 
 33 | typedef unsigned int v4si __attribute__ ((vector_size (16)));
 34 | 
 35 | /* Print the 4 bytes at buf as 8 hex digits, buf[3] first (the same digits as
 36 |  * "%08x" of the word on a little-endian machine), followed by a newline. */
 37 | static void print_word_hex(const unsigned char *buf) {
 38 |     for (int i = 7; i >= 0; i--) {
 39 |         int a = (buf[i / 2] >> (4 * (i % 2))) & 0xF;
 40 |         putchar(a >= 10 ? 'a' + (a - 10) : '0' + a);
 41 |     }
 42 |     putchar('\n');
 43 | }
 44 | 
 45 | /* Dump the state: four hex words, a blank line, then the same four words as
 46 |  * signed decimals, one per line. */
 47 | static void print_state(const v4si *hash) {
 48 |     const unsigned char *bytes = (const unsigned char *) hash;
 49 |     for (int w = 0; w < 4; w++) {
 50 |         print_word_hex(bytes + 4 * w);
 51 |     }
 52 |     putchar('\n');
 53 |     for (int w = 0; w < 4; w++) {
 54 |         printf("%d\n", (int) (*hash)[w]);
 55 |     }
 56 | }
 57 | 
 58 | /* Run the 64 MD5 rounds over the sixteen 32-bit words at m and add the
 59 |  * resulting state into *hash. */
 60 | static void md5_block(v4si *hash, const unsigned int *m) {
 61 |     static const char p1[] = {0, 3, 2, 1};       /* state slot written in round i % 4 */
 62 |     static const char mmstart[] = {0, 1, 5, 0};  /* per-group start of the message-word index */
 63 |     static const char mmstep[] = {1, 5, 3, 7};   /* per-group step of the message-word index */
 64 |     static const char ss[] = {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};
 65 |     v4si tmp = *hash;
 66 | 
 67 |     for (int i = 0; i < 64; ++i) {
 68 |         const int group = i / 16;
 69 |         const int slot = p1[i % 4];
 70 |         unsigned int b = tmp[p1[(i + 3) % 4]];
 71 |         unsigned int c = tmp[p1[(i + 2) % 4]];
 72 |         unsigned int d = tmp[p1[(i + 1) % 4]];
 73 |         unsigned int inc;
 74 |         switch (group) {
 75 |             case 0:  inc = F(b, c, d); break;
 76 |             case 1:  inc = G(b, c, d); break;
 77 |             case 2:  inc = H(b, c, d); break;
 78 |             default: inc = I(b, c, d); break;  /* group 3 */
 79 |         }
 80 | 
 81 |         unsigned int mm = m[(mmstart[group] + (i % 16) * mmstep[group]) % 16];
 82 |         unsigned int s = ss[group * 4 + (i % 4)];
 83 |         unsigned int t = (unsigned int) ((unsigned long long) 4294967296 * fsin_my(i + 1));
 84 |         unsigned int sum = tmp[slot] + inc + mm + t;
 85 | 
 86 |         tmp[slot] = b + ROTLEFT(sum, s);
 87 |     }
 88 | 
 89 |     *hash += tmp;
 90 | }
 91 | 
 92 | /*
 93 |  * Reference MD5 of the file named by argv[1], with diagnostics: prints the
 94 |  * initial state, the state before the last 64-byte block, and finally the
 95 |  * digest as 32 hex characters. Returns 1 on a usage or I/O error.
 96 |  */
 97 | int main(int argc, char *argv[]) {
 98 |     /* static => zero-filled, so the padding between the 0x80 marker and the
 99 |      * length field is 0 (an automatic array would hold indeterminate bytes). */
100 |     static char data[102400] __attribute__ ((aligned (16)));
101 |     /* The padded message is at most len + 72 bytes (len + 8 rounded up to 64). */
102 |     const int capacity = (int) sizeof(data) - BLOCK_LEN - 8;
103 | 
104 |     if (argc < 2) {
105 |         fprintf(stderr, "usage: calc-hash FILE\n");
106 |         return 1;
107 |     }
108 |     int fd = open(argv[1], O_RDONLY);
109 |     if (fd < 0) {
110 |         perror(argv[1]);
111 |         return 1;
112 |     }
113 | 
114 |     /* read() may return short counts, so loop until EOF. One byte more than
115 |      * capacity is requested so an over-long file is told apart from an exact fit. */
116 |     int len = 0;
117 |     while (len <= capacity) {
118 |         ssize_t got = read(fd, data + len, (size_t) (capacity + 1 - len));
119 |         if (got < 0) {
120 |             perror(argv[1]);
121 |             close(fd);
122 |             return 1;
123 |         }
124 |         if (got == 0) {
125 |             break;
126 |         }
127 |         len += (int) got;
128 |     }
129 |     close(fd);
130 |     if (len > capacity) {
131 |         fprintf(stderr, "%s: larger than %d bytes\n", argv[1], capacity);
132 |         return 1;
133 |     }
134 | 
135 |     v4si hash = {(unsigned int) (0x67452301), (unsigned int) (0xEFCDAB89), (unsigned int) (0x98BADCFE),
136 |                  (unsigned int) (0x10325476)};
137 | 
138 |     print_state(&hash);
139 | 
140 |     const int new_len = ((((len + 8) / 64) + 1) * 64) - 8;  /* offset of the 8-byte length field */
141 |     const unsigned long long bits = (unsigned long long) len * 8;
142 |     data[len] = (char) 0x80;  /* padding marker right after the message */
143 |     for (int k = 0; k < 8; k++) {
144 |         data[new_len + k] = (char) (bits >> (8 * k));  /* length in bits, little-endian */
145 |     }
146 | 
147 |     int off = 0;
148 |     for (; off < new_len - BLOCK_LEN; off += BLOCK_LEN) {
149 |         md5_block(&hash, (const unsigned int *) &data[off]);
150 |     }
151 | 
152 |     /* off now addresses the last block; show the state that block starts from. */
153 |     putchar('\n');
154 |     printf("off=%d len=%d new_len=%d\n", off, len, new_len);
155 |     putchar('\n');
156 |     print_state(&hash);
157 |     putchar('\n');
158 | 
159 |     md5_block(&hash, (const unsigned int *) &data[off]);
160 | 
161 |     /* Digest: the 16 state bytes in memory order, high nibble first. */
162 |     const unsigned char *buf = (const unsigned char *) &hash;
163 |     for (int i = 0; i < 32; i++) {
164 |         int a = (buf[i / 2] >> (4 * (1 - i % 2))) & 0xF;
165 |         putchar(a >= 10 ? 'a' + (a - 10) : '0' + a);
166 |     }
167 |     putchar('\n');
168 | 
169 |     return 0;
170 | }
171 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 说明
  2 | selfmd5项目为参加公司一个内部比赛所写，要求输出自身md5的最小程序，必须是64位ELF文件，
  3 | 不能使用socket系统调用。
  4 | 
  5 | 最终以大小决定名次，越小的排名越高。本项目最终大小**474**字节。
  6 | 
  7 | 下面是运行效果:
  8 | ```
  9 | # md5sum selfmd5-test
 10 | 45d0637e0de0eca20e7456b0bad6ee99  selfmd5-test
 11 | # ./selfmd5-test
 12 | 45d0637e0de0eca20e7456b0bad6ee99
 13 | ```
 14 | 
 15 | # 原理
 16 | 实现无外乎两种：
 17 | 1. 打开自己文件，读取，计算md5，输出。这也是最容易想到的方式。
 18 | 2. 某种算法构造出这样一种ELF，恰好能输出自己的md5。
 19 | 
 20 | 由于本人不会第二种方法，所以只能采用第一种方法，用纯工程的方式来构造最小ELF。
 21 | 
 22 | # 实现
 23 | 实现很简单，分为下面3步：
 24 | 1. 代码编写，又分成了读取文件、MD5算法、打印结果三个部分。
 25 | 2. 代码优化
 26 | 3. ELF裁剪
 27 | 
 28 | # 代码编写
 29 | ## 读取文件
 30 | 最简单的也最容易想到的读取代码如下：
 31 | ```
 32 | char data[1024];
 33 | short len = read(open(argv[0], 0, 0), data, sizeof(data));
 34 | ```
 35 | 这里不再赘述，后面优化的时候会再动到这里。
 36 | 
 37 | ## MD5算法
 38 | 从github上随便找点md5的算法，基本都类似下面这样:
 39 | ```
 40 | FF (a, b, c, d, x[ 0], s11, 0xd76aa478);
 41 | FF (d, a, b, c, x[ 1], s12, 0xe8c7b756);
 42 | FF (c, d, a, b, x[ 2], s13, 0x242070db);
 43 | FF (b, c, d, a, x[ 3], s14, 0xc1bdceee);
 44 | FF (a, b, c, d, x[ 4], s11, 0xf57c0faf);
 45 | FF (d, a, b, c, x[ 5], s12, 0x4787c62a);
 46 | FF (c, d, a, b, x[ 6], s13, 0xa8304613);
 47 | FF (b, c, d, a, x[ 7], s14, 0xfd469501);
 48 | FF (a, b, c, d, x[ 8], s11, 0x698098d8);
 49 | FF (d, a, b, c, x[ 9], s12, 0x8b44f7af);
 50 | FF (c, d, a, b, x[10], s13, 0xffff5bb1);
 51 | FF (b, c, d, a, x[11], s14, 0x895cd7be);
 52 | FF (a, b, c, d, x[12], s11, 0x6b901122);
 53 | FF (d, a, b, c, x[13], s12, 0xfd987193);
 54 | FF (c, d, a, b, x[14], s13, 0xa679438e);
 55 | FF (b, c, d, a, x[15], s14, 0x49b40821);
 56 | ...
 57 | ...
 58 | ```
 59 | 可以看到，这种写法固然速度很快，但是编译出来的字节码会很多，大小不符合要求，
 60 | 那么很容易想到，改成循环的是不是就好了呢？
 61 | 
 62 | 通过md5 wiki，可以看到官方实现的算法，基本就是我们最终想要的
 63 | ```
 64 | for each 512-bit chunk of padded message do
 65 |     break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15
 66 |     // Initialize hash value for this chunk:
 67 |     var int A := a0
 68 |     var int B := b0
 69 |     var int C := c0
 70 |     var int D := d0
 71 |     // Main loop:
 72 |     for i from 0 to 63 do
 73 |         var int F, g
 74 |         if 0 ≤ i ≤ 15 then
 75 |             F := (B and C) or ((not B) and D)
 76 |             g := i
 77 |         else if 16 ≤ i ≤ 31 then
 78 |             F := (D and B) or ((not D) and C)
 79 |             g := (5×i + 1) mod 16
 80 |         else if 32 ≤ i ≤ 47 then
 81 |             F := B xor C xor D
 82 |             g := (3×i + 5) mod 16
 83 |         else if 48 ≤ i ≤ 63 then
 84 |             F := C xor (B or (not D))
 85 |             g := (7×i) mod 16
 86 |         // Be wary of the below definitions of a,b,c,d
 87 |         F := F + A + K[i] + M[g]  // M[g] must be a 32-bits block
 88 |         A := D
 89 |         D := C
 90 |         C := B
 91 |         B := B + leftrotate(F, s[i])
 92 |     end for
 93 |     // Add this chunk's hash to result so far:
 94 |     a0 := a0 + A
 95 |     b0 := b0 + B
 96 |     c0 := c0 + C
 97 |     d0 := d0 + D
 98 | end for
 99 | ```
100 | 根据wiki的伪码，优化后最终的C代码为:
101 | ```
102 | for (short off = 0; off < new_len; off += BLOCK_LEN) {
103 |     unsigned int *m = (unsigned int *) &data[off];
104 | 
105 |     unsigned int A = hash[0];
106 |     unsigned int B = hash[1];
107 |     unsigned int C = hash[2];
108 |     unsigned int D = hash[3];
109 | 
110 |     const char ss[] = {7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21};
111 | 
112 |     for (char i = 0; i < 64; ++i) {
113 | 
114 |         unsigned int F;
115 |         char g;
116 |         switch (i / 16) {
117 |             case 0:
118 |                 F = FF(B, C, D);
119 |                 g = i;
120 |                 break;
121 |             case 1:
122 |                 F = GG(B, C, D);
123 |                 g = (5 * i + 1) % 16;
124 |                 break;
125 |             case 2:
126 |                 F = HH(B, C, D);
127 |                 g = (3 * i + 5) % 16;
128 |                 break;
129 |             case 3:
130 |                 F = II(B, C, D);
131 |                 g = (7 * i) % 16;
132 |                 break;
133 |         }
134 | 
135 |         unsigned int K = (unsigned int) (((unsigned long long) 1 << 32) * fsin_my(i + 1));
136 | 
137 |         F += A + K + m[g];
138 | 
139 |         A = D;
140 |         D = C;
141 |         C = B;
142 |         B = B + ROTLEFT(F, ss[(i / 16) * 4 + (i % 4)]);
143 |     }
144 | 
145 |     hash[0] += A;
146 |     hash[1] += B;
147 |     hash[2] += C;
148 |     hash[3] += D;
149 | }
150 | ```
151 | 这就是MD5算法的主要部分。这里其实还有优化的地方，后面会讲到。
152 | 
153 | ## 打印结果
154 | 从网上随便找点代码，打印16进制的字符串如下:
155 | ```
156 | for (int i = 0; i < sizeof arr; i ++) {
157 |     printf("%02x", arr[i]);
158 | }
159 | ```
160 | 这里不再赘述，后面优化的时候会再动到这里。
161 | 
162 | # 代码优化
163 | ## 思路
164 | 前面的写法，写完用gcc编译，编译出来的大小基本就是8KB+，显然很大，那么需要优化。
165 | 
166 | 从网上查点资料，比如搜索"最小的hello world"，里面就会有介绍，不能使用libc，用汇编+syscall的方式来减少体积。
167 | 
168 | 那改为用纯汇编来写吗？手写的汇编大概率写得不如gcc生成的好，所以我们下载最新的gcc 9.3.0，请它帮我们把.c编译成.s汇编代码。
169 | 
170 | 转换的指令如下：
171 | ```
172 | gcc -S main-src.c  -Os -mavx -msse -mavx2 -ffast-math -fsingle-precision-constant -fno-verbose-asm -fno-unroll-loops -fno-asynchronous-unwind-tables
173 | ```
174 | 会生成main-src.s的汇编文件，然后我们只需要修改这个main-src.s即可。
175 | 
176 | ## 汇编优化
177 | 观察生成的原始汇编文件，如下:
178 | ```
179 | 	.file	"main-src.c"
180 | 	.text
181 | 	.section	.text.startup,"ax",@progbits
182 | 	.globl	main
183 | 	.type	main, @function
184 | main:
185 | 	movabsq	$-1167088121787636991, %rax
186 | 	pushq	%r14
187 | 	movabsq	$1445102447882210311, %r8
188 | 	movabsq	$1517442620720155396, %r9
189 | 	pushq	%r12
190 | 	pushq	%rbp
191 | 	pushq	%rbx
192 | ```
193 | 既然我们要不依赖libc，那么入口必须改为_start，同时一些没用的指令，如pushq都可以干掉。
194 | 
195 | 我们再观察原始的函数调用：
196 | ```
197 | 	movl	$1, %edi
198 | 	call	write
199 | 	cmpb	$32, %bl
200 | 	jne	.L11
201 | 	xorl	%edi, %edi
202 | 	call	exit
203 | 	.size	main, .-main
204 | ```
205 | 里面的call write和call exit，都要改成syscall的方式，例如：
206 | ```
207 | 	movl	$1, %edi
208 | 	mov	$1, %al
209 | 	syscall
210 | 	cmpb	$32, %bl
211 | 	jne	.L11
212 | 	xorl	%edi, %edi
213 | 	mov	$60, %al
214 | 	syscall
215 | ```
216 | 最终优化汇编的操作放到了./change-asm.sh里，每次gcc转换完汇编后，执行一下即可。
217 | 
218 | ## 读取文件优化
219 | 前面提到，读取文件是用的open、read，但是其实有更简单的方法读取自己，如下：
220 | ```
221 | #define START 0x400000
222 | char *data = (char *) START;
223 | const short len = 474;
224 | ```
225 | 这个0x400000是ELF头里指定的Segment virtual address，从这里就能直接开始读自己在内存中的映射。
226 | 
227 | 并且最终文件大小是固定的，所以len可以直接写死。
228 | 
229 | 但是MD5算法有一个写buffer的操作，如下：
230 | ```
231 | // Pre-processing: adding a single 1 bit
232 | append "1" bit to message    
233 | // Notice: the input bytes are considered as bits strings,
234 | //  where the first bit is the most significant bit of the byte.
235 | 
236 | // Pre-processing: padding with zeros
237 | append "0" bit until message length in bits ≡ 448 (mod 512)
238 | append original length in bits mod 2^64 to message
239 | ```
240 | text段是无法写的，如果copy出来，又会多出一些操作指令。网上搜一搜，给gcc添加一个链接选项，让text段可写即可：
241 | ```
242 | -Wl,--omagic 
243 | ```
244 | 所以最后gcc编译汇编的指令如下：
245 | ```
246 | gcc -Wl,--omagic -Os -fdata-sections -ffunction-sections -flto main-src.s -o selfmd5 -Wl,--gc-sections -Wl,--strip-all -nostdlib -nostdinc
247 | ```
248 | 
249 | ## MD5算法优化
250 | 观察前面的MD5算法，可以看到由两层循环组成，每64个字节执行一轮MD5计算。
251 | 
252 | 但是其实可以只计算最后一轮：把最后一轮之前各轮的结果预先计算出来，作为初始hash值写入程序。这样就能少一层循环。
253 | 
254 | 即构造一个初始hash值，只计算一轮，输出结果，代码如下：
255 | ```
256 | unsigned int A = hash[0];
257 | unsigned int B = hash[1];
258 | unsigned int C = hash[2];
259 | unsigned int D = hash[3];
260 | 
261 | for (char i = 0; i < 64; ++i) {
262 | 
263 |     unsigned int F;
264 |     char g;
265 |     switch (i / 16) {
266 |         case 0:
267 |             F = FF(B, C, D);
268 |             g = i;
269 |             break;
270 |         case 1:
271 |             F = GG(B, C, D);
272 |             g = (5 * i + 1) % 16;
273 |             break;
274 |         case 2:
275 |             F = HH(B, C, D);
276 |             g = (3 * i + 5) % 16;
277 |             break;
278 |         case 3:
279 |             F = II(B, C, D);
280 |             g = (7 * i) % 16;
281 |             break;
282 |     }
283 | 
284 |     unsigned int K = (unsigned int) (((unsigned long long) 1 << 32) * fsin_my(i + 1));
285 | 
286 |     F += A + K + m[g];
287 | 
288 |     A = D;
289 |     D = C;
290 |     C = B;
291 |     B = B + ROTLEFT(F, ss[(i / 16) * 4 + (i % 4)]);
292 | }
293 | 
294 | hash[0] += A;
295 | hash[1] += B;
296 | hash[2] += C;
297 | hash[3] += D;
298 | ```
299 | 这里有个要求，就是hash值必须放在ELF的最后64字节内，这个可以通过调整汇编的段位置解决。
300 | 
301 | 并且需要先编译汇编，然后用工具计算初始hash值，修改汇编，再重新编译一次，两次编译的ELF用工具计算出来的初始hash值一样即可。
302 | 
303 | 计算初始hash值的工具也很简单，去掉最后一轮的计算，打印即可，代码参考calc-hash.c，使用方法：
304 | ```
305 | # ./calc-hash.sh 
306 | 67452301
307 | efcdab89
308 | 98badcfe
309 | 10325476
310 | 
311 | 1732584193
312 | -271733879
313 | -1732584194
314 | 271733878
315 | 
316 | off=448 len=474 new_len=504
317 | 
318 | 5c85c830
319 | 524a5d43
320 | 11797f12
321 | d2b0b099
322 | 
323 | 1552271408
324 | 1380605251
325 | 293175058
326 | -760172391
327 | 
328 | 45d0637e0de0eca20e7456b0bad6ee99
329 | ```
330 | 最后的4个10进制数字，即为初始hash值，复制到汇编中替换即可
331 | ```
332 | .LC0:
333 | 	.long	1552271408
334 | 	.long	1380605251
335 | 	.long	293175058
336 | 	.long	-760172391
337 | ```
338 | 
339 | ## 打印结果优化
340 | 前面说到不能使用libc，所以printf也不能用了，因此必须自己实现一个打印16进制的代码。如下：
341 | ```
342 | for (unsigned char i = 0; i < 32; i++) {
343 |     char a = (buf[i / 2] >> (4 * (1 - i % 2))) & 0xF;
344 |     char c = a >= 10 ? a + ('a' - 10) : a + '0';
345 |     write(1, &c, 1);
346 | }
347 | ```
348 | 注意这里每次循环只打印一个字符，也是为了缩减代码指令的考虑。
349 | 
350 | # ELF裁剪
351 | 经过前面的汇编优化，编译出的结果基本在1KB以内了。所以现在开始对ELF下手。
352 | 
353 | 网上搜一搜相关工具，有个[ELFkickers](https://github.com/BR903/ELFkickers)工具集，里面有很多小工具。
354 | 
355 | 我们需要用到的工具有两个：
356 | 1. sstrip，类似于strip，去掉ELF没用的东西
357 | 2. elftoc，把ELF文件转成C源文件定义
358 | 
359 | ## sstrip裁剪
360 | 很简单，直接运行  
361 | ```
362 | sstrip ./selfmd5  
363 | ```
364 | 能去掉200字节左右
365 | 
366 | ## ELF头裁剪
367 | ELF头部其实有很多字节是可以被修改的，不影响运行，所以可以把code移到ELF头里，通过JMP串联起来
368 | 
369 | 先用elftoc把ELF转成selfmd5.h文件
370 | ```
371 | elftoc ./selfmd5 > selfmd5.h
372 | ```
373 | 这时候观察selfmd5.h，代码如下：
374 | ```
375 | #include <stddef.h>
376 | #include <elf.h>
377 | 
378 | #define ADDR_TEXT 0x00400000
379 | 
380 | typedef struct elf
381 | {
382 |   Elf64_Ehdr      ehdr;
383 |   Elf64_Phdr      phdrs[1];
384 |   unsigned char   text[400];
385 | } elf;
386 | 
387 | elf foo =
388 | {
389 |   /* ehdr */
390 |   {
391 |     { 0x7F, 'E', 'L', 'F', ELFCLASS64, ELFDATA2LSB, EV_CURRENT, ELFOSABI_SYSV,
392 |       0, 0, 0, 0, 0, 0, 0, 0 },
393 |     ET_EXEC, EM_X86_64, EV_CURRENT, ADDR_TEXT + offsetof(elf, text),
394 |     offsetof(elf, phdrs), 0, 0, sizeof(Elf64_Ehdr), sizeof(Elf64_Phdr), 1,
395 |     sizeof(Elf64_Shdr), 0, SHN_UNDEF
396 |   },
397 |   /* phdrs */
398 |   {
399 |     { PT_LOAD, PF_R | PF_W | PF_X, offsetof(elf, text),
400 |       ADDR_TEXT + offsetof(elf, text), ADDR_TEXT + offsetof(elf, text),
401 |       sizeof foo.text, sizeof foo.text, 4 }
402 |   },
403 |   /* text */
404 |   {
405 |     0x48, 0x83, 0xEC, 0x38, 0x31, 0xDB, 0x48, 0xB8, 0x01, 0x23, 0x45, 0x67,
406 |     ...
407 |     ...
408 |     0x00, 0x00, 0x80, 0x4F
409 |   }
410 | };
411 | ```
412 | 可以看到，elftoc把ELF文件以一种易读懂、易操作的方式展现出来了，只需要把foo整块内存写成文件，就是一个ELF可执行程序。
413 | 
414 | 然后通过查阅资料和试验，可以得出修改ELF头的foo.ehdr.e_ident[8]-e_ident[15]、foo.ehdr.e_version、foo.ehdr.e_shoff、foo.ehdr.e_flags不影响运行。ELF的定义在/usr/include/elf.h中。
415 | 
416 | 那么剩下的就是把text里的code挪入到上面的那几个空位，然后再用2字节JMP回来。
417 | 
418 | 写一个trim-src.c完成这些事情，部分代码如下：
419 | ```
420 | // copy 4 bytes
421 | foo.ehdr.e_ident[8] = foo.text[0];
422 | foo.ehdr.e_ident[9] = foo.text[1];
423 | foo.ehdr.e_ident[10] = foo.text[2];
424 | foo.ehdr.e_ident[11] = foo.text[3];
425 | foo.ehdr.e_ident[12] = 0xEB;
426 | foo.ehdr.e_ident[13] = (offsetof(elf, ehdr) + offsetof(Elf64_Ehdr, e_version)) - (offsetof(elf, ehdr) + 14);
427 | printf("jmp %d\n", foo.ehdr.e_ident[13]);
428 | 
429 | for (int i = 0; i < sizeof(foo.text) - 4; ++i) {
430 |     foo.text[i] = foo.text[i + 4];
431 | }
432 | size -= 4;
433 | ```
434 | 需要注意的是，填坑的时候，要注意汇编指令的完整性，比如一条指令10个字节，不能只copy 5个过去。
435 | 
436 | 所以最终需要对main-src.s汇编文件的前几条汇编指令做一下顺序上的调整。
437 | 
438 | # 编译
439 | 下面是具体的操作步骤
440 | 1. 下载最新gcc9.3，也可用docker
441 | 2. 把main-src.c编译成汇编main-src.s
442 | ```
443 | gcc -S main-src.c  -Os -mavx -msse -mavx2 -ffast-math -fsingle-precision-constant -fno-verbose-asm -fno-unroll-loops -fno-asynchronous-unwind-tables
444 | ```
445 | 3. 优化汇编main-src.s
446 | ```
447 | ./change-asm.sh
448 | ```
449 | 4. 手调main-src.s，_start入口的地方，原始如下
450 | ```
451 | xorl	%edx, %edx
452 | subq	$64, %rsp
453 | vmovdqu	.LC0(%rip), %xmm0
454 | vmovaps	%xmm0, 32(%rsp)
455 | vmovdqu	.LC2(%rip), %xmm0
456 | movb	$-128, 4194778
457 | ```
458 | 前三条指令调整成4字节+2字节+10字节的形式，如下:
459 | ```
460 | subq	$64, %rsp
461 | xorl	%edx, %edx
462 | movb	$-128, 4194778
463 | vmovdqu	.LC0(%rip), %xmm0
464 | vmovaps	%xmm0, 32(%rsp)
465 | vmovdqu	.LC2(%rip), %xmm0
466 | ```
467 | 再调整.LC0的位置，放到文件末尾：
468 | ```
469 | .LC1:
470 | 	.long	1333788672
471 | 	
472 | .LC2:
473 | 	.quad	1445102447882210311
474 | 	.quad	1517442620720155396
475 | 
476 | .LC0:
477 | 	.long	1732584193
478 | 	.long	-271733879
479 | 	.long	-1732584194
480 | 	.long	271733878
481 | ```
482 | 
483 | 5. 编译main-src.s，并sstrip
484 | ```
485 | ./build-asm.sh
486 | ```
487 | 6. ELF头裁剪
488 | ```
489 | ./trim-asm.sh
490 | ```
491 | 7. 计算初始hash值
492 | ```
493 | ./calc-hash.sh 
494 | ```
495 | 8. 复制4个数字到main-src.s，替换.LC0
496 | ```
497 | .LC0:
498 | 	.long	1552271408
499 | 	.long	1380605251
500 | 	.long	293175058
501 | 	.long	-760172391
502 | ```
503 | 9. 重新执行5、6、7，确保初始hash值没变
504 | ```
505 | ./build-asm.sh
506 | ./trim-asm.sh
507 | ./calc-hash.sh
508 | ```
509 | 10. 最终结果
510 | ```
511 | # md5sum selfmd5-test
512 | 45d0637e0de0eca20e7456b0bad6ee99  selfmd5-test
513 | # ./selfmd5-test
514 | 45d0637e0de0eca20e7456b0bad6ee99
515 | # ll selfmd5-test
516 | -rwxr-xr-x 1 root root 474 4月  20 10:35 selfmd5-test
517 | ```
518 | 


--------------------------------------------------------------------------------