├── .gitattributes ├── .gitignore ├── Byte frequency ├── 32bit.txt ├── 64bit.txt ├── byte_frequency_tabulate.py └── ida_get_byte_frequency.py ├── LICENSE.txt ├── Main.cpp ├── README.md ├── Search.cpp ├── Settings.h ├── SigMaker.h ├── SigMakerEx.sln ├── SigMakerEx.vcxproj ├── SigMakerEx.vcxproj.filters ├── SigMakerEx.vcxproj.user ├── Signature.cpp ├── StdAfx.h └── images ├── main.png ├── minimal_func_example.png └── options.png /.gitattributes: -------------------------------------------------------------------------------- 1 | *.png merge=binary 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Misc 3 | .gitconfig 4 | desktop.ini 5 | README.md.backup 6 | Downloads/ 7 | LocalData/ 8 | 9 | # Visual Studio 10 | .vs/ 11 | x64/ 12 | *.aps 13 | *.vcxproj.user 14 | 15 | # PyCharm 16 | .idea/ 17 | -------------------------------------------------------------------------------- /Byte frequency/32bit.txt: -------------------------------------------------------------------------------- 1 | 2 | Largest byte count: 4,278,311 3 | 4 | Byte - Percent - Count 5 | 92: 0.475 20,325 6 | A2: 0.483 20,675 7 | AE: 0.499 21,336 8 | AD: 0.501 21,440 9 | 9A: 0.525 22,466 10 | B2: 0.567 24,243 11 | D5: 0.569 24,359 12 | A6: 0.571 24,449 13 | 67: 0.586 25,086 14 | A9: 0.626 26,765 15 | A7: 0.635 27,163 16 | BB: 0.638 27,314 17 | A5: 0.64 27,393 18 | B3: 0.644 27,534 19 | B1: 0.655 28,013 20 | 91: 0.661 28,263 21 | 93: 0.684 29,269 22 | 99: 0.685 29,327 23 | B5: 0.691 29,546 24 | 27: 0.695 29,738 25 | E7: 0.698 29,865 26 | D7: 0.703 30,095 27 | 37: 0.719 30,776 28 | 63: 0.722 30,910 29 | 9B: 0.726 31,048 30 | AA: 0.726 31,081 31 | 6D: 0.727 31,111 32 | 26: 0.73 31,236 33 | 8F: 0.731 31,264 34 | AF: 0.731 31,286 35 | 6E: 0.736 31,468 36 | 69: 0.739 31,608 37 | BA: 0.74 31,658 38 | 62: 0.754 32,242 39 | 6F: 0.755 32,315 40 | 97: 0.755 32,316 41 | 3A: 0.767 
32,832 42 | 2D: 0.776 33,183 43 | 36: 0.789 33,774 44 | DA: 0.792 33,897 45 | E3: 0.794 33,973 46 | 22: 0.815 34,889 47 | 6B: 0.823 35,208 48 | 3E: 0.825 35,275 49 | 9F: 0.827 35,380 50 | 2E: 0.837 35,792 51 | 7B: 0.837 35,819 52 | 7A: 0.839 35,910 53 | 9D: 0.845 36,131 54 | E6: 0.846 36,215 55 | ED: 0.85 36,375 56 | 96: 0.856 36,616 57 | BD: 0.859 36,770 58 | 2A: 0.862 36,894 59 | 65: 0.869 37,187 60 | 2F: 0.877 37,532 61 | BF: 0.888 37,976 62 | 5A: 0.904 38,669 63 | 31: 0.922 39,443 64 | 71: 0.934 39,977 65 | 9E: 0.938 40,126 66 | 1B: 0.962 41,156 67 | EF: 0.973 41,637 68 | 21: 0.977 41,815 69 | 1A: 0.991 42,408 70 | 35: 0.996 42,602 71 | CD: 1.0 42,763 72 | 19: 1.01 43,337 73 | 17: 1.03 44,080 74 | 1E: 1.04 44,298 75 | AB: 1.04 44,419 76 | 3D: 1.04 44,691 77 | 87: 1.05 44,951 78 | 82: 1.05 44,958 79 | 1D: 1.06 45,318 80 | EA: 1.06 45,358 81 | 23: 1.06 45,525 82 | D3: 1.08 46,241 83 | BE: 1.08 46,296 84 | 79: 1.09 46,421 85 | DE: 1.09 46,718 86 | E2: 1.12 47,855 87 | DF: 1.15 49,353 88 | 3F: 1.15 49,365 89 | 8E: 1.17 50,018 90 | 13: 1.19 51,009 91 | 25: 1.19 51,112 92 | EE: 1.21 51,943 93 | D6: 1.22 52,020 94 | 9C: 1.23 52,490 95 | 98: 1.23 52,572 96 | 1F: 1.23 52,628 97 | E1: 1.24 53,155 98 | BC: 1.26 53,987 99 | B4: 1.26 53,988 100 | AC: 1.27 54,151 101 | 32: 1.31 55,835 102 | DD: 1.31 55,945 103 | 12: 1.32 56,399 104 | 16: 1.32 56,436 105 | A0: 1.33 57,029 106 | 4B: 1.34 57,360 107 | 7F: 1.36 58,029 108 | 77: 1.36 58,106 109 | C5: 1.36 58,143 110 | FA: 1.41 60,231 111 | D1: 1.42 60,745 112 | 6C: 1.44 61,593 113 | 76: 1.44 61,615 114 | 29: 1.44 61,619 115 | B7: 1.44 61,632 116 | F2: 1.45 62,181 117 | 60: 1.46 62,257 118 | CA: 1.47 62,864 119 | 94: 1.5 64,258 120 | A3: 1.51 64,695 121 | F5: 1.53 65,573 122 | 61: 1.57 67,332 123 | 4F: 1.58 67,537 124 | 8C: 1.59 67,898 125 | DB: 1.62 69,282 126 | 43: 1.64 70,303 127 | FB: 1.65 70,491 128 | 86: 1.66 71,045 129 | 95: 1.68 71,715 130 | CB: 1.68 71,731 131 | A4: 1.7 72,641 132 | CF: 1.7 72,704 133 | 70: 1.72 73,503 
134 | 3C: 1.74 74,314 135 | D4: 1.74 74,454 136 | 0E: 1.75 74,688 137 | 73: 1.77 75,665 138 | 34: 1.77 75,681 139 | F7: 1.84 78,791 140 | 49: 1.87 80,040 141 | DC: 1.89 80,797 142 | 39: 1.89 80,939 143 | B0: 1.93 82,681 144 | FD: 1.93 82,722 145 | 4A: 1.96 83,822 146 | A8: 1.96 83,964 147 | B9: 1.97 84,370 148 | 78: 1.98 84,725 149 | 2C: 1.99 85,010 150 | 72: 2.04 87,323 151 | F9: 2.07 88,431 152 | 0B: 2.08 89,031 153 | 38: 2.1 89,690 154 | A1: 2.1 89,995 155 | 0A: 2.14 91,468 156 | 2B: 2.15 91,911 157 | E4: 2.22 94,965 158 | F1: 2.28 97,371 159 | 8A: 2.29 97,866 160 | 15: 2.38 101,801 161 | 7C: 2.46 105,287 162 | 09: 2.47 105,508 163 | D2: 2.49 106,572 164 | 81: 2.64 112,974 165 | CE: 2.67 114,440 166 | 4E: 2.79 119,437 167 | 47: 2.8 119,785 168 | 7E: 2.84 121,331 169 | B6: 2.85 121,888 170 | 5B: 2.97 126,861 171 | 5C: 2.98 127,562 172 | 58: 3.0 128,223 173 | 30: 3.04 129,885 174 | 07: 3.06 130,914 175 | 42: 3.18 135,905 176 | 88: 3.2 136,842 177 | F6: 3.22 137,585 178 | 11: 3.29 140,552 179 | 59: 3.31 141,560 180 | 4C: 3.36 143,873 181 | 66: 3.38 144,548 182 | E0: 3.42 146,354 183 | 0D: 3.48 148,690 184 | 20: 3.48 148,923 185 | 1C: 3.54 151,633 186 | C9: 3.6 154,032 187 | 53: 3.61 154,285 188 | 54: 3.63 155,118 189 | 7D: 3.63 155,400 190 | B8: 3.76 161,066 191 | D0: 3.83 163,780 192 | 48: 3.86 165,315 193 | 5F: 3.88 165,786 194 | 06: 3.91 167,093 195 | 90: 3.96 169,354 196 | 84: 3.99 170,758 197 | 80: 4.06 173,708 198 | D8: 4.06 173,880 199 | 41: 4.18 179,039 200 | 52: 4.21 180,055 201 | E5: 4.27 182,475 202 | 05: 4.39 187,708 203 | 18: 4.57 195,498 204 | 57: 4.76 203,601 205 | 40: 4.77 203,986 206 | FE: 4.78 204,384 207 | F4: 4.78 204,510 208 | 28: 4.89 209,122 209 | 64: 4.98 213,210 210 | F3: 5.24 224,115 211 | C8: 5.26 224,953 212 | 44: 5.44 232,647 213 | C6: 5.47 234,028 214 | 46: 5.51 235,856 215 | 3B: 5.6 239,587 216 | F0: 5.66 242,290 217 | EB: 5.87 251,141 218 | 03: 6.09 260,465 219 | 14: 6.14 262,656 220 | 33: 6.16 263,457 221 | 5E: 6.23 266,513 222 | 
68: 6.29 268,982 223 | C2: 7.06 302,003 224 | F8: 7.19 307,625 225 | 6A: 7.23 309,334 226 | 56: 7.25 310,074 227 | D9: 7.5 320,944 228 | C1: 7.59 324,833 229 | 51: 7.89 337,415 230 | 02: 8.63 369,018 231 | C3: 8.63 369,273 232 | C4: 8.72 373,235 233 | 75: 9.63 411,852 234 | E9: 10.5 448,747 235 | C0: 10.8 461,664 236 | 5D: 11.2 477,353 237 | 85: 11.3 481,442 238 | C7: 11.3 483,738 239 | EC: 11.6 495,916 240 | 10: 12.1 516,535 241 | 74: 12.1 517,451 242 | FC: 12.3 525,462 243 | 55: 13.0 556,974 244 | 0C: 13.1 560,789 245 | 50: 13.2 564,154 246 | 24: 16.7 714,384 247 | 4D: 17.0 727,035 248 | 8D: 18.7 798,872 249 | 83: 20.5 876,370 250 | 04: 20.7 887,105 251 | 89: 22.0 942,295 252 | 08: 22.2 951,362 253 | 45: 22.3 951,999 254 | 0F: 22.5 962,164 255 | E8: 26.9 1,149,816 256 | 01: 27.4 1,173,520 257 | FF: 55.1 2,359,115 258 | CC: 73.3 3,137,643 259 | 8B: 76.3 3,265,159 260 | 00: 1e+02 4,278,311 261 | -------------------------------------------------------------------------------- /Byte frequency/64bit.txt: -------------------------------------------------------------------------------- 1 | 2 | Largest byte count: 15,464,905 3 | 4 | Byte - Percent - Count 5 | A6: 0.386 59,704 6 | 9A: 0.391 60,501 7 | A2: 0.392 60,641 8 | 9B: 0.4 61,903 9 | AE: 0.425 65,747 10 | A3: 0.427 66,033 11 | 6A: 0.429 66,326 12 | B2: 0.44 68,077 13 | 96: 0.447 69,100 14 | 62: 0.449 69,423 15 | 9E: 0.456 70,592 16 | A1: 0.457 70,708 17 | AD: 0.467 72,262 18 | A9: 0.472 72,924 19 | 61: 0.475 73,523 20 | A5: 0.482 74,525 21 | 99: 0.492 76,142 22 | 92: 0.517 79,982 23 | AB: 0.529 81,771 24 | 93: 0.551 85,212 25 | E5: 0.553 85,574 26 | 3A: 0.556 85,935 27 | 71: 0.558 86,335 28 | 7A: 0.561 86,787 29 | 36: 0.562 86,902 30 | A4: 0.567 87,680 31 | B5: 0.57 88,161 32 | 91: 0.572 88,496 33 | B3: 0.572 88,517 34 | 9D: 0.578 89,338 35 | 26: 0.586 90,693 36 | 31: 0.602 93,116 37 | 37: 0.613 94,819 38 | EE: 0.614 94,918 39 | DD: 0.617 95,407 40 | B1: 0.618 95,499 41 | 69: 0.623 96,331 42 | 9F: 0.626 96,887 43 | 
2D: 0.627 96,950 44 | A7: 0.634 98,005 45 | F5: 0.642 99,285 46 | 67: 0.659 101,891 47 | AC: 0.659 101,955 48 | 3E: 0.671 103,845 49 | 6D: 0.673 104,041 50 | 2E: 0.681 105,253 51 | DC: 0.683 105,582 52 | 21: 0.687 106,202 53 | E6: 0.696 107,571 54 | 6E: 0.701 108,457 55 | 52: 0.706 109,257 56 | BD: 0.708 109,503 57 | 22: 0.719 111,214 58 | 35: 0.722 111,717 59 | 1A: 0.726 112,317 60 | 5A: 0.732 113,250 61 | BB: 0.735 113,721 62 | F4: 0.743 114,976 63 | 8E: 0.744 114,988 64 | DE: 0.751 116,196 65 | 97: 0.752 116,322 66 | 27: 0.76 117,536 67 | 1E: 0.765 118,255 68 | 2A: 0.766 118,475 69 | E7: 0.77 119,042 70 | D5: 0.789 121,960 71 | BE: 0.799 123,512 72 | AF: 0.805 124,438 73 | 82: 0.806 124,691 74 | 94: 0.807 124,763 75 | BC: 0.808 124,912 76 | 16: 0.814 125,950 77 | B4: 0.819 126,604 78 | E2: 0.825 127,541 79 | D4: 0.825 127,583 80 | AA: 0.834 129,013 81 | E4: 0.839 129,813 82 | ED: 0.859 132,843 83 | 79: 0.861 133,184 84 | 32: 0.887 137,248 85 | EA: 0.892 137,956 86 | 8A: 0.894 138,251 87 | 86: 0.896 138,613 88 | EF: 0.92 142,266 89 | 76: 0.94 145,306 90 | 2F: 0.943 145,899 91 | 12: 0.949 146,799 92 | 2C: 0.955 147,738 93 | E3: 0.956 147,824 94 | 51: 0.958 148,154 95 | 64: 0.962 148,803 96 | 19: 0.968 149,706 97 | BF: 0.97 150,069 98 | 6B: 0.971 150,135 99 | CD: 0.979 151,429 100 | 1D: 0.983 151,977 101 | 34: 0.994 153,664 102 | 65: 0.997 154,151 103 | 13: 1.01 155,535 104 | 8F: 1.03 158,833 105 | 25: 1.05 163,087 106 | F1: 1.06 163,204 107 | DF: 1.06 163,400 108 | 6F: 1.06 164,472 109 | 17: 1.08 166,548 110 | 23: 1.08 167,707 111 | 3F: 1.09 169,081 112 | 4A: 1.09 169,122 113 | 95: 1.1 169,913 114 | DA: 1.13 175,141 115 | 3C: 1.14 176,332 116 | 1B: 1.14 177,047 117 | E1: 1.15 178,015 118 | 9C: 1.16 179,263 119 | 3D: 1.16 179,827 120 | 77: 1.16 180,002 121 | C5: 1.18 183,220 122 | B7: 1.21 187,104 123 | 7E: 1.22 188,831 124 | D6: 1.25 193,600 125 | 7B: 1.26 194,447 126 | 98: 1.28 197,804 127 | 87: 1.31 202,135 128 | 0E: 1.31 202,819 129 | A8: 1.33 206,213 130 | 1C: 
1.34 207,711 131 | D9: 1.38 214,093 132 | 8C: 1.4 216,024 133 | FB: 1.43 221,349 134 | 4E: 1.45 224,615 135 | CA: 1.47 227,566 136 | D1: 1.53 237,376 137 | 09: 1.54 238,845 138 | 73: 1.57 242,678 139 | A0: 1.61 248,509 140 | 63: 1.64 252,873 141 | D3: 1.65 254,586 142 | DB: 1.66 255,963 143 | F7: 1.67 258,714 144 | 7D: 1.68 259,437 145 | 4F: 1.68 260,087 146 | D7: 1.71 265,130 147 | FC: 1.74 269,210 148 | 72: 1.75 270,535 149 | FA: 1.75 270,585 150 | 14: 1.77 273,109 151 | 0A: 1.79 276,904 152 | F9: 1.8 277,813 153 | 53: 1.8 278,009 154 | 56: 1.81 280,002 155 | F2: 1.83 283,093 156 | 29: 1.86 286,897 157 | 5E: 1.86 288,239 158 | 0B: 1.89 291,748 159 | FD: 1.9 294,226 160 | 5B: 1.91 294,784 161 | B6: 1.95 301,794 162 | BA: 2.02 312,980 163 | B9: 2.03 313,672 164 | 6C: 2.05 317,527 165 | 46: 2.1 324,404 166 | 42: 2.11 325,877 167 | 5D: 2.13 329,947 168 | F6: 2.16 333,586 169 | B0: 2.16 334,515 170 | 7F: 2.18 336,381 171 | 2B: 2.18 337,235 172 | 39: 2.21 341,618 173 | CE: 2.24 346,847 174 | 68: 2.28 353,083 175 | 78: 2.31 356,665 176 | D2: 2.35 363,279 177 | CF: 2.5 386,567 178 | 7C: 2.51 388,546 179 | C2: 2.57 397,005 180 | 1F: 2.65 409,822 181 | 47: 2.67 412,928 182 | 4B: 2.69 416,404 183 | 70: 2.7 418,264 184 | E0: 2.71 418,841 185 | 07: 2.72 419,966 186 | D8: 2.76 426,355 187 | 06: 2.83 438,131 188 | 0C: 2.85 441,108 189 | 59: 2.93 452,630 190 | 81: 2.96 457,481 191 | 54: 2.99 462,497 192 | CB: 3.03 468,289 193 | 5F: 3.08 476,707 194 | EC: 3.11 481,363 195 | 15: 3.13 484,301 196 | B8: 3.14 486,170 197 | 88: 3.18 492,315 198 | 55: 3.22 497,856 199 | 60: 3.32 513,385 200 | F0: 3.43 529,948 201 | EB: 3.46 534,995 202 | 57: 3.58 553,922 203 | D0: 3.61 557,810 204 | E9: 3.79 586,091 205 | FE: 3.82 590,153 206 | 0D: 3.99 617,323 207 | C9: 4.08 631,687 208 | C6: 4.12 637,097 209 | 11: 4.14 639,973 210 | 80: 4.24 655,556 211 | 58: 4.25 657,483 212 | C4: 4.3 665,571 213 | 3B: 4.33 669,964 214 | F8: 4.34 671,067 215 | 66: 4.5 696,228 216 | 18: 4.74 732,378 217 | C8: 4.77 
737,063 218 | 38: 4.91 758,774 219 | 90: 5.21 805,303 220 | 5C: 5.26 813,574 221 | 84: 5.33 823,839 222 | 33: 5.34 825,857 223 | 43: 5.64 872,960 224 | C1: 6.08 940,658 225 | 75: 6.09 941,661 226 | 50: 6.28 970,445 227 | C3: 6.46 999,274 228 | 4D: 6.47 1,000,960 229 | 30: 6.5 1,005,124 230 | F3: 6.85 1,058,650 231 | 04: 7.08 1,095,163 232 | C7: 7.55 1,167,534 233 | 02: 8.06 1,246,660 234 | 28: 8.17 1,264,030 235 | 05: 8.43 1,304,210 236 | 03: 8.54 1,320,763 237 | 40: 8.86 1,370,603 238 | 85: 9.73 1,504,206 239 | 45: 10.3 1,585,285 240 | 20: 10.6 1,632,210 241 | C0: 10.7 1,661,628 242 | 74: 11.2 1,737,576 243 | 08: 11.8 1,821,748 244 | 10: 13.0 2,003,234 245 | 44: 13.4 2,069,068 246 | 49: 13.8 2,127,233 247 | 83: 14.3 2,205,932 248 | 41: 15.8 2,443,053 249 | E8: 17.1 2,643,922 250 | 01: 18.5 2,855,602 251 | 4C: 18.8 2,910,679 252 | FF: 24.2 3,750,053 253 | 8D: 24.8 3,842,065 254 | 89: 26.3 4,074,751 255 | 24: 27.4 4,230,122 256 | 0F: 32.6 5,042,621 257 | CC: 34.5 5,336,078 258 | 8B: 56.7 8,767,032 259 | 00: 93.7 14,489,657 260 | 48: 1e+02 15,464,905 261 | -------------------------------------------------------------------------------- /Byte frequency/byte_frequency_tabulate.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | Tabulate JSON byte frequency files previously generated using 'ida_get_byte_frequency.py' within IDA Pro 4 | 5 | Args: 6 | One or more .json input files. 
7 | """ 8 | import sys 9 | import json 10 | from pprint import pprint 11 | 12 | assert len(sys.argv) >= 2, "Expected at least one byte frequency .json file input" 13 | 14 | # Combine the byte frequency dictionaries into one 15 | tabulated = {} 16 | for i in range(1, len(sys.argv)): 17 | path = sys.argv[i] 18 | print(f'Appending: {path}') 19 | with open(path) as fp: 20 | tmp = json.load(fp) 21 | #pprint(tmp) 22 | for byte, count in tmp.items(): 23 | tabulated[byte] = tabulated.get(byte, 0) + count 24 | #pprint(tabulated) 25 | 26 | # Convert dictionary to a sorted list gathering the max count in the process: [byte, count] 27 | largest = 0 28 | byte_list = [] 29 | for byte, count in tabulated.items(): 30 | byte_list.append([int(byte), count]) 31 | if count > largest: 32 | largest = count 33 | print(f'\nLargest byte count: {largest:,}') 34 | byte_list = sorted(byte_list, key=lambda e: e[1]) 35 | #pprint(byte_list) 36 | 37 | # Dump the values 38 | print('Byte - Percent - Count') 39 | for e in byte_list: 40 | print(f'{e[0]:02X}: {((e[1] / largest) * 100.0):0.4} {e[1]:,}') 41 | 42 | 43 | # Show byte frequency bar graph 44 | """ 45 | import matplotlib.pyplot as plt 46 | 47 | byte_list = sorted(byte_list, key=lambda e: e[0]) 48 | #x = [x[0] for x in byte_list] 49 | x = [f'{x[0]:02X}' for x in byte_list] 50 | y = [x[1] / largest for x in byte_list] 51 | 52 | fig, axs = plt.subplots(1, 1, figsize=(34, 13), tight_layout=True) 53 | axs.bar(x, y) 54 | 55 | plt.title('Code byte frequency', fontsize=20) 56 | plt.xlabel('Code Byte', fontsize=18) 57 | plt.ylabel('Count Ratio', fontsize=18) 58 | plt.grid(axis='y', alpha=0.5) 59 | plt.xticks(fontsize=6.5) 60 | plt.xlim(axs.patches[0].get_x()-1, axs.patches[-1].get_x()+1) 61 | #plt.savefig('byte_frequency.png') 62 | plt.show() 63 | """ -------------------------------------------------------------------------------- /Byte frequency/ida_get_byte_frequency.py: 
-------------------------------------------------------------------------------- 1 | 2 | """ 3 | IDA Pro script to gather byte frequency table for code sections in the loaded IDB 4 | saving the data to a JSON file. 5 | """ 6 | import json 7 | import ctypes 8 | from pprint import pprint 9 | from idaapi import * 10 | import ida_kernwin as kernwin 11 | 12 | # Import: void QCoreApplication::processEvents(QEventLoop::ProcessEventsFlag = AllEvents) 13 | # https://doc.qt.io/qt-5/qcoreapplication.html#processEvents 14 | processEventsPtr = None 15 | if kernwin.is_idaq(): 16 | # If IDA Windows 17 | if os.name == 'nt': 18 | qtcore = ctypes.CDLL('Qt5Core') 19 | if qtcore: 20 | processEventsPtr = qtcore['?processEvents@QCoreApplication@QT@@SAXV?$QFlags@W4ProcessEventsFlag@QEventLoop@QT@@@2@@Z'] 21 | 22 | # Call QT processEvents() to trigger the IDA output window to update immediately 23 | def refresh(): 24 | if processEventsPtr: 25 | processEventsPtr(ctypes.c_uint(0)) 26 | 27 | 28 | # Ask user for save file name 29 | save_path = ask_file(True, "*.json", "Script: Select the byte frequency JSON save file:") 30 | if save_path: 31 | frequency = {} 32 | 33 | # Walk all code segments.. 34 | for n in range(get_segm_qty()): 35 | seg = getnseg(n) 36 | if seg.type == SEG_CODE: 37 | print(f'Script: Walking: "{get_segm_name(seg)}" {seg.start_ea:014X} - {seg.end_ea:014X}') 38 | refresh() 39 | 40 | # Add all code bytes.. 
41 | ea = seg.start_ea 42 | while ea < seg.end_ea: 43 | # For bytes that don't exist in the IDB for a given address this get_db_byte() return 0xFF 44 | byte = get_db_byte(ea) 45 | frequency[byte] = frequency.get(byte, 0) + 1 46 | ea += 1 47 | 48 | #pprint(frequency) 49 | 50 | # Save the byte frequency table 51 | print(f'Script: Saving to "{save_path}"..') 52 | refresh() 53 | with open(save_path, "w") as fp: 54 | json.dump(frequency, fp, indent=2) 55 | print("Script: Done.") 56 | else: 57 | print("Script: Aborted.") 58 | refresh() 59 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Kevin Weatherman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Main.cpp: -------------------------------------------------------------------------------- 1 |  2 | // Plugin main 3 | #include "SigMaker.h" 4 | 5 | // UI "actions" 6 | enum SIG_ACTION 7 | { 8 | CREATE_FUNCTION_SIG, 9 | CREATE_ADDRESS_SIG, 10 | CREATE_RANGE_SIG 11 | }; 12 | 13 | // Global settings instance 14 | SETTINGS settings; 15 | 16 | static void idaapi OnRepoLink(int button_code, form_actions_t& fa) { open_url("https://github.com/kweatherman/sigmakerex"); } 17 | static void idaapi OnOptionButton(int button_code, form_actions_t& fa) 18 | { 19 | try 20 | { 21 | const char optionsDialog[] = 22 | { 23 | "SigMakerEx Options\n\n" 24 | 25 | // Output format dropdown 26 | "<#Signature output style.#Output format:b:0:100:>\n" 27 | // Mask byte for the inline wildcard style 28 | "<#Mask/wildcard byte for the \"inline\" BYTE output format.#Mask byte (0xAE default):N:0:4:>\n" 29 | 30 | // Function criteria dropdown 31 | "<#Function signature generation criteria option.#Function sigs:b:0:100:>\n" 32 | 33 | // Output level dropdown 34 | "<#IDA output message level.#Message level:b:0:100:>\n" 35 | 36 | "<#Maximum function refs to scan when a function is not unique.#Max function scan refs (0 for unlimited):D:0:4:>\n" 37 | "<#Maximum function entry point signature bytes. When this limit is hit, will attempt to find a cross-ref signature instead.#Max function entry point signature bytes (0 for unlimited):D:0:4:>\n" 38 | " \n" 39 | }; 40 | 41 | settings.Validate(); 42 | 43 | qstrvec_t outputFormatArray; 44 | outputFormatArray.push_back("IDA (Default)"); 45 | outputFormatArray.push_back("IDA (With single '?' 
wildcards)"); 46 | outputFormatArray.push_back("Code style"); 47 | outputFormatArray.push_back("Inline byte"); 48 | 49 | qstrvec_t funcCriteriaArray; 50 | funcCriteriaArray.push_back("Entry Point (Default)"); 51 | funcCriteriaArray.push_back("Minimal byte size"); 52 | funcCriteriaArray.push_back("Full function body"); 53 | 54 | qstrvec_t outputLevelArray; 55 | outputLevelArray.push_back("Terse (Default)"); 56 | outputLevelArray.push_back("Verbose"); 57 | 58 | UINT64 maxRefCount64 = (UINT64) settings.maxScanRefCount; 59 | UINT64 maxEntryPointBytes64 = (UINT64) settings.maxEntryPointBytes; 60 | ea_t maskByteEa = (ea_t) settings.maskByte; 61 | 62 | int result = ask_form(optionsDialog, &outputFormatArray,&settings.outputFormat, &maskByteEa, &funcCriteriaArray,&settings.funcCriteria, &outputLevelArray,&settings.outputLevel, &maxRefCount64, &maxEntryPointBytes64); 63 | if (result > 0) 64 | { 65 | settings.maxScanRefCount = (UINT32) min(maxRefCount64, UINT_MAX); 66 | settings.maxEntryPointBytes = (UINT32) min(maxEntryPointBytes64, UINT_MAX); 67 | settings.maskByte = (BYTE) min(maskByteEa, 0xFF); 68 | settings.Save(); 69 | } 70 | } 71 | catch (std::exception &ex) 72 | { 73 | msg(MSG_TAG "** C++ exception: OnOptionButton(): \"%s\" ***\n", ex.what()); 74 | } 75 | catch (...) 76 | { 77 | msg(MSG_TAG "** Gerneral C exception: OnOptionButton() ***\n"); 78 | } 79 | } 80 | 81 | static bool idaapi run(size_t arg) 82 | { 83 | // To facilitate passing action options via "plugins.cfg" hotkeys 84 | WORD action = (WORD) arg; 85 | try 86 | { 87 | if (action == 0) 88 | { 89 | const char mainDialog[] = 90 | { 91 | "STARTITEM 1\n" 92 | "BUTTON YES* Continue\n" 93 | 94 | // ---------------- Help ---------------- 95 | "HELP\n" 96 | "SigMakerEx Plugin:\n" 97 | "IDA Pro signature creation tool.\n" 98 | "Copyright\xC2\xA9 2022 Kevin Weatherman. Released under the MIT License.\n" 99 | 100 | "\n" 101 | "Create signature operations:\n" 102 | "1. 
\"Function\": Used to create a unique function entry point, a minimal function signature w/offset, or a whole-body signature depending on the \"Options\" config (see below).\n" 103 | "First select any address inside the target function.\n" 104 | "If the selected function is not unique (for the entry point, or the minimal option) then a signature for a unique function cross reference scan will be attempted.\n" 105 | "Typical use cases: Signatures to locate functions at run time in target memory, to locate functions in IDA after executable updates, or to help locate known libraries by signature, etc.\n\n" 106 | 107 | "2) \"At address\": Attempts to find a unique signature at the selected address.\n" 108 | "Typical use case: For locating a particular offset at runtime to hook, or making Cheat Engine script signatures for this purpose, etc.\n\n" 109 | 110 | "3. \"From address range\": Generates a signature from the selected address range, not checking for uniqueness.\n" 111 | "Special use case for when one of the other actions won't work.\n\n" 112 | 113 | "Signature results are pushed to the Windows clipboard for easy CTRL+V pasting into source code, etc.\n" 114 | 115 | "\n" 116 | "Options: (via the \"Options\" button)\n" 117 | "Output format:\n" 118 | "\"IDA\": The default hex binary search format that IDA and some other tools support, using spaced hex bytes and \"??\" wildcards.\n" 119 | "Example: \"C1 6C E8 ?? ?? ?? ?? 8B 50 08\"\n" 120 | "\"Code style\": Escape coded hex string and a separate mask string where 'x' are keeper bytes, and '?' 
are wildcard bytes.\n" 121 | "Example: \"\\xC1\\x6C\\xE8\\xCC\\xCC\\xCC\\xCC\\x8B\\x50\\x08\", \"xxx????xxx\"\n" 122 | "\"Inline byte\": A minimalist C style array of bytes with wildcard bytes included format.\n" 123 | "Example: \"{0xC1,0x6C,0xE8,0xAE,0xAE,0xAE,0xAE,0x8B,0x50,0x08};\"\n" 124 | "Use the \"mask byte\" edit box to change the default \"Inline byte\" mask byte.\n\n" 125 | 126 | "Function sigs:\n" 127 | "The criteria for \"Function\" signature generation.\n" 128 | "\"Entry point\": Will attempt to generate a minimal byte sized function entry point signature when possible.\n" 129 | "\"Minimal byte size\": Will attempt to generate a minimal, with least wildcards count, byte sized (five are greater) instruction boundary aligned signature inside of the selected function body.\n" 130 | "\"Full function body\": Will attempt to generate a unique full function body signature.\n\n" 131 | 132 | "For any of these three options, if the function is not unique, an attempt will be made to locate the smallest unique cross reference signature instead.\n" 133 | "If you wish to make a full or partial function signature for a non-unique function then use the \"From address range\" option instead.\n\n" 134 | 135 | "\"Message level\": Set to \"Verbose\" for internal signature generation message output to the IDA log window.\n\n" 136 | 137 | "\"Max function scan refs\": Limit how many function cross references to search when a direct \"Function\" action signature can't be found.\n" 138 | "Normally this should be '0' for unlimited search, but for problem cases where there are so many references that causes a slowdown, this can be set to some reasonable limit like 16 or 100.\n\n" 139 | 140 | "\"Max function entry point signature bytes\": When using the \"Function\" option, and the \"Entry point\" criteria is configured, optionally limit the maximum entry point signature byte size. 
The default is '0', for unlimited (which can be up to the entire selected function body byte size).\n" 141 | "If this limit is exceeded, an xref signature will be looked for instead.\n\n" 142 | 143 | "For the relatively rare case of functions that have their chunks spread over multiple address ranges, the tool will attempt to use just the first chunk.\n" 144 | "If wishing to make a signature in one of the disjointed chunks, try using the \"At address\" method. If all else fails, try a \"From address range\" sig (might take some manual searching for uniqueness).\n" 145 | 146 | "\n" 147 | "Credits:\n" 148 | "Thanks to the creator of the original SigMaker tool back from the gamedeception.net days up to the current C/C++ and Python iteration authors.\n" 149 | "P4TR!CK, bobbysing, xero|hawk, ajkhoury, and zoomgod et al.\n" 150 | "Thanks to Wojciech Mula for his SIMD programming resources.\n\n" 151 | 152 | "See the SigMakerEx READ.ME for more help and details.\n" 153 | "ENDHELP\n" 154 | // -------------------------------------- 155 | 156 | // Dialog title 157 | "SigMakerEx\n\n" 158 | 159 | // Message text 160 | "SigMakerEx %q \t\n" 161 | 162 | "<#Click to open SigMakerEx repo page.#SigmakerEx Github:k::>\n\n" 163 | 164 | "Create signature:\n" 165 | "<#Attempt to create a unique function signature for selected address at or inside the function.#Function:R>\n" 166 | "<#Attempt to create a unique signature at selected address.#At address:R>\n" 167 | "<#Create a raw signature for selected adress range, unique or not.#From address range \t:R>>\n\n" 168 | 169 | "<#Options:B::>\n" 170 | " \n" 171 | }; 172 | 173 | static WORD lastAction = CREATE_FUNCTION_SIG; 174 | qstring version, tmp; 175 | version.sprnt("v%s, built %s.", GetVersionString(MY_VERSION, tmp).c_str(), __DATE__); 176 | 177 | int result = ask_form(mainDialog, &version, OnRepoLink, &lastAction, OnOptionButton); 178 | if (result <= 0) 179 | return true; 180 | else 181 | action = lastAction; 182 | } 183 | else 184 | 
action -= 1; 185 | 186 | switch ((SIG_ACTION) action) 187 | { 188 | // Attempt to create an ideal function signature 189 | case CREATE_FUNCTION_SIG: 190 | CreateFunctionSig(); 191 | break; 192 | 193 | // Attempt to create a signature for a selected address 194 | case CREATE_ADDRESS_SIG: 195 | CreateAddressSig(); 196 | break; 197 | 198 | // Create a raw signature for a selected address range, unique or not 199 | case CREATE_RANGE_SIG: 200 | CreateAddressRangeSig(); 201 | break; 202 | }; 203 | } 204 | catch (std::exception &ex) 205 | { 206 | msg(MSG_TAG "** C++ exception: run(): \"%s\" ***\n", ex.what()); 207 | } 208 | catch (...) 209 | { 210 | // Note: Need to set the /EHa to catch SEH exceptions too 211 | msg(MSG_TAG "** Gerneral C exception: run() ***\n"); 212 | } 213 | return true; 214 | } 215 | 216 | static plugmod_t* idaapi init() 217 | { 218 | settings.Load(); 219 | return PLUGIN_OK; 220 | } 221 | 222 | void idaapi term() 223 | { 224 | SearchCleanup(); 225 | } 226 | 227 | __declspec(dllexport) plugin_t PLUGIN = 228 | { 229 | IDP_INTERFACE_VERSION, 230 | PLUGIN_PROC, 231 | init, 232 | term, 233 | run, 234 | "Signature creation tool.", 235 | "SigMakerEx plugin", 236 | "SigMakerEx", 237 | "Ctrl-Alt-S" 238 | }; 239 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## SigMakerEx 2 | 3 | Enhanced IDA Pro signature generator plugin. 4 | Repo [Github](https://github.com/kweatherman/sigmakerex) 5 | 6 | ### Installation 7 | 8 | Copy `IDA_SigMaker.dll` and to your IDA "plugins" directory. 9 | 10 | The default IDA hot key is "Ctrl-Alt-S", but can be set to another using key your IDA "plugins.cfg". 11 | Since "Ctrl-Alt-S" now combo conflicts with an IDA default, to avoid getting warning messages, edit your "idagui.cfg" and make the "StackTrace" entry like: `"StackTrace" = 0 // "Ctrl-Alt-S" // open stack trace window` (the '0' disables the key). 
12 | 13 | Requires IDA Pro version 7.6'ish. 14 | 15 | ### Using 16 | Invoke the plugin via its hotkey or via the IDA Edit/Plugin menu. 17 | 18 | ![main](/images/main.png) 19 | 20 | There are three signature generation operations: 21 | 1. **Function**: Used to create a unique function entry point, a minimal function signature w/offset, or a whole-body signature depending on the *Options* config (see below). 22 | 23 | First select any address inside the target function. 24 | If the selected function is not unique (for the entry point, or the minimal option) then a signature for a unique function cross-reference scan will be attempted. 25 | 26 | Typical use cases: Signatures to locate functions at run time in target memory, to locate functions in IDA after executable updates, or to help locate known libraries by signature, etc. 27 | 28 | 29 | 2. **At address**: Attempts to find a unique signature at the selected address. 30 | Typical use case: For locating a particular offset at runtime to hook, or making [Cheat Engine](https://www.cheatengine.org/) script signatures for this purpose, etc. 31 | 32 | 3. **From address range**: Generates a signature from the selected address range, not checking for uniqueness. 33 | Special use case for when one of the other actions won't work, like wanting to ignore the uniqueness of a signature, etc. 34 | 35 | Example signature output: 36 | ![minimal_func_example](/images/minimal_func_example.png) 37 | 38 | Signature results are pushed to the Windows clipboard for easy CTRL+V pasting into source code, etc. 39 | 40 | ##### Options 41 | 42 | ![options](/images/options.png) 43 | 44 | **Output format:** 45 | **IDA**: The default hex binary search format that IDA and some other tools support, using spaced hex bytes and "??" wildcards. 46 | Example: `C1 6C E8 ?? ?? ?? ?? 8B 50 08` 47 | **Code style**: Escape coded hex string and a separate mask string where 'x' are keeper bytes, and '?' are wildcard bytes. 
48 | Example: `"\xC1\x6C\xE8\xCC\xCC\xCC\xCC\x8B\x50\x08", "xxx????xxx"` 49 | **Inline byte**: A minimalist C style array of bytes format with the wildcard bytes included. 50 | Example: `{0xC1,0x6C,0xE8,0xAE,0xAE,0xAE,0xAE,0x8B,0x50,0x08};` 51 | Use the "mask byte" edit box to change the default "Inline byte" mask byte. 52 | The default mask byte is `0xAE`, one of the least used code bytes (see "Ideal mask byte" below). 53 | 54 | ##### Function sigs: 55 | 56 | The criteria for "Function" signature generation. 57 | **Entry point**: Will attempt to generate a minimal byte sized function entry point signature when possible. 58 | **Minimal byte size**: Will attempt to generate a minimal, with least wildcards count, byte sized (five or greater) instruction boundary aligned signature inside of the selected function body. 59 | **Full function body**: Will attempt to generate a unique full function body signature. 60 | 61 | For any of these three options, if the function is not unique, an attempt will be made to locate the smallest unique cross-reference signature instead. If you wish to make a full or partial function signature for a non-unique function then use the "From address range" option instead. 62 | 63 | **Message level**: Set to "Verbose" for internal signature generation message output to the IDA log window. 64 | 65 | **Max function scan refs**: Limit how many function cross-references to search when a direct "Function" action signature can't be found. Normally this should be '0' for unlimited search, but for problem cases where there are so many references that it causes a slowdown, this can be set to some reasonable limit like 16 or 100 to increase the scanning speed. 66 | 67 | For the relatively rare case of functions that have their chunks spread over multiple address ranges, the tool will attempt to use just the first chunk. If wishing to make a signature in one of the disjointed chunks, try using the "At address" method. 
If all else fails, try a "From address range" sig (which might take some manual searching for uniqueness). 68 | 69 | **Max function entry point signature bytes**: When using the "Function" option, and the "Entry point" criteria is configured, optionally limit the maximum entry point signature byte size. The default is '0', for unlimited (which can be up to the entire selected function body byte size). 70 | If this limit is exceeded, a cross-reference signature will be looked for instead. 71 | 72 | Set it to a practical limit like '16' or '32' if you prefer the typically smaller xref signatures over potentially very large entry point signatures. 73 | 74 | ### Original SigMaker vs SigMakerEx 75 | 76 | 1) SigMakerEx ("EX") overall generates smaller and tighter function signatures by using better instruction analysis. 77 | Example: SigMaker ("SM") wildcards the operand bytes of instruction `sub esp, 90h` (as `81 EC ?? ?? ?? ??`), throwing out the last four bytes unnecessarily, while EX sees it as an immediate value and keeps the whole `81 EC 90 00 00 00` byte sequence. 78 | 2) EX is better focused on normative function body signature use cases. 79 | For SM there is only one controllable option. It will attempt to make a unique signature at whatever address you select in the function. If it can't find one there, it will only look for a unique cross-reference sig instead. 80 | For EX, since the identified typical use case is to locate function entry points, the smallest entry point signature will be generated when the "Entry point" criteria option is configured. 81 | When the "Minimal byte size" option is selected, it will look for the smallest unique signature with the fewest wildcards (of minimum five bytes) within the whole function body. 82 | 3) SM has more output criteria control over byte vs wildcard count, etc., in its options dialog. EX assumes you want the best of both (least wildcards and smallest byte size).
83 | 4) EX omits the "conversion" and the individual "search" functionality that SM has, in favor of a simpler and less cluttered UI. 84 | 85 | For searching, since EX always emits IDA format output in addition to the selected output format signatures, use the IDA binary search "Hex" option with the IDA sig string. 86 | 5) EX is generally faster, even when doing more extensive searches, due to a technique of cloning the IDB into RAM and using an AVX2 optimized pattern scanner vs relying on the slow IDA find function for scanning. 87 | 88 | ### Ideal mask byte 89 | 90 | In my own projects for finding patterns dynamically, I prefer the "Inline byte" (for lack of a better name) format. 91 | It's the simplest, most compact, and it doesn't require a runtime transformation from an ASCII hex string. 92 | I've used this format for many projects and have yet to run into any signature collision or redundant match problems. 93 | 94 | To minimize potential redundancy issues, it's prudent to use one of the least used code byte values for the wildcard/mask byte. To find the ideal candidates, I gathered the code byte frequency from three large code segments each for 32bit and 64bit, then tabulated and sorted the results. The "ida_get_byte_frequency.py" IDA script is used to gather a byte frequency dictionary and save it to a JSON DB. The "byte_frequency_tabulate.py" script tabulates and sorts in ascending order a set of these saved JSON DBs. 95 | It's apparent the byte frequency for 32bit isn't the same as the 64bit one, so the two are tabulated independently. See "32bit.txt" and "64bit.txt". 96 | In a visual correlation of the two, 0xA2 is actually the least common byte across both, followed by 0xAE. 97 | 0xAE was chosen over 0xA2 as the default mask byte since it's subjectively easier to pick out in hex visually. 98 | 99 | ### Building 100 | 101 | Built using Visual Studio 2019, on Windows 10, with the only dependency being the official IDA Pro C/C++ SDK.
102 | As set up in the project file, the build looks for an environment variable `_IDADIR`, under which it expects to find the "idasdk/include" and "idasdk/lib" folders of the IDA SDK. `IDADIR` is not used since IDA looks for it itself, which can cause a conflict if you try to use more than one installed IDA version. 103 | 104 | Python 3.7'ish or better is needed to run the "byte_frequency_tabulate.py" script. 105 | 106 | ### Credits 107 | 108 | Thanks to the creators of the original SigMaker tool, from back in the gamedeception.net days up to the current C/C++ and Python iteration authors: P4TR!CK, bobbysing, xero|hawk, ajkhoury, and zoomgod et al. 109 | Thanks to Wojciech Mula for his SIMD programming resources. 110 | 111 | 112 | ---- 113 | 114 | ##### License 115 | 116 | **MIT License** 117 | Copyright © 2009–present Kevin Weatherman 118 | 119 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 120 | 121 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 122 | 123 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 124 | 125 | See [MIT License](http://www.opensource.org/licenses/mit-license.php) for full details.
// Local search data container.
// Holds a RAM clone of the IDB byte range [inf_get_min_ea(), inf_get_max_ea()) that the pattern
// scanners run over, plus the cached result of the AVX2 capability test.
struct SearchData
{
    // Clone IDB byte database to RAM for fast pattern scanning
    PBYTE buffer;
    size_t size;

    // Build the RAM clone on first use; later calls reuse the existing buffer.
    // Returns TRUE when a clone buffer is available, FALSE on an empty range or allocation failure.
    BOOL CloneIdb()
    {
        if (!buffer)
        {
            const ea_t minEa = inf_get_min_ea();
            const ea_t maxEa = inf_get_max_ea();
            LOG_VERBOSE(__FUNCTION__ ": min_ea: 0x%llX, max_ea: 0x%llX, size: 0x%llX\n\n", (UINT64) minEa, (UINT64) maxEa, (UINT64) (maxEa - minEa));

            // An empty (or inverted) address range has nothing to clone. The previous do/while copy
            // loop decremented a zero count here and ran off the end of the buffer.
            if (maxEa <= minEa)
            {
                msg(MSG_TAG "** The IDB address range is empty, nothing to scan! **\n");
                return FALSE;
            }

            // Allocate page buffer to encompass the whole IDB region.
            // NOTE(review): the extra 32 bytes presumably pad the tail for the 32 byte wide block
            // reads of the AVX2 scanner -- confirm against FindSignatureAVX2().
            size = (size_t) (maxEa - minEa);
            buffer = (PBYTE) VirtualAlloc(NULL, size + 32, (MEM_COMMIT | MEM_RESERVE), PAGE_READWRITE);
            if (buffer)
            {
                // Copy the IDB bytes to the buffer
                // Simple loop much faster than: get_qword(), get_bytes(), etc.
                // Note: For bytes that don't exist in the PE file, get_db_byte() will return 0xFF.
                ea_t currentEa = minEa;
                for (size_t i = 0; i < size; ++i, ++currentEa)
                    buffer[i] = (BYTE) get_db_byte(currentEa);
            }
            else
                msg(MSG_TAG "** Failed to allocate the clone RAM buffer of size: 0x%llX ! **\n", (UINT64) size);
        }

        return buffer != NULL;
    }

    // Release the clone buffer (safe to call repeatedly)
    void Cleanup()
    {
        if (buffer)
        {
            VirtualFree(buffer, 0, MEM_RELEASE);
            buffer = NULL;
            size = 0;
        }
    }

    // Most post 2013 Intel and 2015 AMD CPUs have "Advanced Vector Extensions 2" (AVX2) support
    // 2022 86.65% https://store.steampowered.com/hwsurvey/
    // https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2
    // Returns TRUE only when both the CPU and the OS allow AVX2 instructions to execute.
    BOOL TestAVX2Support()
    {
        enum { EAX, EBX, ECX, EDX };
        int regs[4];

        // Highest Function Parameter
        __cpuid(regs, 0);
        if (regs[EAX] < 7)
            return FALSE;

        // The CPU feature bit alone is not enough: the OS must have enabled XSAVE (OSXSAVE, bit 27)
        // along with AVX (bit 28), and XCR0 must show that XMM and YMM state (bits 1 and 2) are
        // saved on context switch. Otherwise AVX instructions fault despite the CPUID bit.
        __cpuid(regs, 1);
        const int OSXSAVE_AND_AVX = ((1 << 27) | (1 << 28));
        if ((regs[ECX] & OSXSAVE_AND_AVX) != OSXSAVE_AND_AVX)
            return FALSE;
        if ((_xgetbv(0) & 0x6) != 0x6)
            return FALSE;

        // Extended Features (leaf 7, sub-leaf 0)
        __cpuidex(regs, 7, 0);
        return (regs[EBX] & /*AVX2*/ (1 << 5)) != 0;
    }

    BOOL hasAVX2;
    SearchData() : buffer(NULL), size(0)
    {
        hasAVX2 = TestAVX2Support();
    }
    ~SearchData()
    {
        Cleanup();
    }

} static searchData;
memcmp_mask(const BYTE *buffer1, const BYTE *buffer2, const BYTE *mask2, size_t count) 119 | { 120 | while (count--) 121 | { 122 | if (*mask2) 123 | { 124 | if (*buffer1 != *buffer2) 125 | return -1; 126 | } 127 | 128 | buffer1++, buffer2++, mask2++; 129 | }; 130 | return 0; 131 | } 132 | 133 | // Find signature pattern in memory 134 | PBYTE FindSignatureAVX2(PBYTE data, size_t size, const SIG &sig, BOOL hasWildcards) 135 | { 136 | const BYTE *pat = sig.bytes.data(); 137 | size_t patLen = sig.bytes.size(); 138 | size_t patLen1 = (patLen - 1); 139 | size_t patLen2 = (patLen - 2); 140 | 141 | // Fill 'first' and 'last' with the first and last pattern byte respectively 142 | const __m256i first = _mm256_set1_epi8(pat[0]); 143 | const __m256i last = _mm256_set1_epi8(pat[patLen1]); 144 | 145 | if(!hasWildcards) 146 | { 147 | // A little faster without wildcards 148 | 149 | // Scan 32 bytes at the time.. 150 | for (size_t i = 0; i < size; i += 32) 151 | { 152 | // Load in the next 32 bytes of input first and last 153 | // Can use align 32 bit read for first since the input is page aligned 154 | const __m256i block_first = _mm256_load_si256((const __m256i*) (data + i)); 155 | const __m256i block_last = _mm256_loadu_si256((const __m256i*) (data + i + patLen1)); 156 | 157 | // Compare first and last data to get 32byte masks 158 | const __m256i eq_first = _mm256_cmpeq_epi8(first, block_first); 159 | const __m256i eq_last = _mm256_cmpeq_epi8(last, block_last); 160 | 161 | // AND the equality masks and into a 32 bit mask 162 | UINT32 mask = _mm256_movemask_epi8(_mm256_and_si256(eq_first, eq_last)); 163 | 164 | // Do pattern compare between first and last position if we got our first and last at this data position 165 | while (mask != 0) 166 | { 167 | UINT32 bitpos = get_first_bit_set(mask); 168 | if (memcmp(data + i + bitpos + 1, pat + 1, patLen2) == 0) 169 | { 170 | return data + i + bitpos; 171 | } 172 | mask = clear_leftmost_set(mask); 173 | }; 174 | } 175 | } 176 | else 
177 | { 178 | // Pattern scan with wildcards mask 179 | const BYTE *msk = sig.mask.data(); 180 | 181 | for (size_t i = 0; i < size; i += 32) 182 | { 183 | const __m256i block_first = _mm256_load_si256((const __m256i*) (data + i)); 184 | const __m256i block_last = _mm256_loadu_si256((const __m256i*) (data + i + patLen1)); 185 | 186 | const __m256i eq_first = _mm256_cmpeq_epi8(first, block_first); 187 | const __m256i eq_last = _mm256_cmpeq_epi8(last, block_last); 188 | 189 | UINT32 mask = _mm256_movemask_epi8(_mm256_and_si256(eq_first, eq_last)); 190 | 191 | // Do a byte pattern w/mask compare between first and last position if we got our first and last 192 | while (mask != 0) 193 | { 194 | UINT32 bitpos = get_first_bit_set(mask); 195 | if (memcmp_mask(data + i + bitpos + 1, pat + 1, msk + 1, patLen2) == 0) 196 | { 197 | return data + i + bitpos; 198 | } 199 | mask = clear_leftmost_set(mask); 200 | }; 201 | } 202 | } 203 | 204 | return NULL; 205 | } 206 | 207 | 208 | // ------------------------------------------------------------------------------------------------ 209 | 210 | // Find signature pattern in memory 211 | // Base memory search reference, about 10x slower than the AVX2 version 212 | PBYTE FindSignature(PBYTE input, size_t inputLen, const SIG &sig, BOOL hasWildcards) 213 | { 214 | if (!hasWildcards) 215 | { 216 | // If no wildcards, faster to use a memcmp() type 217 | const BYTE *pat = sig.bytes.data(); 218 | const BYTE *end = (input + inputLen); 219 | const BYTE first = *pat; 220 | size_t sigLen = sig.bytes.size(); 221 | 222 | // Setup last in the pattern length byte quick for rejection test 223 | size_t lastIdx = (sigLen - 1); 224 | BYTE last = pat[lastIdx]; 225 | 226 | for (PBYTE ptr = input; ptr < end; ++ptr) 227 | { 228 | if ((ptr[0] == first) && (ptr[lastIdx] == last)) 229 | { 230 | if (memcmp(ptr+1, pat+1, sigLen-2) == 0) 231 | return ptr; 232 | } 233 | } 234 | } 235 | else 236 | { 237 | const BYTE *pat = sig.bytes.data(); 238 | const BYTE *msk = 
sig.mask.data(); 239 | const BYTE *end = (input + inputLen); 240 | const BYTE first = *pat; 241 | size_t sigLen = sig.bytes.size(); 242 | size_t lastIdx = (sigLen - 1); 243 | BYTE last = pat[lastIdx]; 244 | 245 | for (PBYTE ptr = input; ptr < end; ++ptr) 246 | { 247 | if ((ptr[0] == first) && (ptr[lastIdx] == last)) 248 | { 249 | const BYTE *patPtr = pat+1; 250 | const BYTE *mskPtr = msk+1; 251 | const BYTE *memPtr = ptr+1; 252 | BOOL found = TRUE; 253 | 254 | for (int i = 0; (i < sigLen-2) && (memPtr < end); ++mskPtr, ++patPtr, ++memPtr, i++) 255 | { 256 | if (!*mskPtr) 257 | continue; 258 | 259 | if (*memPtr != *patPtr) 260 | { 261 | found = FALSE; 262 | break; 263 | } 264 | } 265 | 266 | if (found) 267 | return ptr; 268 | } 269 | } 270 | } 271 | 272 | return NULL; 273 | } 274 | 275 | // ------------------------------------------------------------------------------------------------ 276 | 277 | // Reference version search 278 | static SSTATUS SearchSignature(PBYTE input, size_t inputLen, const SIG &sig) 279 | { 280 | size_t sigSize = sig.bytes.size(); 281 | size_t len = inputLen; 282 | size_t count = 0; 283 | BOOL hasWildcards = sig.hasMask(); 284 | 285 | inputLen -= sigSize; 286 | 287 | // Search for signature match.. 
288 | PBYTE match = FindSignature(input, len, sig, hasWildcards); 289 | while (match) 290 | { 291 | // Stop now if we've hit two matches 292 | if (++count >= 2) 293 | break; 294 | 295 | ++match; 296 | len = (inputLen - (int) (match - input)); 297 | if (len < sigSize) 298 | break; 299 | 300 | // Next search 301 | match = FindSignature(match, len, sig, hasWildcards); 302 | }; 303 | 304 | SSTATUS status; 305 | switch (count) 306 | { 307 | case 0: status = SSTATUS::NOT_FOUND; break; 308 | case 1: status = SSTATUS::UNIQUE; break; 309 | default: status = SSTATUS::NOT_UNIQUE; break; 310 | }; 311 | 312 | // Only happens when there is an error in the search algorithm during development/testing 313 | if (status == SSTATUS::NOT_FOUND) 314 | { 315 | msg("\n** " __FUNCTION__ ": Sig not found! **\n"); 316 | qstring tmp; 317 | sig.ToIdaString(tmp); 318 | msg("(%u) \"%s\"\n\n", (UINT32) sig.bytes.size(), tmp.c_str()); 319 | } 320 | 321 | return status; 322 | } 323 | 324 | // Fast AVX2 based search 325 | static SSTATUS SearchSignatureAVX2(PBYTE input, size_t inputLen, const SIG &sig) 326 | { 327 | size_t sigSize = sig.bytes.size(); 328 | size_t len = inputLen; 329 | size_t count = 0; 330 | BOOL hasWildcards = sig.hasMask(); 331 | 332 | inputLen -= sigSize; 333 | 334 | PBYTE match = FindSignatureAVX2(input, len, sig, hasWildcards); 335 | while (match) 336 | { 337 | if (++count >= 2) 338 | break; 339 | 340 | ++match; 341 | len = (inputLen - (int) (match - input)); 342 | if (len < sigSize) 343 | break; 344 | 345 | match = FindSignatureAVX2(match, len, sig, hasWildcards); 346 | }; 347 | 348 | SSTATUS status; 349 | switch (count) 350 | { 351 | case 0: status = SSTATUS::NOT_FOUND; break; 352 | case 1: status = SSTATUS::UNIQUE; break; 353 | default: status = SSTATUS::NOT_UNIQUE; break; 354 | }; 355 | 356 | // Only happens when there is an error in the search algorithm during development/testing 357 | if (status == SSTATUS::NOT_FOUND) 358 | { 359 | msg("\n** " __FUNCTION__ ": Sig not 
found! **\n"); 360 | qstring tmp; 361 | sig.ToIdaString(tmp); 362 | msg("(%u) \"%s\"\n\n", (UINT32) sig.bytes.size(), tmp.c_str()); 363 | } 364 | return status; 365 | } 366 | 367 | // Search for signature pattern, returning a status result 368 | SSTATUS SearchSignature(const SIG &sig) 369 | { 370 | // Setup IDB RAM clone on first scan 371 | if (!searchData.CloneIdb()) 372 | return SSTATUS::NOT_FOUND; 373 | 374 | #ifndef FORCE_REF_SEARCH 375 | if (searchData.hasAVX2) 376 | return SearchSignatureAVX2(searchData.buffer, searchData.size, sig); 377 | else 378 | #else 379 | #pragma message(__LOC2__ " ** Force use reference search switch on! **") 380 | #endif 381 | { 382 | static BOOL warnOnce = TRUE; 383 | if ((settings.outputLevel >= SETTINGS::LL_VERBOSE) && warnOnce) 384 | { 385 | warnOnce = FALSE; 386 | msg(" * Using non-AVX2 reference search *\n"); 387 | } 388 | 389 | return SearchSignature(searchData.buffer, searchData.size, sig); 390 | } 391 | } 392 | -------------------------------------------------------------------------------- /Settings.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | // Settings container 5 | struct SETTINGS 6 | { 7 | #define SETTINGS_FILENAME "SigMakerEx.cfg" 8 | 9 | UINT32 version; // Plugin version 10 | 11 | // Function signature creation criteria 12 | enum FUNC_CRITERIA: int 13 | { 14 | FUNC_ENTRY_POINT, // Function entry point 15 | FUNC_MIN_SIZE, // By minimal byte size 16 | FUNC_FULL, // Sig of all function instructions (just first section, if has multiple) 17 | }; 18 | FUNC_CRITERIA funcCriteria; 19 | 20 | enum OUTPUT_FORMAT: int 21 | { 22 | OF_IDA, // IDA and others "AB 78 E8 ?? ?? ?? ?? CC" style spaced bytes with double "??" wildcard 23 | OF_IDA2, // "" but with a single '?' wildcard char 24 | OF_CODE, // Escape encoded binary with ASCII mask "code" style in two strings. 25 | // E.g. 
"\x33\x9A\xFA\x00\x00\x00\x00\x45\x68", "xxxxxxx????xx" 26 | OF_INLINE, // Like "code" style, but byte string with inlined bytes w/wildcard 27 | // E.g. "{0x33,0x9A,0xFA,0xAE,0xAE,0xAE,0xAE,0x45,0x68}", where 0xAE is the wildcard bytes. 28 | }; 29 | OUTPUT_FORMAT outputFormat; 30 | 31 | // IDA message output log level 32 | enum OUTPUTLEVEL: int 33 | { 34 | LL_TERSE, // Minimal/normal output 35 | LL_VERBOSE // Verbose for monitoring and troubleshooting 36 | }; 37 | OUTPUTLEVEL outputLevel; 38 | 39 | // Maximum code reference search candidates 40 | // 0 = unlimited 41 | UINT32 maxScanRefCount; 42 | 43 | // Optional maximum function entry point signature bytes 44 | // 0 = unlimited 45 | UINT32 maxEntryPointBytes; 46 | 47 | // Byte mask/wildcard byte for the "inline" output format 48 | BYTE maskByte; 49 | 50 | SETTINGS() { Default(); }; 51 | 52 | void Default() 53 | { 54 | version = MY_VERSION; 55 | funcCriteria = SETTINGS::FUNC_ENTRY_POINT; 56 | outputFormat = SETTINGS::OF_IDA; 57 | outputLevel = SETTINGS::LL_TERSE; 58 | maxScanRefCount = 0; 59 | maxEntryPointBytes = 0; 60 | maskByte = 0xAE; // Default, one of the least common code byte frequency values 61 | } 62 | 63 | template void CLAMP(T& x, T min, T max) { if (x < min) x = min; else if (x > max) x = max; } 64 | 65 | void Validate() 66 | { 67 | CLAMP(funcCriteria, SETTINGS::FUNC_ENTRY_POINT, SETTINGS::FUNC_FULL); 68 | CLAMP(outputFormat, SETTINGS::OF_IDA, SETTINGS::OF_INLINE); 69 | CLAMP(outputLevel, SETTINGS::LL_TERSE, SETTINGS::LL_VERBOSE); 70 | } 71 | 72 | void Save() 73 | { 74 | char path[MAXSTR]; 75 | qsnprintf(path, MAXSTR - 1, "%s\\%s", get_user_idadir(), SETTINGS_FILENAME); 76 | FILE *fp = qfopen(path, "wb"); 77 | if (fp) 78 | { 79 | Validate(); 80 | qfwrite(fp, this, sizeof(SETTINGS)); 81 | qfclose(fp); 82 | } 83 | } 84 | 85 | void Load() 86 | { 87 | Default(); 88 | 89 | try 90 | { 91 | char path[MAXSTR]; 92 | qsnprintf(path, MAXSTR - 1, "%s\\%s", get_user_idadir(), SETTINGS_FILENAME); 93 | FILE *fp = 
qfopen(path, "rb"); 94 | if (fp) 95 | { 96 | qfread(fp, this, sizeof(SETTINGS)); 97 | qfclose(fp); 98 | 99 | // If version is different other than just just the patch version, reset to the new version defaults 100 | if ((GET_VERSION_MAJOR(version) != GET_VERSION_MAJOR(MY_VERSION)) || (GET_VERSION_MINOR(version) != GET_VERSION_MINOR(MY_VERSION))) 101 | { 102 | Default(); 103 | Save(); 104 | } 105 | else 106 | Validate(); 107 | } 108 | } 109 | CATCH() 110 | } 111 | }; 112 | 113 | // Global instance 114 | extern SETTINGS settings; 115 | 116 | #define LOG_TERSE(...) { if (settings.outputLevel >= SETTINGS::LL_TERSE) msg(__VA_ARGS__); } 117 | #define LOG_VERBOSE(...) { if (settings.outputLevel >= SETTINGS::LL_VERBOSE){ msg(__VA_ARGS__); WaitBox::processIdaEvents(); } } 118 | -------------------------------------------------------------------------------- /SigMaker.h: -------------------------------------------------------------------------------- 1 | 2 | // Common and shared 3 | #pragma once 4 | 5 | #include "StdAfx.h" 6 | #include 7 | 8 | #include "Settings.h" 9 | 10 | // Minimal signature byte length 11 | static const UINT32 MIN_SIG_SIZE = 5; 12 | 13 | // Signature container 14 | struct SIG 15 | { 16 | std::vector bytes; 17 | std::vector mask; // 0xFF = keep, 0 = wildcard/skip 18 | 19 | // ------------------------------------------------------------------------ 20 | 21 | // Append one or more bytes at address to the signature 22 | void AddBytes(ea_t ea, UINT32 size) 23 | { 24 | size_t len = bytes.size(); 25 | bytes.resize(len + size); 26 | mask.resize(len + size); 27 | 28 | PBYTE bytesPtr = &bytes[len]; 29 | PBYTE maskPtr = &mask[len]; 30 | 31 | // get_db_byte() loop faster than get_bytes(), etc. 
32 | while (size--) 33 | { 34 | *bytesPtr = get_db_byte(ea); 35 | *maskPtr = 0xFF; 36 | ea++, bytesPtr++, maskPtr++; 37 | }; 38 | } 39 | 40 | // Append one or more wildcards to a signature 41 | void AddWildcards(UINT32 size) 42 | { 43 | size_t len = bytes.size(); 44 | bytes.resize(len + size); 45 | mask.resize(len + size); 46 | 47 | PBYTE bytesPtr = &bytes[len]; 48 | PBYTE maskPtr = &mask[len]; 49 | 50 | while (size--) 51 | { 52 | *bytesPtr++ = 0xCC; 53 | *maskPtr++ = 0; 54 | }; 55 | } 56 | 57 | // ------------------------------------------------------------------------ 58 | 59 | // Output the sig as a "F8 66 4B ?? ?? ?? 88" format string or "F8 66 4B ? ? ? 88" 60 | void ToIdaString(__out qstring &string, BOOL singleByteWildCard = FALSE) const 61 | { 62 | size_t count = bytes.size(); 63 | if (count > 0) 64 | { 65 | if (singleByteWildCard) 66 | { 67 | string.reserve(count * SIZESTR("? ")); 68 | for (size_t i = 0; i < count; i++) 69 | { 70 | if (mask[i]) 71 | string.cat_sprnt("%02X ", bytes[i]); 72 | else 73 | string.cat_sprnt("? "); 74 | } 75 | 76 | // Remove the final ' ' space 77 | string.remove_last(); 78 | 79 | } 80 | else 81 | { 82 | string.reserve(count * SIZESTR("?? ")); 83 | for (size_t i = 0; i < count; i++) 84 | { 85 | if (mask[i]) 86 | string.cat_sprnt("%02X ", bytes[i]); 87 | else 88 | string.cat_sprnt("?? 
"); 89 | } 90 | 91 | // Remove the final ' ' space 92 | string.remove_last(); 93 | } 94 | } 95 | } 96 | 97 | // Convert mask to a "code" style mask string; "xxxxxxx????xxx" 98 | void ToMaskString(__out qstring &maskStr) const 99 | { 100 | int count = (int) mask.size(); 101 | maskStr.resize(count + 1); 102 | for (int i = 0; i < count; i++) 103 | { 104 | if (mask[i]) 105 | maskStr[i] = 'x'; 106 | else 107 | maskStr[i] = '?'; 108 | } 109 | } 110 | 111 | // Convert byte pattern to '\x' "code" style encoding; "\x45\xAA\xCC\xCC\xCC\x9A\xFA" 112 | void ToCodeString(__out qstring &string) const 113 | { 114 | size_t count = bytes.size(); 115 | if (count > 0) 116 | { 117 | string.reserve(count * SIZESTR("\\xCC")); 118 | for (size_t i = 0; i < count; i++) 119 | { 120 | if (mask[i]) 121 | string.cat_sprnt("\\x%02X", bytes[i]); 122 | else 123 | string += "\\xCC"; 124 | } 125 | } 126 | } 127 | 128 | // Convert signature to a "inline" byte style C string. E.g. "{0x33,0x9A,0xFA,0xAE,0xAE,0xAE,0xAE,0x45,0x68}" 129 | void ToInlineString(__out qstring &string) const 130 | { 131 | size_t count = bytes.size(); 132 | if (count > 0) 133 | { 134 | string = "const BYTE name_me[]={"; 135 | for (size_t i = 0; i < count; i++) 136 | { 137 | if (mask[i]) 138 | string.cat_sprnt("0x%02X,", bytes[i]); 139 | else 140 | string.cat_sprnt("0x%02X,", settings.maskByte); 141 | } 142 | string.remove_last(); 143 | string += "};"; 144 | } 145 | } 146 | 147 | // Right trim wildcards from signature if they exist 148 | void trim() 149 | { 150 | size_t len = 0; 151 | for (size_t i = (bytes.size() - 1); i > 0; i--) 152 | { 153 | if (!mask[i]) 154 | len++; 155 | else 156 | break; 157 | } 158 | 159 | if (len) 160 | { 161 | size_t newSize = (bytes.size() - len); 162 | bytes.resize(newSize); 163 | mask.resize(newSize); 164 | } 165 | } 166 | 167 | // Return wildcard/mask count 168 | size_t wildcards() const 169 | { 170 | size_t count = 0; 171 | 172 | // TODO: Vectorize this functions for speed? 
173 | size_t size = bytes.size(); 174 | for (size_t i = 0; i < size; ++i) 175 | { 176 | if (!mask[i]) 177 | count++; 178 | } 179 | 180 | return count; 181 | } 182 | 183 | // Return TRUE is there is one or more wildcard/mask bytes 184 | __inline BOOL hasMask() const 185 | { 186 | return memchr(mask.data(), 0, bytes.size()) != NULL; 187 | } 188 | 189 | // ------------------------------------------------------------------------ 190 | 191 | SIG& operator+=(const SIG &rhs) 192 | { 193 | // Append another sig to me 194 | bytes.insert(bytes.end(), rhs.bytes.begin(), rhs.bytes.end()); 195 | mask.insert(mask.end(), rhs.mask.begin(), rhs.mask.end()); 196 | return *this; 197 | } 198 | }; 199 | 200 | // Search.cpp 201 | enum SSTATUS 202 | { 203 | NOT_FOUND, // Signature not found error 204 | UNIQUE, // Unique, single instance found 205 | NOT_UNIQUE // Not unique, more than one instance found 206 | }; 207 | SSTATUS SearchSignature(const SIG &sig); 208 | void SearchCleanup(); 209 | 210 | // Signature.cpp 211 | void CreateFunctionSig(); 212 | void CreateAddressSig(); 213 | void CreateAddressRangeSig(); 214 | void OutputSignature(const SIG &sig, ea_t address, UINT32 offset); 215 | -------------------------------------------------------------------------------- /SigMakerEx.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.13.35913.81 d17.13 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SigMakerEx", "SigMakerEx.vcxproj", "{11111111-AAAA-BBBB-CCCC-777777777777}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Release|x64 = Release|x64 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {11111111-AAAA-BBBB-CCCC-777777777777}.Debug|x64.ActiveCfg = 
Debug|x64 15 | {11111111-AAAA-BBBB-CCCC-777777777777}.Debug|x64.Build.0 = Debug|x64 16 | {11111111-AAAA-BBBB-CCCC-777777777777}.Release|x64.ActiveCfg = Release|x64 17 | {11111111-AAAA-BBBB-CCCC-777777777777}.Release|x64.Build.0 = Release|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {802A9F2E-7038-472F-B090-4154959F470D} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /SigMakerEx.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | SigMakerEx 15 | {11111111-AAAA-BBBB-CCCC-777777777777} 16 | plugin 17 | Win32Proj 18 | 10.0 19 | 20 | 21 | 22 | DynamicLibrary 23 | v143 24 | NotSet 25 | true 26 | false 27 | 28 | 29 | DynamicLibrary 30 | v143 31 | NotSet 32 | false 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | <_ProjectFileVersion>12.0.21005.1 46 | 47 | 48 | true 49 | false 50 | IDA_SigMaker 51 | true 52 | $(SolutionDir)$(Platform)\$(Configuration)\imd\ 53 | 54 | 55 | false 56 | false 57 | IDA_SigMaker 58 | true 59 | $(SolutionDir)$(Platform)\$(Configuration)\imd\ 60 | 61 | 62 | 63 | Disabled 64 | $(_IDADIR)\idasdk\include;$(IDASUPPORT)\IDA_WaitEx\include;$(IDASUPPORT)\Utility 65 | __EA64__;__X64__;_DEBUG;_WINDOWS;_USRDLL;__NT__;__IDP__;__VC__;QT_NAMESPACE=QT;QT_NO_UNICODE_LITERAL;_CRT_SECURE_NO_WARNINGS;TARGET_NAME="$(TargetFileName)";%(PreprocessorDefinitions) 66 | Async 67 | EnableFastChecks 68 | NotUsing 69 | Level3 70 | ProgramDatabase 71 | false 72 | MultiThreadedDebugDLL 73 | Fast 74 | true 75 | stdcpp17 76 | AdvancedVectorExtensions2 77 | false 78 | 79 | 80 | ida.lib;User32.lib;Ole32.lib 81 | $(OutDir)$(TargetFileName) 82 | 
$(_IDADIR)\idasdk\lib\x64_win_vc_64;$(_IDADIR)\idasdk\lib\x64_win_qt;$(IDASUPPORT)\IDA_WaitEx\lib;%(AdditionalLibraryDirectories) 83 | true 84 | 85 | 86 | Windows 87 | false 88 | 89 | 90 | true 91 | 92 | 93 | copy "$(OutDir)$(TargetFileName)" "$(_IDADIR)\plugins" 94 | 95 | 96 | 97 | 98 | MaxSpeed 99 | Speed 100 | $(_IDADIR)\idasdk\include;$(IDASUPPORT)\IDA_WaitEx\include;$(IDASUPPORT)\Utility 101 | __EA64__;__X64__;NDEBUG;_WINDOWS;_USRDLL;__NT__;__IDP__;__VC__;QT_NO_DEBUG;QT_NAMESPACE=QT;QT_NO_UNICODE_LITERAL;_CRT_SECURE_NO_WARNINGS;TARGET_NAME="$(TargetFileName)";%(PreprocessorDefinitions) 102 | true 103 | Async 104 | false 105 | NotUsing 106 | Level3 107 | ProgramDatabase 108 | false 109 | Fast 110 | true 111 | AnySuitable 112 | true 113 | stdcpp17 114 | AdvancedVectorExtensions2 115 | false 116 | 117 | 118 | ida.lib;User32.lib;Ole32.lib 119 | $(OutDir)$(TargetFileName) 120 | $(_IDADIR)\idasdk\lib\x64_win_vc_64;$(_IDADIR)\idasdk\lib\x64_win_qt;$(IDASUPPORT)\IDA_WaitEx\lib;%(AdditionalLibraryDirectories) 121 | 122 | 123 | Windows 124 | true 125 | true 126 | true 127 | 128 | 129 | 130 | 131 | true 132 | /NOVCFEATURE /NOCOFFGRPINFO %(AdditionalOptions) 133 | 134 | 135 | @if exist "%_TOOLS%\peupdate\peupdate.exe" ("%_TOOLS%\peupdate\peupdate.exe" -s -r -q "$(OutDir)$(TargetFileName)") 136 | copy "$(OutDir)$(TargetFileName)" "$(_IDADIR)\plugins" 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /SigMakerEx.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | Support 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | Support 17 | 18 | 19 | Support 20 | 21 | 22 | 23 | 24 | {42b24ecd-c647-4e57-b467-384b3c0e7c71} 25 | 26 | 27 | -------------------------------------------------------------------------------- /SigMakerEx.vcxproj.user: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(_IDADIR)\ida.exe 5 | WindowsLocalDebugger 6 | 7 | 8 | $(_IDADIR)\ida.exe 9 | WindowsLocalDebugger 10 | 11 | -------------------------------------------------------------------------------- /Signature.cpp: -------------------------------------------------------------------------------- 1 | 2 | // Main signature generation code 3 | #include "SigMaker.h" 4 | #include 5 | #include 6 | 7 | #define WAIT_BOX_UPDATE() { if (WaitBox::isUpdateTime()) WaitBox::updateAndCancelCheck(); } 8 | 9 | // Unique signature match container 10 | struct SIGMATCH 11 | { 12 | SIG sig; 13 | ea_t ea; 14 | UINT32 size; 15 | UINT32 wildcards; 16 | 17 | SIGMATCH(SIG &_sig, ea_t match_ea) 18 | { 19 | _sig.trim(); 20 | sig = _sig; 21 | ea = match_ea; 22 | size = (UINT32) _sig.bytes.size(); 23 | wildcards = (UINT32) _sig.wildcards(); 24 | } 25 | 26 | bool operator <(const SIGMATCH &b) const 27 | { 28 | //return size < b.size; 29 | return std::pair(size, wildcards) < std::pair(b.size, b.wildcards); 30 | } 31 | }; 32 | typedef std::list UNIQUELIST; 33 | 34 | // Container for instruction signature "siglets" 35 | typedef std::vector SIGLETS; 36 | 37 | 38 | // Output signature to the IDA log pane 39 | void OutputSignature(const SIG &sig, ea_t address, UINT32 offset) 40 | { 41 | if (offset == 0) 42 | msg("SIG: 0x%llX, %u bytes %u, wildcards.\n", address, (UINT32) sig.bytes.size(), (UINT32) sig.wildcards()); 43 | else 44 | msg("SIG: 0x%llX, @ Offset: 0x%X, %u bytes, %u wildcards\n", address, offset, (UINT32) sig.bytes.size(), (UINT32) sig.wildcards()); 45 | 46 | // Always output IDA format at minimal 47 | qstring tmp; 48 | if (settings.outputFormat == SETTINGS::OF_IDA2) 49 | { 50 | sig.ToIdaString(tmp, TRUE); 51 | msg("IDA: \"%s\"\n", tmp.c_str()); 52 | } 53 | else 54 | { 55 | sig.ToIdaString(tmp); 56 | msg("IDA: \"%s\"\n", tmp.c_str()); 57 | } 58 | 59 | switch (settings.outputFormat) 60 | { 61 | // Escape 
encoded binary with ASCII mask "code" style in two strings. 62 | // E.g. "\x33\x9A\xFA\x00\x00\x00\x00\x45\x68", "xxxxxxx????xx" 63 | case SETTINGS::OF_CODE: 64 | { 65 | qstring code; 66 | sig.ToCodeString(code); 67 | qstring mask; 68 | sig.ToMaskString(mask); 69 | tmp.sprnt("\"%s\", \"%s\"", code.c_str(), mask.c_str()); 70 | msg("%s\n", tmp.c_str()); 71 | } 72 | break; 73 | 74 | // Like "code" style, but byte string with inlined wildcard bytes 75 | // E.g. "\x33\x9A\xFA\xAD\xAD\xAD\xAD\x45\x68", where 0xAD is the wildcard bytes 76 | case SETTINGS::OF_INLINE: 77 | { 78 | qstring bytes; 79 | sig.ToInlineString(bytes); 80 | msg("\"%s\"\n", bytes.c_str()); 81 | qstring comment; 82 | comment.sprnt("// \"%s\"\n", tmp.c_str()); 83 | tmp.sprnt("%s%s\n", comment.c_str(), bytes.c_str()); 84 | if (offset) 85 | { 86 | qstring offstr; 87 | offstr.sprnt("const UINT32 name_me_offset = 0x%X;", offset); 88 | msg("\"%s\"\n", offstr.c_str()); 89 | tmp += offstr; 90 | } 91 | msg("\"const BYTE MASK_BYTE = 0x%X;\"\n", settings.maskByte); 92 | } 93 | break; 94 | }; 95 | WaitBox::processIdaEvents(); 96 | 97 | SetClipboard(tmp.c_str()); 98 | } 99 | 100 | // ------------------------------------------------------------------------------------------------ 101 | // Instruction analysis 102 | 103 | static inline BOOL isJmpCntl(UINT32 type) { return((type >= NN_ja) && (type <= NN_jz)); } // Return TRUE if a conditional jump instruction 104 | static inline BOOL isJmpNotCntl(UINT32 type) { return((type >= NN_jmp) && (type <= NN_jmpshort)); } // Return TRUE if a non-conditional jump instruction 105 | static inline BOOL isCall(UINT32 type) { return((type >= NN_call) && (type <= NN_callni)); } // Return TRUE if is a call instruction 106 | static inline BOOL IsIdbAddress(ea_t address) { return((address >= inf_get_min_ea()) && (address < inf_get_max_ea())); } // Returns TRUE if address is inside this IDB 107 | 108 | // Return the instruction operand offset if it has one 109 | static UINT32 
OperandOffset(__in insn_t &cmd) 110 | { 111 | // For x86/AMD64 there will only be a few operands at most 112 | for (UINT32 i = 0; i < UA_MAXOP; i++) 113 | { 114 | // Hit end of opcode entries? 115 | optype_t type = cmd.ops[i].type; 116 | if ((type == o_void) || (type == (o_idpspec5 + 1))) 117 | return 0; 118 | else 119 | // Has an operand value 120 | if (cmd.ops[i].offb != 0) 121 | return cmd.ops[i].offb; 122 | } 123 | return 0; 124 | } 125 | 126 | // Get largest value of the instruction operands be it a displacement or immediate value, etc., and considering the MSB/sign bit 127 | static ea_t LargestOperandValue(insn_t &cmd, ea_t test_ea) 128 | { 129 | // IDA conveniently returns absolute addresses (not relative ones) 130 | // Note: 'test_ea' is only referenced by the commented-out debug traces below 131 | // TODO: For the sign assumptions here, could check for AWE aware flag (PE header flag IMAGE_FILE_LARGE_ADDRESS_AWARE) for 32bit targets 132 | // Rare PE header flag for 32bit but a possibility still. 133 | const ea_t HIGH_BIT = (inf_is_64bit() ? 0x8000000000000000 : 0x80000000); // Evaluated on every call (not latched in a static) so it always matches the current database 134 | ea_t result = 0; 135 | 136 | for (UINT32 i = 0; i < UA_MAXOP; i++) 137 | { 138 | optype_t type = cmd.ops[i].type; 139 | if ((type == o_void) || (type == (o_idpspec5 + 1))) 140 | break; 141 | else 142 | { 143 | ea_t value = (ea_t) cmd.ops[i].value; 144 | //if ((value & HIGH_BIT) && (type == o_imm)) 145 | // msg(EAFORMAT " v: %llX\n", test_ea, value); 146 | 147 | // Ignore signed immediate value, assume it's a flag value that can be ignored 148 | if (!((value & HIGH_BIT) && (type == o_imm))) 149 | if (value > result) 150 | result = value; 151 | 152 | // Ignore signed displacements and memory references 153 | ea_t adress = cmd.ops[i].addr; 154 | if (!((adress & HIGH_BIT) && ((type == o_displ) || (type == o_mem)))) 155 | if (adress > result) 156 | result = adress; 157 | 158 | //if (result & HIGH_BIT) 159 | // msg(EAFORMAT " %llX %llX t: %d\n", test_ea, value, adress, type); 160 | } 161 | } 162 | 163 | return result; 164 | } 165 | 166 | // Decode an instruction into a sig
container 167 | static void AddInst(__in_opt func_t *pfn, __in insn_t &cmd, __inout SIG &sig) 168 | { 169 | UINT32 offb = OperandOffset(cmd); 170 | if (offb != 0) 171 | { 172 | // Filter out all call targets 173 | BOOL filter = FALSE; 174 | if (isCall(cmd.itype)) 175 | filter = TRUE; 176 | else 177 | // Check jump targets 178 | if (isJmpCntl(cmd.itype) || isJmpNotCntl(cmd.itype)) 179 | { 180 | // If we have function bounds, test for membership 181 | if (pfn) 182 | { 183 | // Filter if jump target is outside of our function 184 | ea_t target_ea = LargestOperandValue(cmd, cmd.ea); 185 | filter = !func_contains(pfn, target_ea); 186 | } 187 | else 188 | // Else, keep short jumps and filter the rest 189 | { 190 | if (cmd.size != 2) 191 | filter = TRUE; 192 | } 193 | } 194 | else 195 | { 196 | // Filter immediate values that are probably an address 197 | if (ea_t value = LargestOperandValue(cmd, cmd.ea)) 198 | filter = IsIdbAddress(value); 199 | } 200 | 201 | if (filter) 202 | { 203 | // Save the leading instruction bytes and wildcard the rest 204 | sig.AddBytes(cmd.ea, offb); 205 | sig.AddWildcards(cmd.size - offb); 206 | } 207 | else 208 | sig.AddBytes(cmd.ea, cmd.size); 209 | } 210 | else 211 | // No operand value 212 | sig.AddBytes(cmd.ea, cmd.size); 213 | } 214 | 215 | 216 | // ------------------------------------------------------------------------------------------------ 217 | 218 | // Dump a function's siglets for development 219 | static void DumpFuncSiglets(__in func_t *pfn, __in SIGLETS &siglets) 220 | { 221 | qstring name; 222 | get_func_name(&name, pfn->start_ea); 223 | msg("--------------------- 0x%llX '%s' ---------------------\n", pfn->start_ea, name.c_str()); 224 | 225 | ea_t current_ea = pfn->start_ea; 226 | size_t count = siglets.size(); 227 | for (size_t i = 0; i < count; i++) 228 | { 229 | SIG &siglet = siglets[i]; 230 | UINT32 size = (UINT32) siglet.bytes.size(); 231 | 232 | msg("[%04u] %llX: ", (UINT32) i, current_ea); 233 | qstring str; 234 |
siglet.ToIdaString(str); 235 | msg("(%u) \"%s\"", size, str.c_str()); 236 | qstring disasm; 237 | getDisasmText(current_ea, disasm); 238 | msg(" '%s'\n", disasm.c_str()); 239 | current_ea += size; 240 | } 241 | 242 | msg("--------------------- 0x%llX '%s' ---------------------\n", pfn->end_ea, name.c_str()); 243 | } 244 | 245 | // Decode instruction into a siglet 246 | // Returns the instruction/alignment item size in bytes on success, else <= 0 on error 247 | static int InstToSig(__in_opt func_t *pfn, ea_t current_ea, __out SIG &siglet) 248 | { 249 | // Decode instruction at this address 250 | insn_t cmd; 251 | int decodeSize = decode_insn(&cmd, current_ea); 252 | int itemSize = (int) get_item_size(current_ea); 253 | if (decodeSize <= 0) 254 | { 255 | // Decode failure 256 | // TODO: Fix bad instruction cases if/when encountered 257 | msg(MSG_TAG "** " __FUNCTION__ ": Decode failure @ 0x%llX! decodeSize: %d, itemSize: %d **\n", current_ea, decodeSize, itemSize); 258 | return -1; 259 | } 260 | 261 | if (decodeSize != itemSize) 262 | { 263 | // 99% of the time these are just "align" blocks 264 | flags64_t flags = get_flags_ex(current_ea, 0); 265 | if (is_align(flags)) 266 | { 267 | // Wildcard the itemSize count of bytes 268 | siglet.AddWildcards(itemSize); 269 | } 270 | else 271 | { 272 | // TODO: Fix more anomalous instruction cases as they are encountered.. 273 | msg(MSG_TAG "* " __FUNCTION__ ": Decode anomaly @ 0x%llX!
decodeSize: %d, itemSize: %d *\n", current_ea, decodeSize, itemSize); 274 | qstring outbuf; 275 | idaFlags2String(flags, outbuf); 276 | msg(" F: %08llX, \"%s\"\n", (UINT64) flags, outbuf.c_str()); // flags64_t is 64-bit, so print it with a matching 64-bit specifier 277 | qstring disasm; 278 | getDisasmText(current_ea, disasm); 279 | msg(" '%s'\n\n", disasm.c_str()); 280 | return -1; 281 | } 282 | } 283 | else 284 | { 285 | // Add instruction to signature 286 | AddInst(pfn, cmd, siglet); 287 | } 288 | 289 | return itemSize; 290 | } 291 | 292 | // Convert function instructions into an array of "siglets" 293 | // For disjointed chunk functions, only processes the first/entry chunk 294 | static BOOL FuncToSiglets(__in func_t *pfn, __out SIGLETS &siglets) 295 | { 296 | // Iterate function instructions 297 | func_item_iterator_t fIt; 298 | if (!fIt.set(pfn)) 299 | { 300 | msg(MSG_TAG "** Failed to init function iterator **\n"); 301 | return FALSE; 302 | } 303 | 304 | ea_t expected_ea = BADADDR; 305 | do 306 | { 307 | // Decode next instruction 308 | ea_t current_ea = fIt.current(); 309 | 310 | // Detect if we walked into some other function body 311 | // Will happen for the functions that have chunks spread out over different address ranges. 312 | // Also for the occasional broken function definition too. 313 | if ((current_ea != expected_ea) && (expected_ea != BADADDR)) 314 | { 315 | // We'll stop here, keep what we have, and return 316 | msg(MSG_TAG "* Into non-contiguous chunk @ 0x%llX, expected 0x%llX. Signature truncated.
* \n", current_ea, expected_ea); 317 | break; 318 | } 319 | 320 | // Add next instruction siglet 321 | SIG siglet; 322 | int itemSize = InstToSig(pfn, current_ea, siglet); 323 | if (itemSize >= 1) 324 | siglets.push_back(siglet); 325 | else 326 | return FALSE; 327 | 328 | expected_ea = (current_ea + itemSize); 329 | 330 | } while (fIt.next_not_tail()); 331 | 332 | return TRUE; 333 | } 334 | 335 | // Build a full function signature combined from a siglets array 336 | static void BuildFuncSig(__in const SIGLETS &siglets, __out SIG &sig) 337 | { 338 | for (const SIG &siglet: siglets) 339 | sig += siglet; 340 | 341 | // Trim any right side wildcards to make signature smaller 342 | sig.trim(); 343 | } 344 | 345 | // Look for a unique sig at given function siglet boundary position 346 | static ea_t FindSigAtFuncAddress(ea_t current_ea, ea_t end_ea, size_t sigIndex, const SIGLETS &siglets, __out SIG &outsig) 347 | { 348 | /* 349 | TODO: Currently sig candidates are generated from instruction boundary lengths. 350 | Walking by sub-instruction lengths could result in more smallish sig candidates. 351 | But it is already relatively slow from all the uniqueness queries; this would increase the amount of searches (thus the time) even more. 352 | Although typically using instruction lengths as it is returns plenty of candidates in the 5'ish byte length anyhow. 353 | */ 354 | 355 | // Expand our sig until we either find a unique one or we hit the end address 356 | SIG sig; 357 | size_t sigByteSize = 0; 358 | size_t sigletCount = siglets.size(); 359 | 360 | for (size_t i = sigIndex; i < sigletCount; i++) 361 | { 362 | const SIG &siglet = siglets[i]; 363 | sig += siglet; 364 | size_t byteSize = siglet.bytes.size(); 365 | sigByteSize += byteSize; 366 | 367 | // If sig byte size is MIN_SIG_SIZE or larger check if the sig is unique 368 | if (sigByteSize >= MIN_SIG_SIZE) 369 | { 370 | // Skip the cases like "E8 ?? ?? ?? ??"
371 | size_t nonMaskSize = (sigByteSize - sig.wildcards()); 372 | if (nonMaskSize > 1) 373 | { 374 | // Make a trimmed temp copy for further testing and faster scanning speed 375 | SIG tmp = sig; 376 | tmp.trim(); 377 | 378 | // Skip cases like "E8 ?? ?? ?? ??" 379 | if (tmp.bytes.size() >= MIN_SIG_SIZE) 380 | { 381 | // Unique sig now? 382 | SSTATUS status = SearchSignature(tmp); 383 | if (status == SSTATUS::UNIQUE) 384 | { 385 | // Yes, return it 386 | outsig = tmp; 387 | return current_ea; 388 | } 389 | else 390 | // To cover a case that can only happen during development 391 | if (status == SSTATUS::NOT_FOUND) 392 | return BADADDR; 393 | 394 | WAIT_BOX_UPDATE(); 395 | } 396 | } 397 | } 398 | 399 | current_ea += (ea_t) byteSize; 400 | if (current_ea >= end_ea) 401 | break; 402 | } 403 | 404 | return BADADDR; 405 | } 406 | 407 | 408 | // Find minimal at instruction boundary, inside a function (already known to be unique), signature. 409 | static ea_t FindMinimalFuncSig(ea_t start_ea, ea_t end_ea, __in const SIGLETS &siglets, __out SIG &outsig) 410 | { 411 | // Walk through each siglet from the top down at instruction boundaries 412 | UNIQUELIST canidates; 413 | ea_t current_ea = start_ea; 414 | size_t count = siglets.size(); 415 | 416 | for (size_t i = 0; i < count; i++) 417 | { 418 | // Try to find a unique sig at this address for siglet position 419 | const SIG &siglet = siglets[i]; 420 | SIG sig; 421 | ea_t result_ea = FindSigAtFuncAddress(current_ea, end_ea, i, siglets, sig); 422 | if (result_ea != BADADDR) 423 | { 424 | // Save candidate 425 | SIGMATCH canidate(sig, current_ea); 426 | canidates.push_back(canidate); 427 | 428 | // If at MIN_SIG_SIZE or less and no wildcards stop with this one 429 | if ((canidate.size <= MIN_SIG_SIZE) && (canidate.wildcards == 0)) 430 | { 431 | LOG_VERBOSE(__FUNCTION__ ": Found ideal canidate: %u, %u.\n", canidate.size, canidate.wildcards); 432 | break; 433 | } 434 | } 435 | 436 | current_ea += (ea_t) siglet.bytes.size(); 437 
| } 438 | 439 | // Sport unique sig canidates by ascending primarily size, secondarily by 2nd wildcard count 440 | canidates.sort(); 441 | 442 | if (settings.outputLevel >= SETTINGS::LL_VERBOSE) 443 | { 444 | msg("\nUnique sig canidates: %u\n", (UINT32)canidates.size()); 445 | for (SIGMATCH &c: canidates) 446 | { 447 | qstring str; 448 | c.sig.ToIdaString(str); 449 | msg("%llX: (%02u, %02u) '%s'\n", c.ea, c.size, c.wildcards, str.c_str()); 450 | } 451 | WaitBox::processIdaEvents(); 452 | } 453 | 454 | // Return the topmost/best 455 | outsig = canidates.front().sig; 456 | return canidates.front().ea; 457 | } 458 | 459 | // Find unique sig at function (already known to be unique) entry point downward 460 | // The size will be anywhere from MIN_SIG_SIZE to the entire function body size 461 | static ea_t FindFuncEntryPointSig(ea_t start_ea, __in SIG &funcSig, __out SIG &outsig) 462 | { 463 | // Walk function sig down a byte at the time until we build a unique sig 464 | funcSig.trim(); 465 | size_t sigSize = funcSig.bytes.size(); 466 | size_t sigByteSize = 0; 467 | outsig.bytes.reserve(sigSize); 468 | outsig.mask.reserve(sigSize); 469 | 470 | for (size_t i = 0; i < sigSize; i++) 471 | { 472 | // Append next byte from function sig 473 | outsig.bytes.push_back(funcSig.bytes[i]); 474 | outsig.mask.push_back(funcSig.mask[i]); 475 | sigByteSize += 1; 476 | 477 | // If sig byte size is MIN_SIG_SIZE or greater check if the sig is unique 478 | if (sigByteSize >= MIN_SIG_SIZE) 479 | { 480 | // Make a trimmed temp copy for further testing and faster scan speed 481 | SIG tmp = outsig; 482 | tmp.trim(); 483 | 484 | // Skip cases like "E8 ?? ?? ?? ??" 485 | if (tmp.bytes.size() >= MIN_SIG_SIZE) 486 | { 487 | // Unique now? 
488 | SSTATUS status = SearchSignature(tmp); 489 | if (status == SSTATUS::UNIQUE) 490 | { 491 | // Yes, return it 492 | outsig = tmp; 493 | return start_ea; 494 | } 495 | else 496 | // To cover a case that can only happen during development 497 | if (status == SSTATUS::NOT_FOUND) 498 | return BADADDR; 499 | 500 | WAIT_BOX_UPDATE(); 501 | } 502 | } 503 | } 504 | 505 | return BADADDR; 506 | } 507 | 508 | // Find the optimal function (already known to be unique) signature based on user criteria setting 509 | ea_t FindFuncSig(__in const func_t *pfn, __in const SIGLETS &siglets, __in SIG &funcSig, __out SIG &outsig, UINT32 &offset) 510 | { 511 | switch (settings.funcCriteria) 512 | { 513 | // Sig from function entry point downward 514 | case SETTINGS::FUNC_ENTRY_POINT: 515 | { 516 | ea_t result_ea = FindFuncEntryPointSig(pfn->start_ea, funcSig, outsig); 517 | offset = 0; 518 | return result_ea; 519 | } 520 | break; 521 | 522 | // Minimal optimal function sig 523 | case SETTINGS::FUNC_MIN_SIZE: 524 | { 525 | ea_t result_ea = FindMinimalFuncSig(pfn->start_ea, pfn->end_ea, siglets, outsig); 526 | offset = (UINT32) (result_ea - pfn->start_ea); 527 | return result_ea; 528 | } 529 | break; 530 | 531 | // Full function sig 532 | case SETTINGS::FUNC_FULL: 533 | { 534 | funcSig.trim(); 535 | outsig = funcSig; 536 | offset = 0; 537 | return pfn->start_ea; 538 | } 539 | break; 540 | }; 541 | 542 | return BADADDR; 543 | } 544 | 545 | 546 | // ------------------------------------------------------------------------------------------------ 547 | 548 | // Look for a unique function sig at given address 549 | // Returns base address of sig, or BADADDR on failure 550 | static ea_t FindSigAtFuncAddress(ea_t current_ea, __in func_t *pfn, __out SIG &outsig) 551 | { 552 | // Expand our sig until we either find a unique one or we hit the end address.. 
553 | SIG sig; 554 | size_t sigByteSize = 0; 555 | ea_t end_ea = pfn->end_ea; 556 | const ea_t base_ea = current_ea; // Remember where the sig starts; current_ea advances as instructions are appended 557 | while ((current_ea != BADADDR) && (current_ea < end_ea)) 558 | { 559 | SIG siglet; 560 | int itemSize = InstToSig(pfn, current_ea, siglet); 561 | if (itemSize >= 1) 562 | sig += siglet; 563 | else 564 | return BADADDR; 565 | sigByteSize += (size_t) itemSize; 566 | 567 | // If sig byte size is MIN_SIG_SIZE or larger check if the sig is unique 568 | if (sigByteSize >= MIN_SIG_SIZE) 569 | { 570 | // Make a trimmed temp copy for further testing and faster scan speed 571 | SIG tmp = sig; 572 | tmp.trim(); 573 | 574 | // Skip cases like "E8 ?? ?? ?? ??" 575 | if (tmp.bytes.size() >= MIN_SIG_SIZE) 576 | { 577 | // Unique sig now? 578 | SSTATUS status = SearchSignature(tmp); 579 | if (status == SSTATUS::UNIQUE) 580 | { 581 | // Yes, return it along with the address the sig starts at 582 | outsig = tmp; 583 | return base_ea; 584 | } 585 | else 586 | // To cover a case that can only happen during development 587 | if (status == SSTATUS::NOT_FOUND) 588 | return BADADDR; 589 | 590 | WAIT_BOX_UPDATE(); 591 | } 592 | } 593 | 594 | current_ea += (ea_t) itemSize; 595 | if (current_ea >= end_ea) 596 | break; 597 | } 598 | 599 | return BADADDR; 600 | } 601 | 602 | // Look for a unique sig at given address; same as above sans function requirement 603 | // Returns base address of sig, or BADADDR on failure 604 | static ea_t FindSigAtAddress(ea_t current_ea, __out SIG &outsig) 605 | { 606 | // Expand our sig until we either find a unique one, we run into a function, or we hit a non-address 607 | SIG sig; 608 | size_t sigByteSize = 0; 609 | const ea_t base_ea = current_ea; // Remember where the sig starts; current_ea advances as instructions are appended 610 | while (TRUE) 611 | { 612 | // Bail if we are no longer inside of a valid code space 613 | flags64_t flags = get_flags_ex(current_ea, 0); 614 | if (!is_code(flags)) 615 | { 616 | LOG_VERBOSE(__FUNCTION__ ": 0x%llX no longer in a valid code space.\n", current_ea); 617 | break; 618 | } 619 | 620 | // Check if we walked into a function now 621 | // The assumption is the user wants a sig for some
place not inside of a function and now we 622 | // walked into one at or past the entry point. 623 | //if(get_func(current_ea)) 624 | if (is_func(flags)) 625 | { 626 | LOG_VERBOSE(__FUNCTION__ ": 0x%llX walked into a function.\n", current_ea); 627 | break; 628 | } 629 | 630 | SIG siglet; 631 | int itemSize = InstToSig(NULL, current_ea, siglet); 632 | if (itemSize >= 1) 633 | sig += siglet; 634 | else 635 | return BADADDR; 636 | sigByteSize += (size_t)itemSize; 637 | 638 | // If sig byte size is MIN_SIG_SIZE or larger check if the sig is unique 639 | if (sigByteSize >= MIN_SIG_SIZE) 640 | { 641 | // Make a trimmed temp copy for further testing and faster scan speed 642 | SIG tmp = sig; 643 | tmp.trim(); 644 | 645 | // Skip cases like "E8 ?? ?? ?? ??" 646 | if (tmp.bytes.size() >= MIN_SIG_SIZE) 647 | { 648 | // Unique sig now? 649 | SSTATUS status = SearchSignature(tmp); 650 | if (status == SSTATUS::UNIQUE) 651 | { 652 | // Yes, return it along with the address the sig starts at 653 | outsig = tmp; 654 | return base_ea; 655 | } 656 | else 657 | // To cover a case that can only happen during development 658 | if (status == SSTATUS::NOT_FOUND) 659 | return BADADDR; 660 | 661 | WAIT_BOX_UPDATE(); 662 | } 663 | } 664 | 665 | current_ea += (ea_t) itemSize; 666 | } 667 | 668 | return BADADDR; 669 | } 670 | 671 | // Attempt to find a function entry code reference sig and output it 672 | BOOL FindFuncXrefSig(ea_t func_ea) 673 | { 674 | // Get first cref to the function if there is one 675 | ea_t ref_ea = get_first_cref_to(func_ea); 676 | if (ref_ea == BADADDR) 677 | { 678 | LOG_VERBOSE("No crefs available.\n"); 679 | } 680 | else 681 | { 682 | // Gather target function references best sig canidates.. 683 | UNIQUELIST canidates; 684 | 685 | // Override maximum ref limit search if setting exists, else use unlimited 686 | // TODO: Could be situations where we look at 100's, if not thousands of refs, trying a sig at each taking seconds if not minutes.
687 | // Might need a default max limit and/or iteration time limit. 688 | UINT32 refLimit = ((settings.maxScanRefCount > 0) ? settings.maxScanRefCount : UINT_MAX); 689 | UINT32 sigCount = 0; 690 | 691 | while ((ref_ea != BADADDR) && (sigCount < refLimit)) 692 | { 693 | func_t *pfn = get_func(ref_ea); 694 | if (pfn) 695 | { 696 | LOG_VERBOSE("[%u] Function ref @ 0x%llX, Func: 0x%llX\n", sigCount, ref_ea, pfn->start_ea); 697 | 698 | // Look for a unique sig from reference branch down 699 | SIG sig; 700 | ea_t sig_ea = FindSigAtFuncAddress(ref_ea, pfn, sig); 701 | if (sig_ea != BADADDR) 702 | { 703 | // Save candidate 704 | SIGMATCH canidate(sig, ref_ea); 705 | canidates.push_back(canidate); 706 | 707 | // The ref sigs are going to start with the reference branch instruction. 708 | // So we are looking at least a 5 byte sig with wildcards to begin with. 709 | // Bail out now if we got a good minimal sig. 710 | static const UINT32 BRANCH_INSTRUCTION_SIZE = 5; // E.g. "E8 ?? ?? ?? ??" 711 | if ((canidate.size <= (BRANCH_INSTRUCTION_SIZE + MIN_SIG_SIZE)) && (canidate.wildcards <= (BRANCH_INSTRUCTION_SIZE - 1))) 712 | { 713 | LOG_VERBOSE(__FUNCTION__ ": Found good minimal canidate: %u, %u.\n", canidate.size, canidate.wildcards); 714 | break; 715 | } 716 | } 717 | else 718 | LOG_VERBOSE(" Ref not unique or error occurred, skipped.\n"); 719 | } 720 | 721 | sigCount++; 722 | ref_ea = get_next_cref_to(func_ea, ref_ea); 723 | }; 724 | 725 | if (!canidates.empty()) 726 | { 727 | // Sort sig canidates by ascending primarily size, secondarily by 2nd wildcard count 728 | canidates.sort(); 729 | 730 | if (settings.outputLevel >= SETTINGS::LL_VERBOSE) 731 | { 732 | msg("\nXfef sig canidates: %u\n", (UINT32) canidates.size()); 733 | for (SIGMATCH &c: canidates) 734 | { 735 | qstring str; 736 | c.sig.ToIdaString(str); 737 | msg("%llX: (%02u, %02u) '%s'\n", c.ea, c.size, c.wildcards, str.c_str()); 738 | } 739 | msg("\n"); 740 | WaitBox::processIdaEvents(); 741 | } 742 | 743 | // 
Output the topmost/best canidate 744 | msg("Function reference "); 745 | OutputSignature(canidates.front().sig, canidates.front().ea, 0); 746 | return TRUE; 747 | } 748 | } 749 | 750 | // If we made it here, we didn't find a xref sig 751 | return FALSE; 752 | } 753 | 754 | // ------------------------------------------------------------------------------------------------ 755 | 756 | // Attempt to create unique function signature at selected address 757 | void CreateFunctionSig() 758 | { 759 | // User selected address 760 | ea_t ea_selection = get_screen_ea(); 761 | if (ea_selection == BADADDR) 762 | { 763 | msg(MSG_TAG "* Select a function address first *\n"); 764 | return; 765 | } 766 | 767 | // Address must be at or inside a function 768 | func_t *pfn = get_func(ea_selection); 769 | if (!pfn) 770 | { 771 | msg(MSG_TAG "* Select an address inside a code function *\n"); 772 | return; 773 | } 774 | 775 | // Convert function into a instruction "siglets" for analysis 776 | msg("\n"); 777 | msg(MSG_TAG "Finding function signature.\n"); 778 | TIMESTAMP procStart = GetTimeStamp(); 779 | SIGLETS siglets; 780 | if (FuncToSiglets(pfn, siglets)) 781 | { 782 | if (settings.outputLevel >= SETTINGS::LL_VERBOSE) 783 | { 784 | msg("\nFunction siglets:\n"); 785 | DumpFuncSiglets(pfn, siglets); 786 | } 787 | } 788 | 789 | // Build a full function signature from the siglets 790 | SIG funcSig; 791 | BuildFuncSig(siglets, funcSig); 792 | if (settings.outputLevel >= SETTINGS::LL_VERBOSE) 793 | { 794 | qstring sigStr; 795 | funcSig.ToIdaString(sigStr); 796 | msg("\nFull sig: \"%s\"\n\n", sigStr.c_str()); 797 | } 798 | WaitBox::processIdaEvents(); 799 | WaitBox::show("SigMakerEx", "Working.."); 800 | WaitBox::updateAndCancelCheck(-1); 801 | 802 | // Check if the function is unique first. 
If it's not, we won't find a unique sig within it 803 | if (SearchSignature(funcSig) == SSTATUS::UNIQUE) 804 | { 805 | LOG_VERBOSE("Function is unique, finding optimal settings sig.\n"); 806 | 807 | // Find an optimal sig for the unique function 808 | SIG outsig; 809 | UINT32 offset = 0; 810 | ea_t sig_ea = FindFuncSig(pfn, siglets, funcSig, outsig, offset); 811 | if (sig_ea != BADADDR) 812 | { 813 | // If entry point criteria is active, check optional max byte size 814 | if (settings.funcCriteria == SETTINGS::FUNC_ENTRY_POINT) 815 | { 816 | if ((settings.maxEntryPointBytes != 0) && ((UINT32) outsig.bytes.size() > settings.maxEntryPointBytes)) 817 | { 818 | LOG_VERBOSE("\nEntry point signature byte count exceeds configured max, looking for a reference function sig instead.\n"); 819 | if (!FindFuncXrefSig(pfn->start_ea)) 820 | msg(MSG_TAG "* Failed to find a base or reference signature for selected function. *\n"); 821 | goto exit; 822 | } 823 | } 824 | 825 | msg("Function "); 826 | OutputSignature(outsig, sig_ea, offset); 827 | } 828 | } 829 | else 830 | // Not unique, look for a function reference signature instead 831 | { 832 | LOG_VERBOSE("\nFunction is not unique, looking for a reference function sig.\n"); 833 | if (!FindFuncXrefSig(pfn->start_ea)) 834 | msg(MSG_TAG "* Failed to find a base or reference signature for selected function. 
*\n"); 835 | } 836 | 837 | exit:; 838 | WaitBox::hide(); 839 | LOG_VERBOSE("Took %.3f seconds.\n", (GetTimeStamp() - procStart)); 840 | WaitBox::processIdaEvents(); 841 | } 842 | 843 | 844 | // ------------------------------------------------------------------------------------------------ 845 | 846 | // Attempt to create unique signature at selected address (inside a function or not) 847 | void CreateAddressSig() 848 | { 849 | // User selected address (does not have to be inside a function) 850 | ea_t ea_selection = get_screen_ea(); 851 | if (ea_selection == BADADDR) 852 | { 853 | msg(MSG_TAG "* Select an address first *\n"); 854 | return; 855 | } 856 | 857 | msg("\n"); 858 | msg(MSG_TAG "Finding signature for 0x%llX.\n", ea_selection); 859 | WaitBox::show("SigMakerEx", "Working.."); 860 | WaitBox::updateAndCancelCheck(-1); 861 | WaitBox::processIdaEvents(); 862 | TIMESTAMP procStart = GetTimeStamp(); 863 | 864 | // Ideally the address will be inside a function for better instruction analysis. Will typically 865 | // be the case, but not a requirement here. 866 | func_t *pfn = get_func(ea_selection); 867 | if (pfn) 868 | { 869 | LOG_VERBOSE("Selected address 0x%llX is inside function 0x%llX\n", ea_selection, pfn->start_ea); 870 | 871 | // Look for a minimal unique sig from address selection down 872 | SIG sig; 873 | ea_t sig_ea = FindSigAtFuncAddress(ea_selection, pfn, sig); 874 | if (sig_ea != BADADDR) 875 | { 876 | msg("Address "); 877 | OutputSignature(sig, ea_selection, 0); 878 | } 879 | else 880 | msg(MSG_TAG "* Failed to find unique signature at address. *\n"); 881 | } 882 | else 883 | { 884 | // The not inside a function version 885 | LOG_VERBOSE("Selected address 0x%llX is NOT inside a function.\n", ea_selection); 886 | 887 | SIG sig; 888 | ea_t sig_ea = FindSigAtAddress(ea_selection, sig); 889 | if (sig_ea != BADADDR) 890 | { 891 | msg("Address "); 892 | OutputSignature(sig, ea_selection, 0); 893 | } 894 | else 895 | msg(MSG_TAG "* Failed to find unique signature at address.
*\n"); 896 | } 897 | 898 | WaitBox::hide(); 899 | LOG_VERBOSE("Took %.3f seconds.\n", (GetTimeStamp() - procStart)); 900 | WaitBox::processIdaEvents(); 901 | } 902 | 903 | // ------------------------------------------------------------------------------------------------ 904 | 905 | void CreateAddressRangeSig() 906 | { 907 | // Generate signature from user selected address range, unique or not 908 | ea_t start_ea, end_ea; 909 | if (read_range_selection(get_current_viewer(), &start_ea, &end_ea)) 910 | { 911 | if ((end_ea - start_ea) < MIN_SIG_SIZE) 912 | { 913 | msg(MSG_TAG "* Code selection too small, needs to be at least %u bytes long. *\n", MIN_SIG_SIZE); 914 | return; 915 | } 916 | 917 | msg("\n"); 918 | msg(MSG_TAG "Creating signature from 0x%llX to 0x%llX.\n", start_ea, end_ea); 919 | WaitBox::processIdaEvents(); 920 | TIMESTAMP procStart = GetTimeStamp(); 921 | 922 | // Iterate instructions over range. 923 | SIG sig; 924 | func_item_iterator_t fIt; 925 | bool isWithinRange = fIt.set_range(start_ea, end_ea); 926 | if (!isWithinRange) { msg(MSG_TAG "* Failed to iterate the selected range *\n"); return; } // Don't walk an iterator that failed to initialize 927 | do 928 | { 929 | // Add next instruction to signature 930 | ea_t current_ea = fIt.current(); 931 | SIG siglet; 932 | int itemSize = InstToSig(get_func(current_ea), current_ea, siglet); 933 | if (itemSize >= 1) 934 | sig += siglet; 935 | else 936 | { 937 | // Bail on decode failure, already reported in InstToSig() 938 | return; 939 | } 940 | 941 | } while (fIt.next_not_tail()); 942 | 943 | if (!sig.bytes.empty()) 944 | { 945 | sig.trim(); 946 | msg("Range "); 947 | OutputSignature(sig, start_ea, 0); 948 | } 949 | 950 | LOG_VERBOSE("Took %.3f seconds.\n", (GetTimeStamp() - procStart)); 951 | } 952 | else 953 | { 954 | msg(MSG_TAG "* No code range selected *\n"); 955 | } 956 | WaitBox::processIdaEvents(); 957 | } 958 | -------------------------------------------------------------------------------- /StdAfx.h: -------------------------------------------------------------------------------- 1 | 2 | // Common header 3 | #pragma once 4 | 5 | #define
WIN32_LEAN_AND_MEAN 6 | #define WINVER 0x0A00 // _WIN32_WINNT_WIN10 7 | #define _WIN32_WINNT 0x0A00 8 | #include 9 | #include 10 | #include 11 | #include 12 | #pragma intrinsic(memset, memcpy, memcmp, strcat, strcmp, strcpy, strlen) 13 | 14 | // IDA SDK 15 | #define USE_DANGEROUS_FUNCTIONS 16 | #define USE_STANDARD_FILE_FUNCTIONS 17 | //#define NO_OBSOLETE_FUNCS 18 | #pragma warning(push) 19 | #pragma warning(disable:4244) // "conversion from 'ssize_t' to 'int', possible loss of data" 20 | #pragma warning(disable:4267) // "conversion from 'size_t' to 'uint32', possible loss of data" 21 | #pragma warning(disable:4146) // "unary minus operator applied to unsigned type, result still unsigned" 22 | #pragma warning(disable:4018) // warning C4018: '<': signed/unsigned mismatch 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #pragma warning(pop) 32 | 33 | #define MSG_TAG "SigMakerEx: " 34 | #include "Utility.h" 35 | 36 | #include "WaitBoxEx.h" 37 | 38 | #define MY_VERSION MAKE_SEMANTIC_VERSION(VERSION_RELEASE, 1, 3, 0) 39 | -------------------------------------------------------------------------------- /images/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kweatherman/sigmakerex/614e36f87c66633c6b04d77c0ebc828a56ad1660/images/main.png -------------------------------------------------------------------------------- /images/minimal_func_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kweatherman/sigmakerex/614e36f87c66633c6b04d77c0ebc828a56ad1660/images/minimal_func_example.png -------------------------------------------------------------------------------- /images/options.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kweatherman/sigmakerex/614e36f87c66633c6b04d77c0ebc828a56ad1660/images/options.png --------------------------------------------------------------------------------