├── tsk ├── Makefile └── scan.c ├── dissect └── dscan.py └── README.md /tsk/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | 3 | all: 4 | $(CC) -Wall -g -ggdb3 -fno-stack-protector scan.c -ltsk -o scan 5 | 6 | clean: 7 | rm -rf scan -------------------------------------------------------------------------------- /dissect/dscan.py: -------------------------------------------------------------------------------- 1 | 2 | from dissect.target import container, filesystem, volume 3 | from pathlib import Path 4 | from os import listdir, path, geteuid 5 | from sys import argv, exit 6 | 7 | # Ignore /run since it may produce large numbers of false positives 8 | IGNORE_RUN = True 9 | 10 | def get_filesystem(path: str) -> str: 11 | try: 12 | return filesystem.open(open(path, "rb")) 13 | except: 14 | try: 15 | return filesystem.open(volume.open(open(path, "rb"))) 16 | except: 17 | disk = container.open(path) 18 | vol = volume.open(disk) 19 | vol_i = 0 20 | if len(vol.volumes) > 1: 21 | for v in vol.volumes: 22 | print(f"[{vol.volumes.index(v)}] {v.name} ({v.size} bytes)") 23 | vol_i = int(input("$ ")) 24 | return filesystem.open(vol.volumes[vol_i]) 25 | 26 | def is_visible(path: str) -> bool: 27 | p = Path(path) 28 | parent = p.parent.absolute() 29 | try: 30 | return p.name in listdir(parent) 31 | except: 32 | print("Failed to open", path) 33 | return True 34 | 35 | def whitelist(path: str) -> bool: 36 | if path == "/": 37 | return True 38 | p = Path(path) 39 | if str(p.parents[-2]) == "/run" and IGNORE_RUN: 40 | return True 41 | return False 42 | 43 | def extract_file(file: filesystem.FilesystemEntry, path: str) -> None: 44 | out = open(path, "wb") 45 | out.write(file.open().read()) 46 | out.close() 47 | 48 | def scan_filesystem(disk_path: str, mount_point: str, extract_path: str) -> int: 49 | try: 50 | fs = get_filesystem(disk_path) 51 | print("Filesystem:", fs.__type__) 52 | except: 53 | print(disk_path, "is not a 
valid volume/disk OR does not contain a supported filesystem") 54 | exit(-1) 55 | 56 | hidden_files = 0 57 | extract = [] 58 | 59 | for _, dirs, files in fs.walk_ext("/", True, None, False): 60 | entries = dirs + files 61 | for entry in entries: 62 | full_path = path.join(mount_point, str(entry)) 63 | if not is_visible(full_path) and not whitelist(full_path): 64 | out_path = path.join(extract_path, full_path[1:]) 65 | print(f"Hidden: {full_path} ({entry.stat(False).st_ino})") 66 | hidden_files += 1 67 | extract.append([entry, out_path]) 68 | 69 | for entry, out_path in extract: 70 | if entry.is_file(False): 71 | Path(out_path).parent.mkdir(parents=True, exist_ok=True) 72 | try: 73 | extract_file(entry, out_path) 74 | print(f"Extracted: {out_path} | MD5={entry.md5()} SHA256={entry.sha256()}") 75 | except: 76 | print("Failed to extract:", out_path) 77 | else: 78 | Path(out_path).mkdir(parents=True, exist_ok=True) 79 | print("Created:", out_path) 80 | 81 | return hidden_files 82 | 83 | def main() -> int: 84 | if len(argv) >= 4: 85 | disk_path = argv[1] 86 | mount_point = argv[2] 87 | extract_path = argv[3] 88 | else: 89 | print(f"Usage: python {argv[0]} \nExample: python {argv[0]} /dev/sda / /evidence") 90 | exit(-1) 91 | 92 | if (geteuid() != 0): 93 | print("Root priviliges required to parse disk") 94 | exit(-1) 95 | 96 | hidden_files = scan_filesystem(disk_path, mount_point, extract_path) 97 | 98 | if hidden_files > 0: 99 | print(hidden_files, "hidden file(s) found") 100 | else: 101 | print("No hidden files found") 102 | 103 | return hidden_files 104 | 105 | if __name__ == "__main__": 106 | exit(main()) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 猟師 [ryōshi] 2 | 3 | These utilities enable forensic investigators to reliably detect and extract files, which have been hidden by rootkits. 
They are primarily designed for use on UNIX-based systems. The underlying principle, however, is platform-independent. 4 | 5 | --- 6 | 7 | ### How it works 8 | 9 | All of the rootkits that we have found and analyzed during our research, share a similarity when it comes to file hiding: They don't touch the disk. Instead, they filter and manipulate the data flow from the disk to the user. This allows us to detect discrepancies between files present on disk and files shown in userspace applications. 10 | 11 | ``` 12 | ubuntu@Ubuntu:~$ ls -la | grep diamorphine 13 | ubuntu@Ubuntu:~$ sudo fls -o 1054720 -r -p /dev/sda | grep diamorphine 14 | r/r 664159: home/ubuntu/diamorphine_secret_file 15 | ``` 16 | In the example above, we were of course aware of a hidden file and already knew its name. To find hidden files with arbitrary paths, an investigator has to iterate through all files present on disk and verify that they are visible in userspace. The utilities in this repository not only automate this task, but also assist in filtering false positives and extracting found files. 17 | 18 | ``` 19 | ubuntu@Ubuntu:~$ sudo ./scan /dev/sda3 / /usb/evidence 20 | /dev/sda3 (26302MB) -> ext (1605633 inodes) 21 | Hidden: /home/ubuntu/diamorphine_secret_file (664159) 22 | Extracted: /usb/evidence/home/ubuntu/diamorphine_secret_file | MD5=(...) SHA1=(...) 23 | ``` 24 | During extraction, the utilities attempt to replicate the directory structure of the scanned file system and calculate hash sums for extracted files. 25 | 26 | --- 27 | 28 | ### Installation & Usage 29 | 30 | We provide two different implementations: One is based on Brian Carrier's *The Sleuth Kit* and is written in C, while the other uses Dissect, a more modern forensic library written in Python. Both offer the same capabilities and should be chosen depending on the situation. In general: The C version is slightly faster, while the Python version is more portable. 
There are also small differences in supported file systems. Please consult their respective documentation for details. 31 | 32 | To be able to parse the disk, both implementations require root privileges. As arguments, they take: 33 | 34 | **\<volume\>** The disk/volume/filesystem to be parsed. It has to contain a supported file system. If you want to parse an encrypted volume or other logical volume, use the corresponding mapper instead of specifying the volume directly (e.g. /dev/mapper/ubuntu--vg-ubuntu--lv).
35 | **\<mount point\>** The path where the specified volume is mounted.
36 | **\<extract path\>** A path where extracted files should be stored. 37 | 38 | 39 | 40 | #### libtsk 41 | 42 | Note that statically compiling libtsk can greatly reduce this utility's footprint. 43 | 44 | ``` 45 | sudo apt install libtsk-dev build-essential 46 | git clone https://github.com/fkie-cad/ryoshi.git 47 | cd ryoshi/tsk 48 | make 49 | ``` 50 | ``` 51 | sudo ./scan <volume> <mount point> <extract path> 52 | sudo ./scan /dev/sda1 / /usb/evidence 53 | ``` 54 | 55 | #### dissect 56 | 57 | Python >= 3.7 is required. The following instructions also assume *pip* is installed. 58 | 59 | 60 | ``` 61 | sudo pip install dissect 62 | git clone https://github.com/fkie-cad/ryoshi.git 63 | cd ryoshi/dissect 64 | ``` 65 | ``` 66 | sudo python3 dscan.py <volume> <mount point> <extract path> 67 | sudo python3 dscan.py /dev/sda1 / /usb/evidence 68 | ``` 69 | 70 | Using a tool like *PyInstaller* or *PyOxidizer*, this utility can also be compiled into a single portable binary (the following example uses *PyInstaller*). 71 | 72 | ``` 73 | sudo apt install python3-venv binutils 74 | python3 -m venv .venv 75 | . .venv/bin/activate 76 | pip install dissect pyinstaller 77 | git clone https://github.com/fkie-cad/ryoshi.git 78 | cd ryoshi/dissect 79 | pyinstaller -F --clean --hidden-import pkgutil --hidden-import dissect --collect-submodules dissect dscan.py 80 | cd dist 81 | sudo ./dscan <volume> <mount point> <extract path> 82 | ``` 83 | For more information on deployment and the differences between *PyInstaller* and *PyOxidizer*, see the dissect [documentation](https://docs.dissect.tools/en/latest/tools/acquire.html#deployment). 84 | 85 | --- 86 | 87 | ### Limitations 88 | 89 | Note that, due to the functions used, these utilities are currently unable to detect files hidden by application-level rootkits. Since these rootkits have become very rare, we believe this blind spot to be mostly irrelevant in real-world scenarios. It should be possible to add support for specific applications if required. 90 | 91 | Also note that future rootkits may directly target these utilities to prevent detection. 
92 | 93 | --- 94 | 95 | ### References 96 | 97 | [The Sleuth Kit](https://www.sleuthkit.org/sleuthkit/)
98 | [Dissect](https://github.com/fox-it/dissect) 99 | -------------------------------------------------------------------------------- /tsk/scan.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // Skip the /run directory (since it produces many false positives) 11 | 12 | #define ignore_run_dir 1 13 | 14 | // Skip empty files or files with unallocated names (since they may produce false positives) 15 | 16 | #define ignore_empty 1 17 | #define ignore_unalloc 1 18 | 19 | int hidden_files = 0; 20 | char full_path[PATH_MAX]; 21 | char extract_path[PATH_MAX]; 22 | char *extract_dir; 23 | char *mount_point; 24 | struct dirent *dp; 25 | DIR *dirp; 26 | 27 | static void mkdir_p(const char *dir) { 28 | char path[PATH_MAX]; 29 | char *c = NULL; 30 | 31 | snprintf(path, sizeof(path), "%s", dir); 32 | if (path[strlen(path) - 1] == '/') { 33 | path[strlen(path) - 1] = 0; 34 | } 35 | for (c = path + 1; *c; c++) { 36 | if (*c == '/') { 37 | *c = 0; 38 | mkdir(path, S_IRWXU); 39 | *c = '/'; 40 | } 41 | } 42 | mkdir(path, S_IRWXU); 43 | } 44 | 45 | TSK_WALK_RET_ENUM scan_callback(TSK_FS_FILE *file, const char *path, void *ptr) { 46 | 47 | // Skip links, tsk virtual files/dirs and undefined files 48 | 49 | if (file->name->type == TSK_FS_NAME_TYPE_LNK || 50 | file->name->type == TSK_FS_NAME_TYPE_UNDEF || 51 | file->name->type == TSK_FS_NAME_TYPE_VIRT || 52 | file->name->type == TSK_FS_NAME_TYPE_VIRT_DIR) { 53 | return TSK_WALK_CONT; 54 | } 55 | 56 | // Skip files with unallocted names 57 | 58 | #if ignore_unalloc != 0 59 | if (file->name->flags == TSK_FS_NAME_FLAG_UNALLOC) { 60 | return TSK_WALK_CONT; 61 | } 62 | #endif 63 | 64 | // Skip empty files 65 | 66 | #if ignore_empty != 0 67 | if (file->meta == NULL || file->meta->size <= 0) { 68 | return TSK_WALK_CONT; 69 | } 70 | #endif 71 | 72 | // Skip the /run directory 73 | 74 | #if ignore_run_dir 
!= 0 75 | if (strncmp("run", path, 3) == 0) { 76 | return TSK_WALK_CONT; 77 | } 78 | #endif 79 | 80 | sprintf(full_path, "%s%s", mount_point, path); 81 | 82 | dirp = opendir(full_path); 83 | if (dirp == NULL) { 84 | printf("Failed to open directory: %s\n", full_path); 85 | return TSK_WALK_CONT; 86 | } 87 | 88 | do { 89 | dp = readdir(dirp); 90 | if (dp != NULL) { 91 | if (strcmp(dp->d_name, file->name->name) == 0) { 92 | closedir(dirp); 93 | return TSK_WALK_CONT; 94 | } 95 | } 96 | } while(dp != NULL); 97 | 98 | closedir(dirp); 99 | 100 | printf("Hidden: %s%s (%li)\n", full_path, file->name->name, file->name->meta_addr); 101 | hidden_files += 1; 102 | 103 | sprintf(extract_path, "%s%s", extract_dir, path); 104 | 105 | if (file->name->type == TSK_FS_NAME_TYPE_DIR) { 106 | strcat(extract_path, file->name->name); 107 | mkdir_p(extract_path); 108 | printf("Created: %s\n", extract_path); 109 | } 110 | 111 | if (file->name->type == TSK_FS_NAME_TYPE_REG) { 112 | char buf[1024]; 113 | unsigned int offset; 114 | FILE *fp; 115 | 116 | mkdir_p(extract_path); 117 | strcat(extract_path, file->name->name); 118 | 119 | fp = fopen(extract_path, "ab"); 120 | offset = 0; 121 | while(tsk_fs_file_read(file, offset, buf, 1024, 0) > 0) { 122 | for (int i = 0; i < 1024; i++) { 123 | if (buf[i]) { 124 | fwrite(&buf[i], 1, 1, fp); 125 | } 126 | } 127 | offset += 1024; 128 | } 129 | fclose(fp); 130 | 131 | TSK_FS_HASH_RESULTS hashes; 132 | 133 | if (tsk_fs_file_hash_calc(file, &hashes, TSK_BASE_HASH_MD5 | TSK_BASE_HASH_SHA1) != 0) { 134 | printf("Failed to calculate hashes: %s%s\n", full_path, file->name->name); 135 | } 136 | 137 | printf("Extracted: %s | MD5=", extract_path); 138 | for (int i = 0; i < TSK_MD5_DIGEST_LENGTH; i++) { 139 | printf("%02x", hashes.md5_digest[i]); 140 | } 141 | printf(" SHA1="); 142 | for (int i = 0; i < TSK_SHA_DIGEST_LENGTH; i++) { 143 | printf("%02x", hashes.sha1_digest[i]); 144 | } 145 | printf("\n"); 146 | } 147 | 148 | return TSK_WALK_CONT; 149 | } 150 | 
151 | int main(int argc, char *argv[]) { 152 | 153 | if (argc < 4) { 154 | printf("Usage: %s [volume] [mount point] [extract path]\nExample: %s /dev/sda1 / /evidence\n", argv[0], argv[0]); 155 | exit(-1); 156 | } 157 | 158 | if (geteuid() != 0) { 159 | printf("Root priviliges required to parse disk\n"); 160 | exit(-1); 161 | } 162 | 163 | if (argv[2][strlen(argv[2]) - 1] != '/') { 164 | strcat(argv[2], "/"); 165 | } 166 | 167 | if (argv[3][strlen(argv[3]) - 1] != '/') { 168 | strcat(argv[3], "/"); 169 | } 170 | 171 | // Load volume or disk image 172 | 173 | TSK_IMG_INFO *disk_image = tsk_img_open_utf8_sing(argv[1], TSK_IMG_TYPE_DETECT, 0); 174 | 175 | if (disk_image == NULL) { 176 | printf("%s is not a valid volume or disk image\n", argv[1]); 177 | exit(-1); 178 | } 179 | 180 | // Open filesystem on volume / disk image 181 | 182 | TSK_FS_INFO *filesystem = tsk_fs_open_img(disk_image, 0, TSK_FS_TYPE_DETECT); 183 | 184 | if (filesystem == NULL) { 185 | printf("%s does not contain a supported filesystem\n", argv[1]); 186 | exit(-1); 187 | } 188 | 189 | printf("%s (%ldMB) -> %s (%li inodes)\n", argv[1], disk_image->size / 1000000, tsk_fs_type_toname(filesystem->ftype), filesystem->inum_count); 190 | 191 | mount_point = argv[2]; 192 | extract_dir = argv[3]; 193 | void *ptr = NULL; 194 | 195 | tsk_fs_dir_walk(filesystem, filesystem->root_inum, TSK_FS_DIR_WALK_FLAG_RECURSE | TSK_FS_DIR_WALK_FLAG_NOORPHAN, scan_callback, ptr); 196 | 197 | tsk_fs_close(filesystem); 198 | tsk_img_close(disk_image); 199 | 200 | if (hidden_files > 0) { 201 | printf("%i hidden file(s) found\nExtracted to: %s\n", hidden_files, argv[3]); 202 | } else { 203 | printf("No hidden files found\n"); 204 | } 205 | 206 | exit(hidden_files); 207 | } --------------------------------------------------------------------------------