├── .gitignore ├── CHANGES ├── Makefile ├── README ├── api.c ├── binary ├── foremost-linux ├── foremost-mac ├── foremost.conf └── foremost.exe ├── cli.c ├── config.c ├── dir.c ├── engine.c ├── extract.c ├── extract.h ├── foremost.8.gz ├── foremost.conf ├── helpers.c ├── library └── libiberty.a ├── main.c ├── main.h ├── ole.h └── state.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.ko 3 | *.obj 4 | *.elf 5 | *.gch 6 | *.pch 7 | *.lib 8 | *.la 9 | *.lo 10 | *.dll 11 | *.so 12 | *.so.* 13 | *.dylib 14 | *.out 15 | *.app 16 | *.i*86 17 | *.x86_64 18 | *.hex 19 | *.dSYM/ 20 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Version 1.5.7 2 | -Added support for MP4 files 3 | Version 1.5.6 4 | -Added support for Office 2007 file as well as bug fixes 5 | Version 1.5.5 6 | -Added patch submitted by John K. Antonishek as well as cleaning 7 | up compiler warnings and man file installation. 8 | Version 1.5.4 9 | -Added patch submitted by Milan Broz & Eamonn Saunders that 10 | fixes jpeg extraction bug. 11 | warnings 12 | and an 64 bit bug. 13 | Version 1.5.3 14 | -Added patches submitted by Toshio Kuratomi that fix compiler warnings 15 | and an 64 bit bug. 16 | Version 1.5.2 17 | -Fixed problem with gap code thanks to Jeffry Turnmire 18 | Version 1.5.1 19 | -Fixed jpeg extraction bug thanks to Jeffry Turnmire 20 | -Fixed bug in OLE extraction thanks to Filip Van Raemdonck 21 | Version 1.5 22 | -Fixed Endian errors on OSX 23 | -Fixed several bugs reported by John K. 
Antonishek 24 | Version 1.4 25 | -Fixed realpath problems when compiling with cygwin 26 | -Fixed flaw in Zip extraction 27 | -Made indirect block detection a little more stable 28 | Version 1.3 29 | - Fixed flaw in ZIP algorithm that didn't take into account zeroized local file headers 30 | that contain valid compressed/uncompressed info in the data descriptors 31 | Version 1.2 32 | - Fixed conf file typos 33 | Version 1.1 34 | - Improved Speed of extraction functions 35 | - Added NEXT option to config file 36 | - Fixed some integer overflow problems 37 | - Updated config file 38 | - Added ASCII option for the config file 39 | Version 1.0 40 | - Changed display function 41 | - Enhanced RAR and PE extraction 42 | - Minor bug fixes thanks to Eamon Walsh for the bug report and patch 43 | - Added support for Windows PE executables 44 | - Added support for multiple files 45 | - Thanks to Toshio Kuratomi for fixing some compiler warnings under gcc 4 46 | - Fixed bugs with respect to unique file names, and quick mode 47 | Version 0.9.4 48 | - Improved speed and reliability of zip and mpeg extraction algorithms. 49 | Version 0.9.3 50 | - Added subdirectories for each output type as opposed to 1 directory 51 | containing 90,000 files. 52 | Version 0.9.2 53 | - Greatly improved OLE extraction capabilities. 54 | Version 0.9.1 55 | - Re-wrote code to run on LINUX,OSX,BSD,and SOLARIS 56 | - Added builtin extraction functions 57 | - Changed default behavior to look for the conf file in /usr/local/etc as 58 | well as the current dir. Also the conf file is not required 59 | for the program to run if the -t option is enabled. 60 | - Added a -i switch to specify an input file as opposed to using stdin 61 | - Added -k to allow the user to change the default chunk size as well 62 | as -b to change the default block size 63 | - Changed the output dir to a time stamp of when the program was run. 
64 | - Added -d for indirect block detection 65 | Version 0.69 (Our thanks to Zach Kanner for these improvements...) 66 | - Corrected a bug that prevented the "reverse footer search" option 67 | from working correctly. 68 | - Added a new "NEXT" option, specify NEXT after the footer on any 69 | search specification line and foremost will search for 70 | the last occurrence (forward only currently) of that footer in the 71 | window of size length but not including that footer in the resulting 72 | output file created. This feature lets you search for files that 73 | don't have good ending footers but are separated by multiple starting footers or other identifiable data which you know should not be 74 | included in the output. This works really well for MS Word documents where you don't know where the end is. The start of another document 75 | becomes the end. With this feature you can specify the "NEXT" header 76 | or something else that comes after the end of the data we are looking for. 77 | - Updated the default foremost.conf file to use the feature for .doc 78 | files. Also added tags for ScanSoft PaperPort files (.max), and 79 | a Windows program called PINs (.pins), which stores encrypted 80 | passwords. 81 | Version 0.68 82 | Version 0.67 83 | - Added "reverse footer search" option, specify REVERSE after the 84 | footer on any search specification line and foremost will search for 85 | the last occurrence of that footer in the window of size length. 86 | 87 | Version 0.66 88 | - Changed normal search to Boyer-Moore algorithm. Much faster! 89 | - Added progress meter 90 | - Added ability to suppress extensions from a single file type or 91 | from all file types. 
92 | - Added "chop" field to show when files have been trimmed 93 | based on their definitions in the configuration file 94 | - Added "interior" field to show when files have been found 95 | somewhere other than a sector boundary 96 | - Added OpenBSD support 97 | - Added Win32 support via native compilation (Mingw) 98 | - Added Win32 support via Cygwin, to include: 99 | -using %lld instead of %Ld 100 | -ignoring the fcntl line for O_LARGEFILE in Win32 101 | -redeclare strsignal as const char strsignal 102 | -write function basename for Win32 using '\\' as delimiter 103 | -updated Makefile 104 | - Removed unnecessary header files from foremost.h 105 | 106 | (Version 0.65 was not published) 107 | 108 | Version 0.64 - Audit file now records full paths of input and output files 109 | Foremost now requires that the output directory is empty 110 | before running. If necessary, foremost will create the 111 | output directory (ie. if it doesn't exist) 112 | Added structure to internal code of foremost.c and created 113 | dig.c file 114 | Fixed bug that generated wrong line number in configuration 115 | file error messages 116 | Fixed bug on empty wildcard definitions 117 | Added limit for number of file types in configuration file 118 | 119 | Version 0.63 - Increased speed by using files already loaded in memory 120 | instead of going back to the disk every time. 121 | Minor speed increase to helper functions 122 | Added footers for several file formats including ZIP 123 | 124 | Version 0.62 - Added man page and make install functionality 125 | Added "internal" indicator to show when a file is found 126 | off the start of the sector. 127 | Fixed discrepancy between audit file and screen output 128 | regarding file numbers and offset locations (off by one) 129 | Added more graceful error handling 130 | 131 | Version 0.61 - Added check for "^M" carriage returns added by MSDOS editors 132 | while reading configuration files. 
133 | 134 | Version 0.6 - Renamed project to "foremost" 135 | Added support for wildcards 136 | Added -q for quick mode 137 | More code clean up 138 | Removed BSD porting code (oops) and added support 139 | for large (>2GB) files. 140 | 141 | Version 0.5 - Added -v for verbose mode 142 | Added more intelligble output regarding file locations 143 | Added error handling procedures 144 | Added support for loading specification files from the disk 145 | 146 | Version 0.4 - More code cleanup 147 | (not actually released, used as test during investigation) 148 | 149 | Version 0.3 - Code cleanup continues, moved all variables into the 150 | state variable. The program still needs a LOT of work. 151 | 152 | Version 0.2 - Code cleanup by Jesse Kornblum. Removed linux specific 153 | code and ported to OpenBSD. Added support for handling 154 | multiple images from the command line and created the 155 | state variable. 07 March 2001 156 | 157 | Version 0.1 - Proof of concept code written by Kris Kendall, 158 | originally called "snarfit" 05 March 2001 159 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | RAW_CC = gcc 3 | RAW_FLAGS = -Wall -O2 4 | LINK_OPT = 5 | VERSION = 1.5.7 6 | # Try to determine the host system 7 | SYS := $(shell uname -s | tr -d "[0-9]" | tr -d "-" | tr "[A-Z]" "[a-z]") 8 | 9 | 10 | # You can cross compile this program for Win32 using Linux and the 11 | # MinGW compiler. See the README for details. 
If you have already 12 | # installed MinGW, put the location ($PREFIX) here: 13 | CR_BASE = /opt/mingw32/bin 14 | 15 | # You shouldn't need to change anything below this line 16 | #--------------------------------------------------------------------- 17 | 18 | # This should be commented out when debugging is done 19 | #RAW_FLAGS += -D__DEBUG -ggdb 20 | 21 | NAME = foremost 22 | NAME_OUTPUT = $(NAME) 23 | MAN_PAGES = $(NAME).8.gz 24 | 25 | RAW_FLAGS += -DVERSION=\"$(VERSION)\" 26 | 27 | # Where we get installed 28 | BIN = /usr/local/bin 29 | MAN = /usr/share/man/man8 30 | CONF= /usr/local/etc 31 | # Setup for compiling and cross-compiling for Windows 32 | # The CR_ prefix refers to cross compiling from OSX to Windows 33 | CR_CC = $(CR_BASE)/gcc 34 | CR_OPT = $(RAW_FLAGS) -D__WIN32 35 | CR_LINK = -liberty 36 | CR_STRIP = $(CR_BASE)/strip 37 | CR_GOAL = $(NAME).exe 38 | WINCC = $(RAW_CC) $(RAW_FLAGS) -D__WIN32 39 | 40 | # Generic "how to compile C files" 41 | CC = $(RAW_CC) $(RAW_FLAGS) -D__UNIX 42 | .c.o: 43 | $(CC) -c $< 44 | 45 | 46 | # Definitions we'll need later (and that should rarely change) 47 | HEADER_FILES = main.h ole.h extract.h 48 | SRC = main.c state.c helpers.c config.c cli.c engine.c dir.c extract.c api.c 49 | OBJ = main.o state.o helpers.o config.o cli.o engine.o dir.o extract.o api.o 50 | DOCS = Makefile README CHANGES $(MAN_PAGES) foremost.conf 51 | WINDOC = README.txt CHANGES.txt 52 | 53 | 54 | #--------------------------------------------------------------------- 55 | # OPERATING SYSTEM DIRECTIVES 56 | #--------------------------------------------------------------------- 57 | 58 | all: $(SYS) goals 59 | 60 | goals: $(NAME) 61 | 62 | linux: CC += -D__LINUX -DLARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 63 | linux: goals 64 | 65 | sunos: solaris 66 | solaris: CC += -D__SOLARIS -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 67 | solaris: goals 68 | 69 | darwin: CC += -D__MACOSX 70 | darwin: goals 71 | 72 | mac: CC += -D__MACOSX 73 | mac: goals 74 | 75 | 
netbsd: unix 76 | openbsd: unix 77 | freebsd: unix 78 | unix: goals 79 | 80 | #Fore some reasons BSD variants get confused on how to build engine.o 81 | #so lets make it real clear 82 | 83 | engine.o: engine.c 84 | $(CC) -c engine.c 85 | 86 | 87 | # Common commands for compiling versions for Windows. 88 | # See cross and windows directives below. 89 | win_general: LINK_OPT = $(CR_LINK) 90 | win_general: GOAL = $(CR_GOAL) 91 | win_general: goals 92 | $(STRIP) $(CR_GOAL) 93 | 94 | # Cross compiling from Linux to Windows. See README for more info 95 | cross: CC = $(CR_CC) $(CR_OPT) 96 | cross: NAME_OUTPUT = $(CR_GOAL) 97 | cross: STRIP = $(CR_STRIP) 98 | cross: win_general 99 | 100 | # See the README for information on Windows compilation 101 | windows: CC = $(WINCC) 102 | windows: STRIP = strip 103 | windows: win_general 104 | 105 | cygwin_nt.: unix 106 | cygwin: unix 107 | 108 | 109 | #--------------------------------------------------------------------- 110 | # COMPILE THE PROGRAMS 111 | # This section must be updated each time you add an algorithm 112 | #--------------------------------------------------------------------- 113 | 114 | foremost: $(OBJ) 115 | $(CC) $(OBJ) -o $(NAME_OUTPUT) $(LINK_OPT) 116 | 117 | 118 | #--------------------------------------------------------------------- 119 | # INSTALLATION AND REMOVAL 120 | #--------------------------------------------------------------------- 121 | 122 | install: goals 123 | install -m 755 $(NAME) $(BIN) 124 | install -m 444 $(MAN_PAGES) $(MAN) 125 | install -m 444 foremost.conf $(CONF) 126 | macinstall: BIN = /usr/local/bin/ 127 | macinstall: MAN = /usr/share/man/man1/ 128 | macinstall: CONF = /usr/local/etc/ 129 | macinstall: mac install 130 | 131 | 132 | uninstall: 133 | rm -f -- $(BIN)/{$(RM_GOALS)} 134 | rm -f -- $(MAN)/{$(RM_DOCS)} 135 | 136 | macuninstall: BIN = /usr/bin 137 | macuninstall: MAN = /usr/share/man/man1 138 | macuninstall: uninstall 139 | 140 | 
#--------------------------------------------------------------------- 141 | # CLEAN UP 142 | #--------------------------------------------------------------------- 143 | 144 | # This is used for debugging 145 | preflight: 146 | grep -n RBF *.1 *.h *.c README CHANGES 147 | 148 | nice: 149 | rm -f -- *~ 150 | 151 | clean: nice 152 | rm -f -- *.o 153 | rm -f -- $(CR_GOAL) $(NAME) $(WIN_DOC) 154 | rm -f -- $(TAR_FILE).gz $(DEST_DIR).zip $(DEST_DIR).zip.gpg 155 | 156 | #------------------------------------------------------------------------- 157 | # MAKING PACKAGES 158 | #------------------------------------------------------------------------- 159 | 160 | EXTRA_FILES = 161 | DEST_DIR = $(NAME)-$(VERSION) 162 | TAR_FILE = $(DEST_DIR).tar 163 | PKG_FILES = $(SRC) $(HEADER_FILES) $(DOCS) $(EXTRA_FILES) 164 | 165 | # This packages me up to send to somebody else 166 | package: clean 167 | rm -f $(TAR_FILE) $(TAR_FILE).gz 168 | mkdir $(DEST_DIR) 169 | cp $(PKG_FILES) $(DEST_DIR) 170 | tar cvf $(TAR_FILE) $(DEST_DIR) 171 | rm -rf $(DEST_DIR) 172 | gzip $(TAR_FILE) 173 | 174 | 175 | # This Makefile is designed for Mac OSX to package the file. 
176 | # To do this on a linux box, The big line below starting with "/usr/bin/tbl" 177 | # should be replaced with: 178 | # 179 | # man ./$(MD5GOAL).1 | col -bx > README.txt 180 | # 181 | # and the "flip -d" command should be replaced with dos2unix 182 | # 183 | # The flip command can be found at: 184 | # http://ccrma-www.stanford.edu/~craig/utility/flip/# 185 | win-doc: 186 | /usr/bin/tbl ./$(MD5GOAL).1 | /usr/bin/groff -S -Wall -mtty-char -mandoc -Tascii | /usr/bin/col > README.txt 187 | cp CHANGES CHANGES.txt 188 | flip -d $(WINDOC) 189 | 190 | cross-pkg: clean cross win-doc 191 | rm -f $(DEST_DIR).zip 192 | zip $(DEST_DIR).zip $(CR_MD5GOAL) $(CR_SHA1GOAL) $(CR_SHA256GOAL) $(WINDOC) 193 | rm -f $(WINDOC) 194 | 195 | world: package cross-pkg 196 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | WINDOWS: 2 | Requires path to MinGW. 3 | Makefile defaults MinGW gcc path to /opt/mingw32/bin 4 | If you installed mingw32 in different location modify line 13 in Makefile. 5 | Requires libiberty.a to compile 6 | 7 | MingGW: http://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win32/Automated%20Builds/ 8 | 9 | $ make cross 10 | 11 | 12 | ---------------------------------------------------------------------- 13 | 14 | 15 | FOREMOST 16 | ---------------------------------------------------------------------- 17 | 18 | Foremost is a Linux program to recover files based on their headers and 19 | footers. Foremost can work on image files, such as those generated by dd, 20 | Safeback, Encase, etc, or directly on a drive. The headers and footers are 21 | specified by a configuration file, so you can pick and choose which 22 | headers you want to look for. 
23 | 24 | 25 | 26 | -------------------------------------------- 27 | INSTALL FOREMOST 28 | -------------------------------------------- 29 | 30 | To run foremost, you must: 31 | 32 | - uncompress the archive 33 | - compile 34 | - install 35 | 36 | Here's how to do it: 37 | 38 | LINUX: 39 | $ tar zxvf foremost-xx.tar.gz 40 | $ cd foremost-xx 41 | $ make 42 | $ make install 43 | 44 | BSD: 45 | $ tar zxvf foremost-xx.tar.gz 46 | $ cd foremost-xx 47 | $ make unix 48 | $ make install 49 | 50 | SOLARIS: 51 | $ tar zxvf foremost-xx.tar.gz 52 | $ cd foremost-xx 53 | $ make solaris 54 | $ make install 55 | 56 | OSX: 57 | $ tar zxvf foremost-xx.tar.gz 58 | $ cd foremost-xx 59 | $ make mac 60 | $ make macinstall 61 | 62 | On systems with older versions of glibc (earlier than 2.2.0), you will get 63 | some harmless warnings about ftello and fseeko not being defined. You can 64 | ignore these. 65 | 66 | 67 | If you ever need to remove foremost from your system, you can do this: 68 | 69 | $ make uninstall 70 | 71 | 72 | 73 | -------------------------------------------- 74 | USING FOREMOST 75 | -------------------------------------------- 76 | 77 | A description of the command line arguments can be found in the man page. 78 | To view it: 79 | 80 | $ man foremost 81 | 82 | 83 | 84 | -------------------------------------------- 85 | CONFIGURATION FILE FORMAT 86 | -------------------------------------------- 87 | 88 | The configuration file is used to control what types of files foremost 89 | searches for. A sample configuration file, foremost.conf, is included with 90 | this distribution. For each file type, the configuration file describes 91 | the file's extension, whether the header and footer are case sensitive, 92 | the maximum file size, and the header and footer for the file. The footer 93 | field is optional, but header, size, case sensitivity, and extension are 94 | not! 95 | 96 | Any line that begins with a '#' is considered a comment and ignored. 
Thus, 97 | to skip a file type just put a '#' at the beginning of that line. 98 | 99 | Headers and footers are decoded before use. To specify a value in 100 | hexadecimal use \x[0-9a-f][0-9a-f], and for octal use \[0-7][0-7][0-7]. Spaces 101 | can be represented by \s. Example: "\x4F\123\I\sCCI" decodes to "OSI CCI". 102 | 103 | To match any single character (aka a wildcard) use a '?'. If you need to 104 | search for the '?' character, you will need to change the 'wildcard' line 105 | *and* every occurrence of the old wildcard character in the configuration 106 | file. Don't forget those hex and octal values! '?' is equal to 0x3f and 107 | \077. 108 | 109 | Here's a sample set of headers and footers: 110 | 111 | # extension case-sens max-size header footer (option) 112 | # 113 | # GIF and JPG files (very common) 114 | gif y 155000 \x47\x49\x46\x38\x37\x61 \x00\x3b 115 | gif y 155000 \x47\x49\x46\x38\x39\x61 \x00\x00\x3b 116 | jpg y 200000 \xff\xd8\xff \xff\xd9 117 | 118 | Note: the option field is a method of specifying additional options. Currently the following options exist: 119 | 120 | FORWARD: Specify to search from the header to the footer (optional) up to the max-size. 121 | REVERSE: Specify to search from the footer to the header up to the max-size. 122 | NEXT: Specify to search from the header to the data just past the footer. This allows you to specify data that you know is 'NOT' in the data you are looking for and should terminate the search, up to the max-size. 123 | 124 | -------------------------------------------- 125 | BUG REPORTING 126 | -------------------------------------------- 127 | 128 | Please report ALL bugs to nick dot mikus AT gmail d0t com. Please include a 129 | description of the bug, how you found it, and your contact information. 
130 | 131 | 132 | 133 | 134 | -------------------------------------------- 135 | CREDITS AND THANKS 136 | -------------------------------------------- 137 | 138 | Foremost was written by Special Agent Kris Kendall and Special Agent Jesse 139 | Kornblum of the United States Air Force Office of Special Investigations 140 | starting in March 2001. This program would not be what it is today without 141 | help from (in no particular order): Rob Meekins, Dan Kalil, and Chet 142 | Maciag. This project was inspired by CarvThis, written by the Defense 143 | Computer Forensic Lab in 1999. 144 | 145 | 146 | -------------------------------------------- 147 | LEGAL NOTICE 148 | -------------------------------------------- 149 | 150 | dd, Safeback, and Encase are copyrighted works and any questions regarding 151 | these tools should be directed to the copyright holders. The United States 152 | Government does not endorse the use of these or any other imaging tools. 153 | -------------------------------------------------------------------------------- /api.c: -------------------------------------------------------------------------------- 1 | /* 2 | Modified API from http://chicago.sourceforge.net/devel/docs/ole/ 3 | Basically the same API, added error checking and the ability 4 | to check buffers for docs, not just files. 
5 | */ 6 | #include "main.h" 7 | #include "ole.h" 8 | 9 | /*Some ugly globals 10 | * This API should be re-written 11 | * in a modular fashion*/ 12 | unsigned char buffer[OUR_BLK_SIZE]; 13 | char *extract_name; 14 | int extract = 0; 15 | int dir_count = 0; 16 | int *FAT; 17 | int verbose = TRUE; 18 | int FATblk; 19 | int currFATblk; 20 | int highblk = 0; 21 | int block_list[OUR_BLK_SIZE / sizeof(int)]; 22 | 23 | /*Inititialize those globals used by extract_ole*/ 24 | void init_ole() 25 | { 26 | int i = 0; 27 | extract = 0; 28 | dir_count = 0; 29 | FAT = NULL; 30 | highblk = 0; 31 | FATblk = 0; 32 | currFATblk = -1; 33 | dirlist = NULL; 34 | dl = NULL; 35 | for (i = 0; i < OUR_BLK_SIZE / sizeof(int); i++) 36 | { 37 | block_list[i] = 0; 38 | } 39 | 40 | for (i = 0; i < OUR_BLK_SIZE; i++) 41 | { 42 | buffer[i] = 0; 43 | } 44 | } 45 | 46 | void *Malloc(size_t bytes) 47 | { 48 | void *x; 49 | 50 | x = malloc(bytes); 51 | if (x) 52 | return x; 53 | die("Can't malloc %d bytes.\n", (char *)bytes); 54 | return 0; 55 | } 56 | 57 | void die(char *fmt, void *arg) 58 | { 59 | fprintf(stderr, fmt, arg); 60 | exit(1); 61 | } 62 | 63 | int get_dir_block(unsigned char *fd, int blknum, int buffersize) 64 | { 65 | int i; 66 | struct OLE_DIR *dir; 67 | unsigned char *dest = NULL; 68 | 69 | dest = get_ole_block(fd, blknum, buffersize); 70 | if (dest == NULL) 71 | { 72 | return FALSE; 73 | } 74 | 75 | for (i = 0; i < DIRS_PER_BLK; i++) 76 | { 77 | dir = (struct OLE_DIR *) &dest[sizeof(struct OLE_DIR) * i]; 78 | if (dir->type == NO_ENTRY) 79 | break; 80 | } 81 | 82 | if (i == DIRS_PER_BLK) 83 | { 84 | return TRUE; 85 | } 86 | else 87 | { 88 | return SHORT_BLOCK; 89 | } 90 | } 91 | 92 | int get_dir_info(unsigned char *src) 93 | { 94 | int i, j; 95 | char *p, *q; 96 | struct OLE_DIR *dir; 97 | int punctCount = 0; 98 | short name_size = 0; 99 | 100 | for (i = 0; i < DIRS_PER_BLK; i++) 101 | { 102 | dir = (struct OLE_DIR *) &src[sizeof(struct OLE_DIR) * i]; 103 | punctCount = 0; 104 | 105 | 
//if(dir->reserved!=0) return FALSE; 106 | if (dir->type < 0) //Should we check if values are > 5 ????? 107 | { 108 | #ifdef DEBUG 109 | printf("\n Invalid directory type\n"); 110 | printf("type:=%c size:=%lu \n", dir->type, dir->size); 111 | #endif 112 | return FALSE; 113 | } 114 | 115 | if (dir->type == NO_ENTRY) 116 | break; 117 | 118 | #ifdef DEBUG 119 | 120 | //dump_dirent (i); 121 | #endif 122 | dl = &dirlist[dir_count++]; 123 | if (dl == NULL) 124 | { 125 | #ifdef DEBUG 126 | printf("dl==NULL!!! bailing out\n"); 127 | #endif 128 | return FALSE; 129 | } 130 | 131 | if (dir_count > 500) 132 | return FALSE; /*SANITY CHECKING*/ 133 | q = dl->name; 134 | p = dir->name; 135 | 136 | name_size = htos((unsigned char *) &dir->namsiz, FOREMOST_LITTLE_ENDIAN); 137 | 138 | #ifdef DEBUG 139 | printf(" dir->namsiz:=%d\n", name_size); 140 | #endif 141 | if (name_size > 64 || name_size <= 0) 142 | return FALSE; 143 | 144 | if (*p < ' ') 145 | p += 2; /* skip leading short */ 146 | for (j = 0; j < name_size; j++, p++) 147 | { 148 | 149 | if (p == NULL || q == NULL) 150 | return FALSE; 151 | if (*p && isprint(*p)) 152 | { 153 | 154 | if (ispunct(*p)) 155 | punctCount++; 156 | *q++ = *p; 157 | 158 | } 159 | } 160 | 161 | if (punctCount > 3) 162 | { 163 | #ifdef DEBUG 164 | printf("dl->name:=%s\n", dl->name); 165 | printf("pcount > 3!!! bailing out\n"); 166 | #endif 167 | return FALSE; 168 | } 169 | 170 | if (dl->name == NULL) 171 | { 172 | #ifdef DEBUG 173 | printf(" ***NULL dir name. 
bailing out \n"); 174 | #endif 175 | return FALSE; 176 | } 177 | 178 | /*Ignore Catalogs*/ 179 | if (strstr(dl->name, "Catalog")) 180 | return FALSE; 181 | *q = 0; 182 | dl->type = dir->type; 183 | dl->size = htoi((unsigned char *) &dir->size, FOREMOST_LITTLE_ENDIAN); 184 | 185 | dl->start_block = htoi((unsigned char *) &dir->start_block, FOREMOST_LITTLE_ENDIAN); 186 | dl->next = htoi((unsigned char *) &dir->next_dirent, FOREMOST_LITTLE_ENDIAN); 187 | dl->prev = htoi((unsigned char *) &dir->prev_dirent, FOREMOST_LITTLE_ENDIAN); 188 | dl->dir = htoi((unsigned char *) &dir->dir_dirent, FOREMOST_LITTLE_ENDIAN); 189 | if (dir->type != STREAM) 190 | { 191 | dl->s1 = dir->secs1; 192 | dl->s2 = dir->secs2; 193 | dl->d1 = dir->days1; 194 | dl->d2 = dir->days2; 195 | } 196 | } 197 | 198 | return TRUE; 199 | } 200 | 201 | static int *lnlv; /* last next link visited ! */ 202 | int reorder_dirlist(struct DIRECTORY *dir, int level) 203 | { 204 | 205 | //printf(" Reordering the dirlist\n"); 206 | dir->level = level; 207 | if (dir->dir != -1 || dir->dir > dir_count) 208 | { 209 | return 0; 210 | } 211 | else if (!reorder_dirlist(&dirlist[dir->dir], level + 1)) 212 | return 0; 213 | 214 | /* reorder next-link subtree, saving the most next link visited */ 215 | if (dir->next != -1) 216 | { 217 | if (dir->next > dir_count) 218 | return 0; 219 | else if (!reorder_dirlist(&dirlist[dir->next], level)) 220 | return 0; 221 | } 222 | else 223 | lnlv = &dir->next; 224 | 225 | /* move the prev child to the next link and reorder it, if any exist 226 | */ 227 | if (dir->prev != -1) 228 | { 229 | if (dir->prev > dir_count) 230 | return 0; 231 | else 232 | { 233 | *lnlv = dir->prev; 234 | dir->prev = -1; 235 | if (!reorder_dirlist(&dirlist[*lnlv], level)) 236 | return 0; 237 | } 238 | } 239 | 240 | return 1; 241 | } 242 | 243 | int get_block(unsigned char *fd, int blknum, unsigned char *dest, long long int buffersize) 244 | { 245 | unsigned char *temp = fd; 246 | int i = 0; 247 | unsigned long 
long jump = (unsigned long long)OUR_BLK_SIZE * (unsigned long long)(blknum + 1); 248 | if (blknum < -1 || jump < 0 || blknum > buffersize || buffersize < jump) 249 | { 250 | #ifdef DEBUG 251 | printf(" Bad blk read1 blknum:=%d jump:=%lld buffersize=%lld\n", blknum, jump, buffersize); 252 | #endif 253 | return FALSE; 254 | } 255 | 256 | temp = fd + jump; 257 | #ifdef DEBUG 258 | printf(" Jumping to %lld blknum=%d buffersize=%lld\n", jump, blknum, buffersize); 259 | #endif 260 | for (i = 0; i < OUR_BLK_SIZE; i++) 261 | { 262 | dest[i] = temp[i]; 263 | } 264 | 265 | if ((blknum + 1) > highblk) 266 | highblk = blknum + 1; 267 | return TRUE; 268 | } 269 | 270 | unsigned char *get_ole_block(unsigned char *fd, int blknum, unsigned long long buffersize) 271 | { 272 | unsigned long long jump = (unsigned long long)OUR_BLK_SIZE * (unsigned long long)(blknum + 1); 273 | if (blknum < -1 || jump < 0 || blknum > buffersize || buffersize < jump) 274 | { 275 | #ifdef DEBUG 276 | printf(" Bad blk read1 blknum:=%d jump:=%lld buffersize=%lld\n", blknum, jump, buffersize); 277 | #endif 278 | return FALSE; 279 | } 280 | 281 | #ifdef DEBUG 282 | printf(" Jumping to %lld blknum=%d buffersize=%lld\n", jump, blknum, buffersize); 283 | #endif 284 | return (fd + jump); 285 | } 286 | 287 | int get_FAT_block(unsigned char *fd, int blknum, int *dest, int buffersize) 288 | { 289 | static int FATblk; 290 | 291 | // static int currFATblk = -1; 292 | FATblk = htoi((unsigned char *) &FAT[blknum / (OUR_BLK_SIZE / sizeof(int))], 293 | FOREMOST_LITTLE_ENDIAN); 294 | #ifdef DEBUG 295 | printf("****blknum:=%d FATblk:=%d currFATblk:=%d\n", blknum, FATblk, currFATblk); 296 | #endif 297 | if (currFATblk != FATblk) 298 | { 299 | #ifdef DEBUG 300 | printf("*****blknum:=%d FATblk:=%d\n", blknum, FATblk); 301 | #endif 302 | if (!get_block(fd, FATblk, (unsigned char *)dest, buffersize)) 303 | { 304 | return FALSE; 305 | } 306 | 307 | currFATblk = FATblk; 308 | } 309 | 310 | return TRUE; 311 | } 312 | 313 | void 
dump_header(struct OLE_HDR *h) 314 | { 315 | int i, *x; 316 | 317 | //struct OLE_HDR *h = (struct OLE_HDR *) buffer; 318 | // fprintf (stderr, "clsid = "); 319 | //printx(h->clsid,0,16); 320 | fprintf(stderr, 321 | "\nuMinorVersion = %u\t", 322 | htos((unsigned char *) &h->uMinorVersion, FOREMOST_LITTLE_ENDIAN)); 323 | fprintf(stderr, 324 | "uDllVersion = %u\t", 325 | htos((unsigned char *) &h->uDllVersion, FOREMOST_LITTLE_ENDIAN)); 326 | fprintf(stderr, 327 | "uByteOrder = %u\n", 328 | htos((unsigned char *) &h->uByteOrder, FOREMOST_LITTLE_ENDIAN)); 329 | fprintf(stderr, 330 | "uSectorShift = %u\t", 331 | htos((unsigned char *) &h->uSectorShift, FOREMOST_LITTLE_ENDIAN)); 332 | fprintf(stderr, 333 | "uMiniSectorShift = %u\t", 334 | htos((unsigned char *) &h->uMiniSectorShift, FOREMOST_LITTLE_ENDIAN)); 335 | fprintf(stderr, 336 | "reserved = %u\n", 337 | htos((unsigned char *) &h->reserved, FOREMOST_LITTLE_ENDIAN)); 338 | fprintf(stderr, 339 | "reserved1 = %u\t", 340 | htoi((unsigned char *) &h->reserved1, FOREMOST_LITTLE_ENDIAN)); 341 | fprintf(stderr, 342 | "reserved2 = %u\t", 343 | htoi((unsigned char *) &h->reserved2, FOREMOST_LITTLE_ENDIAN)); 344 | fprintf(stderr, 345 | "csectMiniFat = %u\t", 346 | htoi((unsigned char *) &h->csectMiniFat, FOREMOST_LITTLE_ENDIAN)); 347 | fprintf(stderr, 348 | "miniSectorCutoff = %u\n", 349 | htoi((unsigned char *) &h->miniSectorCutoff, FOREMOST_LITTLE_ENDIAN)); 350 | fprintf(stderr, 351 | "root_start_block = %u\n", 352 | htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN)); 353 | fprintf(stderr, 354 | "dir flag = %u\n", 355 | htoi((unsigned char *) &h->dir_flag, FOREMOST_LITTLE_ENDIAN)); 356 | fprintf(stderr, 357 | "# FAT blocks = %u\n", 358 | htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN)); 359 | fprintf(stderr, 360 | "FAT_next_block = %u\n", 361 | htoi((unsigned char *) &h->FAT_next_block, FOREMOST_LITTLE_ENDIAN)); 362 | fprintf(stderr, 363 | "# extra FAT blocks = %u\n", 364 | 
htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN)); 365 | x = (int *) &h[1]; 366 | fprintf(stderr, "bbd list:"); 367 | for (i = 0; i < 109; i++, x++) 368 | { 369 | if ((i % 10) == 0) 370 | fprintf(stderr, "\n"); 371 | if (*x == '\xff') 372 | break; 373 | fprintf(stderr, "%x ", *x); 374 | } 375 | 376 | fprintf(stderr, "\n **************End of header***********\n"); 377 | } 378 | 379 | struct OLE_HDR *reverseBlock(struct OLE_HDR *dest, struct OLE_HDR *h) 380 | { 381 | int i, *x, *y; 382 | dest->uMinorVersion = htos((unsigned char *) &h->uMinorVersion, FOREMOST_LITTLE_ENDIAN); 383 | dest->uDllVersion = htos((unsigned char *) &h->uDllVersion, FOREMOST_LITTLE_ENDIAN); 384 | dest->uByteOrder = htos((unsigned char *) &h->uByteOrder, FOREMOST_LITTLE_ENDIAN); /*28*/ 385 | dest->uSectorShift = htos((unsigned char *) &h->uSectorShift, FOREMOST_LITTLE_ENDIAN); 386 | dest->uMiniSectorShift = htos((unsigned char *) &h->uMiniSectorShift, FOREMOST_LITTLE_ENDIAN); /*32*/ 387 | dest->reserved = htos((unsigned char *) &h->reserved, FOREMOST_LITTLE_ENDIAN); /*34*/ 388 | dest->reserved1 = htoi((unsigned char *) &h->reserved1, FOREMOST_LITTLE_ENDIAN); /*36*/ 389 | dest->reserved2 = htoi((unsigned char *) &h->reserved2, FOREMOST_LITTLE_ENDIAN); /*40*/ 390 | dest->num_FAT_blocks = htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN); /*44*/ 391 | dest->root_start_block = htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN); /*48*/ 392 | dest->dfsignature = htoi((unsigned char *) &h->dfsignature, FOREMOST_LITTLE_ENDIAN); /*52*/ 393 | dest->miniSectorCutoff = htoi((unsigned char *) &h->miniSectorCutoff, FOREMOST_LITTLE_ENDIAN); /*56*/ 394 | dest->dir_flag = htoi((unsigned char *) &h->dir_flag, FOREMOST_LITTLE_ENDIAN); /*60 first sec in the mini fat chain*/ 395 | dest->csectMiniFat = htoi((unsigned char *) &h->csectMiniFat, FOREMOST_LITTLE_ENDIAN); /*64 number of sectors in the minifat */ 396 | dest->FAT_next_block = htoi((unsigned char *) 
&h->FAT_next_block, FOREMOST_LITTLE_ENDIAN); /*68*/ 397 | dest->num_extra_FAT_blocks = htoi((unsigned char *) &h->num_extra_FAT_blocks, 398 | FOREMOST_LITTLE_ENDIAN); 399 | 400 | x = (int *) &h[1]; 401 | y = (int *) &dest[1]; 402 | for (i = 0; i < 109; i++, x++) 403 | { 404 | *y = htoi((unsigned char *)x, FOREMOST_LITTLE_ENDIAN); 405 | y++; 406 | } 407 | 408 | return dest; 409 | } 410 | 411 | void dump_ole_header(struct OLE_HDR *h) 412 | { 413 | int i, *x; 414 | 415 | //fprintf (stderr, "clsid = "); 416 | //printx(h->clsid,0,16); 417 | fprintf(stderr, 418 | "\nuMinorVersion = %u\t", 419 | htos((unsigned char *) &h->uMinorVersion, FOREMOST_LITTLE_ENDIAN)); 420 | fprintf(stderr, 421 | "uDllVersion = %u\t", 422 | htos((unsigned char *) &h->uDllVersion, FOREMOST_LITTLE_ENDIAN)); 423 | fprintf(stderr, 424 | "uByteOrder = %u\n", 425 | htos((unsigned char *) &h->uByteOrder, FOREMOST_LITTLE_ENDIAN)); 426 | fprintf(stderr, 427 | "uSectorShift = %u\t", 428 | htos((unsigned char *) &h->uSectorShift, FOREMOST_LITTLE_ENDIAN)); 429 | fprintf(stderr, 430 | "uMiniSectorShift = %u\t", 431 | htos((unsigned char *) &h->uMiniSectorShift, FOREMOST_LITTLE_ENDIAN)); 432 | fprintf(stderr, 433 | "reserved = %u\n", 434 | htos((unsigned char *) &h->reserved, FOREMOST_LITTLE_ENDIAN)); 435 | fprintf(stderr, 436 | "reserved1 = %u\t", 437 | htoi((unsigned char *) &h->reserved1, FOREMOST_LITTLE_ENDIAN)); 438 | fprintf(stderr, 439 | "reserved2 = %u\t", 440 | htoi((unsigned char *) &h->reserved2, FOREMOST_LITTLE_ENDIAN)); 441 | fprintf(stderr, 442 | "csectMiniFat = %u\t", 443 | htoi((unsigned char *) &h->csectMiniFat, FOREMOST_LITTLE_ENDIAN)); 444 | fprintf(stderr, 445 | "miniSectorCutoff = %u\n", 446 | htoi((unsigned char *) &h->miniSectorCutoff, FOREMOST_LITTLE_ENDIAN)); 447 | fprintf(stderr, 448 | "root_start_block = %u\n", 449 | htoi((unsigned char *) &h->root_start_block, FOREMOST_LITTLE_ENDIAN)); 450 | fprintf(stderr, 451 | "dir flag = %u\n", 452 | htoi((unsigned char *) &h->dir_flag, 
FOREMOST_LITTLE_ENDIAN)); 453 | fprintf(stderr, 454 | "# FAT blocks = %u\n", 455 | htoi((unsigned char *) &h->num_FAT_blocks, FOREMOST_LITTLE_ENDIAN)); 456 | fprintf(stderr, 457 | "FAT_next_block = %u\n", 458 | htoi((unsigned char *) &h->FAT_next_block, FOREMOST_LITTLE_ENDIAN)); 459 | fprintf(stderr, 460 | "# extra FAT blocks = %u\n", 461 | htoi((unsigned char *) &h->num_extra_FAT_blocks, FOREMOST_LITTLE_ENDIAN)); 462 | x = (int *) &h[1]; 463 | fprintf(stderr, "bbd list:"); 464 | for (i = 0; i < 109; i++, x++) 465 | { 466 | if ((i % 10) == 0) 467 | fprintf(stderr, "\n"); 468 | if (*x == '\xff') 469 | break; 470 | fprintf(stderr, "%x ", htoi((unsigned char *)x, FOREMOST_LITTLE_ENDIAN)); 471 | } 472 | 473 | fprintf(stderr, "\n **************End of header***********\n"); 474 | } 475 | 476 | int dump_dirent(int which_one) 477 | { 478 | int i; 479 | char *p; 480 | short unknown; 481 | struct OLE_DIR *dir; 482 | 483 | dir = (struct OLE_DIR *) &buffer[which_one * sizeof(struct OLE_DIR)]; 484 | if (dir->type == NO_ENTRY) 485 | return TRUE; 486 | fprintf(stderr, "DIRENT_%d :\t", dir_count); 487 | fprintf(stderr, 488 | "%s\t", 489 | (dir->type == ROOT) ? "root directory" : (dir->type == STORAGE) ? 
"directory" : "file"); 490 | 491 | /* get UNICODE name */ 492 | p = dir->name; 493 | if (*p < ' ') 494 | { 495 | unknown = *((short *)p); 496 | 497 | //fprintf (stderr, "%04x\t", unknown); 498 | p += 2; /* step over unknown short */ 499 | } 500 | 501 | for (i = 0; i < dir->namsiz; i++, p++) 502 | { 503 | if (*p && (*p > 0x1f)) 504 | { 505 | if (isprint(*p)) 506 | { 507 | fprintf(stderr, "%c", *p); 508 | } 509 | else 510 | { 511 | printf("*** Invalid char %x ***\n", *p); 512 | return FALSE; 513 | } 514 | } 515 | } 516 | 517 | fprintf(stderr, "\n"); 518 | 519 | //fprintf (stderr, "prev_dirent = %lu\t", dir->prev_dirent); 520 | //fprintf (stderr, "next_dirent = %lu\t", dir->next_dirent); 521 | //fprintf (stderr, "dir_dirent = %lu\n", dir->dir_dirent); 522 | //fprintf (stderr, "name = %s\t", dir->name); 523 | fprintf(stderr, "namsiz = %u\t", dir->namsiz); 524 | fprintf(stderr, "type = %d\t", dir->type); 525 | fprintf(stderr, "reserved = %u\n", dir->reserved); 526 | 527 | fprintf(stderr, "start block = %lu\n", dir->start_block); 528 | fprintf(stderr, "size = %lu\n", dir->size); 529 | fprintf(stderr, "\n **************End of dirent***********\n"); 530 | return TRUE; 531 | } 532 | -------------------------------------------------------------------------------- /binary/foremost-linux: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jin-stuff/foremost/f6813bcc29fa2aef8f04dafd0843e698a13cc9d0/binary/foremost-linux -------------------------------------------------------------------------------- /binary/foremost-mac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jin-stuff/foremost/f6813bcc29fa2aef8f04dafd0843e698a13cc9d0/binary/foremost-mac -------------------------------------------------------------------------------- /binary/foremost.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Foremost 
configuration file 3 | #------------------------------------------------------------------------- 4 | # Note the foremost configuration file is provided to support formats which 5 | # don't have built-in extraction functions. If the format is built-in to foremost 6 | # simply run foremost with -t and provide the format you wish to extract. 7 | # 8 | # The configuration file is used to control what types of files foremost 9 | # searches for. A sample configuration file, foremost.conf, is included with 10 | # this distribution. For each file type, the configuration file describes 11 | # the file's extension, whether the header and footer are case sensitive, 12 | # the maximum file size, and the header and footer for the file. The footer 13 | # field is optional, but header, size, case sensitivity, and extension are 14 | # not! 15 | # 16 | # Any line that begins with a '#' is considered a comment and ignored. Thus, 17 | # to skip a file type just put a '#' at the beginning of that line 18 | # 19 | 20 | # Headers and footers are decoded before use. To specify a value in 21 | # hexadecimal use \x[0-f][0-f], and for octal use \[0-3][0-7][0-7]. Spaces 22 | # can be represented by \s. Example: "\x4F\123\I\sCCI" decodes to "OSI CCI". 23 | # 24 | # To match any single character (aka a wildcard) use a '?'. If you need to 25 | # search for the '?' character, you will need to change the 'wildcard' line 26 | # *and* every occurrence of the old wildcard character in the configuration 27 | # file. Don't forget those hex and octal values! '?' is equal to 0x3f and 28 | # \063. 29 | # 30 | # If you would like to extract files without an extension enter the value 31 | # "NONE" in the extension column (note: you can change the value of this 32 | # "no suffix" flag by setting the variable FOREMOST_NOEXTENSION_SUFFIX 33 | # in foremost.h and recompiling). 34 | # 35 | # The ASCII option will extract all ASCII printable characters before and after 36 | # the keyword provided. 
37 | # 38 | # The NEXT keyword after a footer instructs foremost to search forwards for data 39 | # that starts with the header provided and terminates or is followed by data in 40 | # the footer -- the footer data is not included in the output. The data in the 41 | # footer, when used with the NEXT keyword effectively allows you to search for 42 | # data that you know for sure should not be in the output file. This method for 43 | # example, lets you search for two 'starting' headers in a document that doesn't 44 | # have a good ending footer and you can't say exactly what the footer is, but 45 | # you know if you see another header, that should end the search and an output 46 | # file should be written. 47 | 48 | # To redefine the wildcard character, change the setting below and all 49 | # occurrences in the foremost.conf file. 50 | # 51 | #wildcard ? 52 | # 53 | # case size header footer 54 | #extension sensitive 55 | # 56 | #--------------------------------------------------------------------- 57 | # EXAMPLE WITH NO SUFFIX 58 | #--------------------------------------------------------------------- 59 | # 60 | # Here is an example of how to use the no extension option.
Any files 61 | # containing the string "FOREMOST" would be extracted to a file without 62 | # an extension (eg: 00000000,00000001) 63 | # NONE y 1000 FOREMOST 64 | # 65 | #--------------------------------------------------------------------- 66 | # GRAPHICS FILES 67 | #--------------------------------------------------------------------- 68 | # 69 | # 70 | # AOL ART files 71 | # art y 150000 \x4a\x47\x04\x0e \xcf\xc7\xcb 72 | # art y 150000 \x4a\x47\x03\x0e \xd0\xcb\x00\x00 73 | # 74 | # GIF and JPG files (very common) 75 | # (NOTE THESE FORMATS HAVE BUILTIN EXTRACTION FUNCTION) 76 | # gif y 155000000 \x47\x49\x46\x38\x37\x61 \x00\x3b 77 | # gif y 155000000 \x47\x49\x46\x38\x39\x61 \x00\x00\x3b 78 | # jpg y 20000000 \xff\xd8\xff\xe0\x00\x10 \xff\xd9 79 | # jpg y 20000000 \xff\xd8\xff\xe1 \xff\xd9 80 | # jpg y 20000000 \xff\xd8 \xff\xd9 81 | # 82 | # PNG (used in web pages) 83 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 84 | # png y 200000 \x50\x4e\x47? \xff\xfc\xfd\xfe 85 | # 86 | # 87 | # BMP 88 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 89 | # bmp y 100000 BM??\x00\x00\x00 90 | # 91 | # TIF 92 | # tif y 200000000 \x49\x49\x2a\x00 93 | # 94 | #--------------------------------------------------------------------- 95 | # ANIMATION FILES 96 | #--------------------------------------------------------------------- 97 | # 98 | # AVI (Windows animation and DiVX/MPEG-4 movies) 99 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 100 | # avi y 4000000 RIFF????AVI 101 | # 102 | # Apple Quicktime 103 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 104 | # mov y 4000000 ????????\x6d\x6f\x6f\x76 105 | # mov y 4000000 ????????\x6d\x64\x61\x74 106 | # 107 | # MPEG Video 108 | # mpg y 4000000 mpg eof 109 | # mpg y 20000000 \x00\x00\x01\xba \x00\x00\x01\xb9 110 | # mpg y 20000000 \x00\x00\x01\xb3 \x00\x00\x01\xb7 111 | # 112 | # Macromedia Flash 113 | # fws y 4000000 FWS 114 | # 115 | 
#--------------------------------------------------------------------- 116 | # MICROSOFT OFFICE 117 | #--------------------------------------------------------------------- 118 | # 119 | # Word documents 120 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 121 | # doc y 12500000 \xd0\xcf\x11\xe0\xa1\xb1 122 | # 123 | # Outlook files 124 | # pst y 400000000 \x21\x42\x4e\xa5\x6f\xb5\xa6 125 | # ost y 400000000 \x21\x42\x44\x4e 126 | # 127 | # Outlook Express 128 | # dbx y 4000000 \xcf\xad\x12\xfe\xc5\xfd\x74\x6f 129 | # idx y 4000000 \x4a\x4d\x46\x39 130 | # mbx y 4000000 \x4a\x4d\x46\x36 131 | # 132 | #--------------------------------------------------------------------- 133 | # WORDPERFECT 134 | #--------------------------------------------------------------------- 135 | # 136 | # wpc y 100000 ?WPC 137 | # 138 | #--------------------------------------------------------------------- 139 | # HTML (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 140 | #--------------------------------------------------------------------- 141 | # 142 | # htm n 50000 143 | # 144 | #--------------------------------------------------------------------- 145 | # ADOBE PDF (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 146 | #--------------------------------------------------------------------- 147 | # 148 | # pdf y 5000000 %PDF- %EOF 149 | # 150 | # 151 | #--------------------------------------------------------------------- 152 | # AOL (AMERICA ONLINE) 153 | #--------------------------------------------------------------------- 154 | # 155 | # AOL Mailbox 156 | # mail y 500000 \x41\x4f\x4c\x56\x4d 157 | # 158 | # 159 | # 160 | #--------------------------------------------------------------------- 161 | # PGP (PRETTY GOOD PRIVACY) 162 | #--------------------------------------------------------------------- 163 | # 164 | # PGP Disk Files 165 | # pgd y 500000 \x50\x47\x50\x64\x4d\x41\x49\x4e\x60\x01 166 | # 167 | # Public Key Ring 168 | # pgp y 100000 \x99\x00 169 | # 
Security Ring 170 | # pgp y 100000 \x95\x01 171 | # pgp y 100000 \x95\x00 172 | # Encrypted Data or ASCII armored keys 173 | # pgp y 100000 \xa6\x00 174 | # (there should be a trailer for this...) 175 | # txt y 100000 -----BEGIN\040PGP 176 | # 177 | # 178 | #--------------------------------------------------------------------- 179 | # RPM (Linux package format) 180 | #--------------------------------------------------------------------- 181 | # rpm y 1000000 \xed\xab 182 | # 183 | # 184 | #--------------------------------------------------------------------- 185 | # SOUND FILES 186 | #--------------------------------------------------------------------- 187 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 188 | # wav y 200000 RIFF????WAVE 189 | # 190 | # Real Audio Files 191 | # ra y 1000000 \x2e\x72\x61\xfd 192 | # ra y 1000000 .RMF 193 | # 194 | # asf y 8000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C 195 | # 196 | # wmv y 20000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C 197 | # 198 | # wma y 8000000 \x30\x26\xB2\x75 \x00\x00\x00\xFF 199 | # 200 | # wma y 8000000 \x30\x26\xB2\x75 \x52\x9A\x12\x46 201 | # 202 | # mp3 y 8000000 \xFF\xFB??\x44\x00\x00 203 | # mp3 y 8000000 \x57\x41\x56\45 \x00\x00\xFF\ 204 | # mp3 y 8000000 \xFF\xFB\xD0\ \xD1\x35\x51\xCC\ 205 | # mp3 y 8000000 \x49\x44\x33\ 206 | # mp3 y 8000000 \x4C\x41\x4D\x45\ 207 | #--------------------------------------------------------------------- 208 | # WINDOWS REGISTRY FILES 209 | #--------------------------------------------------------------------- 210 | # 211 | # Windows NT registry 212 | # dat y 4000000 regf 213 | # Windows 95 registry 214 | # dat y 4000000 CREG 215 | # 216 | # lnk y 5000 \x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00 217 | # chm y 100000 \x49\x54\x53\x46\x03\x00\x00\x00\x60\x00\x00\x00\x01\x00\x00 218 | # cookie n 4096 id= 219 | # rdp y 4096 \xFF\xFE\x73\x00\x63\x00\x72\x00\x65\x00\x65\x00\x6E\x00\x20\x00\x6D 220 
| # 221 | #--------------------------------------------------------------------- 222 | # MISCELLANEOUS 223 | #--------------------------------------------------------------------- 224 | # (NOTE THIS FORMAT HAS BUILTIN EXTRACTION FUNCTION) 225 | # zip y 10000000 PK\x03\x04 \x3c\xac 226 | # (NOTE THIS FORMAT HAS BUILTIN EXTRACTION FUNCTION) 227 | # rar y 10000000 Rar! 228 | # 229 | # java y 1000000 \xca\xfe\xba\xbe 230 | # 231 | # cpp y 20000 #include #include ASCII 232 | #--------------------------------------------------------------------- 233 | # ScanSoft PaperPort "Max" files 234 | #--------------------------------------------------------------------- 235 | # max y 1000000 \x56\x69\x47\x46\x6b\x1a\x00\x00\x00\x00 \x00\x00\x05\x80\x00\x00 236 | #--------------------------------------------------------------------- 237 | # PINs Password Manager program 238 | #--------------------------------------------------------------------- 239 | # pins y 8000 \x50\x49\x4e\x53\x20\x34\x2e\x32\x30\x0d 240 | -------------------------------------------------------------------------------- /binary/foremost.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jin-stuff/foremost/f6813bcc29fa2aef8f04dafd0843e698a13cc9d0/binary/foremost.exe -------------------------------------------------------------------------------- /cli.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "main.h" 4 |

/*
 * fatal_error - report `msg` on stderr, copy it to the audit file when one
 * is open, and terminate the process with EXIT_FAILURE.
 */
void fatal_error(f_state *s, char *msg)
{
	fprintf(stderr, "%s: %s%s", __progname, msg, NEWLINE);
	if (get_audit_file_open(s))
	{
		/* audit_msg() takes a printf-style format; never hand it the
		 * message itself, or a '%' in the text would be interpreted. */
		audit_msg(s, "%s", msg);
		close_audit_file(s);
	}

	exit(EXIT_FAILURE);
}

/*
 * print_error - print "<progname>: <fn>: <msg>" on stderr unless quiet mode
 * is set.
 */
void print_error(f_state *s, char *fn, char *msg)
{
	if (get_mode(s, mode_quiet))
		return;

	fprintf(stderr, "%s: %s: %s%s", __progname, fn, msg, NEWLINE);
}

/*
 * print_message - vprintf-style output to stdout followed by NEWLINE.
 * `s` is not used.
 */
void print_message(f_state *s, char *format, va_list argp)
{
	vfprintf(stdout, format, argp);
	fprintf(stdout, "%s", NEWLINE);
}
-------------------------------------------------------------------------------- /config.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "main.h" 4 |

/*
 * translate - decode the escape sequences of a configuration token in place.
 *
 * Recognised escapes: \\ \a \s (space) \n \r \t \v, \xHH (hex) and \NNN
 * (octal, first digit 0-3). A backslash followed by any other character is
 * kept literally. A malformed \x or octal sequence loses its backslash and
 * the remaining characters are copied unchanged.
 *
 * Returns the length of the decoded string. An empty input is returned
 * untouched: callers pass the string literal "" for missing tokens, which
 * must not be written to.
 */
int translate(char *str)
{
	char	*rd = str;
	char	*wr = str;
	char	*end;
	char	temp[1 + 3 + 1];
	char	ch;

	if (*rd == '\0')	/* nothing to do, and nothing may be written */
		return 0;

	while (*rd)
	{
		/* Unescaped characters go directly to the output */
		if (*rd != '\\')
		{
			*wr++ = *rd++;
			continue;
		}

		rd++;	/* step over the backslash */
		switch (*rd)
		{
		case '\\':
			rd++;
			*wr++ = '\\';
			break;

		case 'a':
			rd++;
			*wr++ = '\a';
			break;

		case 's':
			rd++;
			*wr++ = ' ';
			break;

		case 'n':
			rd++;
			*wr++ = '\n';
			break;

		case 'r':
			rd++;
			*wr++ = '\r';
			break;

		case 't':
			rd++;
			*wr++ = '\t';
			break;

		case 'v':
			rd++;
			*wr++ = '\v';
			break;

		/* Hexadecimal/Octal values are treated in one place using strtoul() */
		case 'x':
		case '0':
		case '1':
		case '2':
		case '3':
			/* Both following characters must be hex digits; the short
			 * circuit keeps rd[2] from being read past a terminator. */
			if (!isxdigit((unsigned char) rd[1]) || !isxdigit((unsigned char) rd[2]))
				break;

			/* Build "0xHH" or "0NNN" so base 0 selects hex or octal. */
			temp[0] = '0';
			memcpy(temp + 1, rd, 3);
			temp[4] = '\0';
			ch = (char) strtoul(temp, &end, 0);
			if (*end == '\0')
			{
				*wr++ = ch;
				rd += 3;
			}	/* else INVALID CHARACTER IN INPUT ('\\' followed by *rd) */
			break;

		default:	/* INVALID CHARACTER IN INPUT (*rd) */
			*wr++ = '\\';
			break;
		}
	}

	*wr = '\0';	/* Null terminate the string that we just created... */
	return (int) (wr - str);
}

/*
 * skipWhiteSpace - return a pointer to the first non-whitespace character
 * of `str` (possibly its terminator).
 */
char *skipWhiteSpace(char *str)
{
	/* <ctype.h> functions require an unsigned char value. */
	while (isspace((unsigned char) *str))
		str++;
	return str;
}

/*
 * extractSearchSpecData - fill search_spec[state->num_builtin] from one
 * tokenised configuration line.
 *
 *   token[0] = suffix
 *   token[1] = case sensitive
 *   token[2] = size to snarf
 *   token[3] = begintag
 *   token[4] = endtag (optional, "" when absent)
 *   token[5] = REVERSE / NEXT / FORWARD / ASCII (optional, "" when absent)
 *
 * Tokens 3 and 4 are decoded in place with translate().
 *
 * Returns TRUE on success, FALSE when memory for the entry cannot be
 * allocated (the entry's pointers are then left NULL).
 */
int extractSearchSpecData(f_state *state, char **tokenarray)
{
	s_spec		*s = &search_spec[state->num_builtin];
	const char	*mode = tokenarray[5];

	/* Allocate the memory for these lines.... */
	s->suffix = malloc(MAX_SUFFIX_LENGTH * sizeof(char));
	s->header = malloc(MAX_STRING_LENGTH * sizeof(char));
	s->footer = malloc(MAX_STRING_LENGTH * sizeof(char));
	if (s->suffix == NULL || s->header == NULL || s->footer == NULL)
	{
		free(s->suffix);
		free(s->header);
		free(s->footer);
		s->suffix = NULL;
		s->header = NULL;
		s->footer = NULL;
		return FALSE;
	}

	s->type = CONF;
	if (!strncasecmp(tokenarray[0], FOREMOST_NOEXTENSION_SUFFIX, strlen(FOREMOST_NOEXTENSION_SUFFIX)))
	{
		s->suffix[0] = ' ';
		s->suffix[1] = 0;
	}
	else
	{
		/* Bounded, always-terminated copy. The former memcpy() of
		 * MAX_SUFFIX_LENGTH bytes read past short tokens and left long
		 * ones unterminated. */
		snprintf(s->suffix, MAX_SUFFIX_LENGTH, "%s", tokenarray[0]);
	}

	/* Check for case sensitivity: any value starting with 'y' or 'Y' */
	s->case_sen = !strncasecmp(tokenarray[1], "y", 1);

	s->max_len = atoi(tokenarray[2]);

	/* Determine which search type we want to use for this needle;
	 * FORWARD is the default when nothing (or something unknown) is given. */
	s->searchtype = SEARCHTYPE_FORWARD;
	if (!strncasecmp(mode, "REVERSE", strlen("REVERSE")))
		s->searchtype = SEARCHTYPE_REVERSE;
	else if (!strncasecmp(mode, "NEXT", strlen("NEXT")))
		s->searchtype = SEARCHTYPE_FORWARD_NEXT;
	else if (!strncasecmp(mode, "FORWARD", strlen("FORWARD")))
		s->searchtype = SEARCHTYPE_FORWARD;
	else if (!strncasecmp(mode, "ASCII", strlen("ASCII")))
		s->searchtype = SEARCHTYPE_ASCII;

	/* We copy the tokens and translate them from the file format.
	 * translate() decodes in place and returns the decoded length, which
	 * never exceeds the length of the token it was given. */
	s->header_len = translate(tokenarray[3]);
	memcpy(s->header, tokenarray[3], s->header_len);
	s->footer_len = translate(tokenarray[4]);
	memcpy(s->footer, tokenarray[4], s->footer_len);

	init_bm_table(s->header, s->header_bm_table, s->header_len, s->case_sen, s->searchtype);
	init_bm_table(s->footer, s->footer_bm_table, s->footer_len, s->case_sen, s->searchtype);

	return TRUE;
}

/*
 * process_line - parse one line of the configuration file.
 *
 * Blank lines and lines whose first token starts with '#' are ignored. A
 * "wildcard <c>" line sets the global wildcard character. Any other line
 * must carry between NUM_SEARCH_SPEC_ELEMENTS - 2 and
 * NUM_SEARCH_SPEC_ELEMENTS tokens and is registered as a search spec.
 *
 * `buffer` is modified (CR removal, strtok, in-place escape decoding).
 *
 * Returns TRUE when the line was accepted or ignored, FALSE on a malformed
 * line or when the search spec could not be stored.
 */
int process_line(f_state *s, char *buffer, int line_number)
{
	static const char	delims[] = " \t\n";
	char			*tokenarray[NUM_SEARCH_SPEC_ELEMENTS];
	char			*token;
	size_t			len = strlen(buffer);
	int			i = 0;

	/* Any line that ends with a CTRL-M (0x0d) has been processed
	 * by a DOS editor. We will chop the CTRL-M to ignore it.
	 * (Guarded: a line shorter than two characters has no CR LF pair.) */
	if (len >= 2 && buffer[len - 2] == 0x0d && buffer[len - 1] == 0x0a)
	{
		buffer[len - 2] = 0x0a;
		buffer[len - 1] = '\0';
	}

	token = strtok(skipWhiteSpace(buffer), delims);

	/* Any line that starts with a '#' is a comment and can be skipped */
	if (token == NULL || token[0] == '#')
		return TRUE;

	/* Check for the wildcard (the length includes the terminator, so only
	 * the exact keyword matches) */
	if (!strncasecmp(token, "wildcard", sizeof "wildcard"))
	{
		token = strtok(NULL, delims);
		if (token == NULL)
			return TRUE;

		translate(token);
		if (strlen(token) > 1)
		{
			fprintf(stderr,
				"Warning: Wildcard can only be one character,"
				" but you specified %zu characters.\n"
				" Using the first character, \"%c\", as the wildcard.\n",
				strlen(token),
				token[0]);
		}

		wildcard = token[0];
		return TRUE;
	}

	while (token && (i < NUM_SEARCH_SPEC_ELEMENTS))
	{
		tokenarray[i++] = token;
		token = strtok(NULL, delims);
	}

	/* The last two columns are optional; fill the missing ones with "". */
	switch (NUM_SEARCH_SPEC_ELEMENTS - i)
	{
	case 2:
		tokenarray[NUM_SEARCH_SPEC_ELEMENTS - 2] = "";
		/* fallthrough */
	case 1:
		tokenarray[NUM_SEARCH_SPEC_ELEMENTS - 1] = "";
		break;

	case 0:
		break;

	default:
		fprintf(stderr, "\nERROR: In line %d of the configuration file.\n", line_number);
		return FALSE;
	}

	if (!extractSearchSpecData(s, tokenarray))
	{
		fprintf(stderr,
			"\nERROR: Unknown error on line %d of the configuration file.\n",
			line_number);

		/* Do not count an entry that was never filled in. */
		return FALSE;
	}

	s->num_builtin++;

	return TRUE;
}

/*
 * load_config_file - read the configuration file and register every search
 * spec it defines.
 *
 * The configured path is tried first; if it cannot be opened the platform
 * default location is stored with set_config_file() and tried instead.
 *
 * Returns FALSE on success and TRUE on failure (file cannot be opened or a
 * line is rejected by process_line()).
 */
int load_config_file(f_state *s)
{
	FILE	*f;
	char	buffer[MAX_STRING_LENGTH];
	int	line_number = 0;
	int	failed = FALSE;

#ifdef __DEBUG
	printf("About to open config file %s%s", get_config_file(s), NEWLINE);
#endif

	if ((f = fopen(get_config_file(s), "r")) == NULL)
	{
		/* Can't find a conf in the current directory
		 * So lets try the /usr/local/etc */
#ifdef __WIN32
		set_config_file(s, "/Program Files/foremost/foremost.conf");
#else
		set_config_file(s, "/usr/local/etc/foremost.conf");
#endif
		if ((f = fopen(get_config_file(s), "r")) == NULL)
		{
			print_error(s, get_config_file(s), strerror(errno));
			return TRUE;
		}
	}

	while (fgets(buffer, sizeof buffer, f))
	{
		++line_number;
		if (!process_line(s, buffer, line_number))
		{
			failed = TRUE;
			break;
		}
	}

	fclose(f);
	return failed;
}
-------------------------------------------------------------------------------- /dir.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "main.h" 4 | 5 | int is_empty_directory (DIR * temp) 6 | { 7 | 8 | /* Empty directories contain two entries for . and ..
9 | A directory with three entries, therefore, is not empty */ 10 | if (readdir(temp) && readdir(temp) && readdir(temp)) 11 | return FALSE; 12 | 13 | return TRUE; 14 | } 15 | 16 | /*Try to cleanup the ouput directory if nothing to a sub-dir*/ 17 | void cleanup_output(f_state *s) 18 | { 19 | char dir_name[MAX_STRING_LENGTH]; 20 | 21 | DIR *temp; 22 | DIR *outputDir; 23 | struct dirent *entry; 24 | 25 | if ((outputDir = opendir(get_output_directory(s))) == NULL) 26 | { 27 | 28 | /*Error?*/ 29 | } 30 | 31 | while ((entry = readdir(outputDir))) 32 | { 33 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 34 | strcpy(dir_name, get_output_directory(s)); 35 | strcat(dir_name, "/"); 36 | strcat(dir_name, entry->d_name); 37 | temp = opendir(dir_name); 38 | if (temp != NULL) 39 | { 40 | if (is_empty_directory(temp)) 41 | { 42 | rmdir(dir_name); 43 | } 44 | } 45 | 46 | } 47 | 48 | } 49 | 50 | int make_new_directory(f_state *s, char *fn) 51 | { 52 | 53 | #ifdef __WIN32 54 | 55 | #ifndef __CYGWIN 56 | if (mkdir(fn)) 57 | #endif 58 | 59 | #else 60 | mode_t new_mode = 61 | ( 62 | S_IRUSR | 63 | S_IWUSR | 64 | S_IXUSR | 65 | S_IRGRP | 66 | S_IWGRP | 67 | S_IXGRP | 68 | S_IROTH | 69 | S_IWOTH 70 | ); 71 | if (mkdir(fn, new_mode)) 72 | #endif 73 | { 74 | if (errno != EEXIST) 75 | { 76 | print_error(s, fn, strerror(errno)); 77 | return TRUE; 78 | } 79 | } 80 | 81 | return FALSE; 82 | } 83 | 84 | /*Clean the timestamped dir name to make it a little more file system friendly*/ 85 | char *clean_time_string(char *time) 86 | { 87 | int len = strlen(time); 88 | int i = 0; 89 | 90 | for (i = 0; i < len; i++) 91 | { 92 | #ifdef __WIN32 93 | if (time[i] == ':' && time[i + 1] != '\\') 94 | { 95 | time[i] = '_'; 96 | } 97 | 98 | #else 99 | if (time[i] == ' ' || time[i] == ':') 100 | { 101 | time[i] = '_'; 102 | } 103 | #endif 104 | } 105 | 106 | return time; 107 | } 108 | 109 | int create_output_directory(f_state *s) 110 | { 111 | DIR *d; 112 | char dir_name[MAX_STRING_LENGTH]; 113 | 114 | 
memset(dir_name, 0, MAX_STRING_LENGTH - 1); 115 | if (s->time_stamp) 116 | { 117 | strcpy(dir_name, get_output_directory(s)); 118 | strcat(dir_name, "_"); 119 | strcat(dir_name, get_start_time(s)); 120 | clean_time_string(dir_name); 121 | set_output_directory(s, dir_name); 122 | } 123 | #ifdef DEBUG 124 | printf("Checking output directory %s\n", get_output_directory(s)); 125 | #endif 126 | 127 | if ((d = opendir(get_output_directory(s))) != NULL) 128 | { 129 | 130 | /* The directory exists already. It MUST be empty for us to continue */ 131 | if (!is_empty_directory(d)) 132 | { 133 | printf("ERROR: %s is not empty\n \tPlease specify another directory or run with -T.\n", 134 | get_output_directory(s)); 135 | 136 | exit(EXIT_FAILURE); 137 | } 138 | 139 | /* The directory exists and is empty. We're done! */ 140 | closedir(d); 141 | return FALSE; 142 | } 143 | 144 | /* The error value ENOENT means that either the directory doesn't exist, 145 | which is fine, or that the filename is zero-length, which is bad. 146 | All other errors are, of course, bad. 147 | */ 148 | if (errno != ENOENT) 149 | { 150 | print_error(s, get_output_directory(s), strerror(errno)); 151 | return TRUE; 152 | } 153 | 154 | if (strlen(get_output_directory(s)) == 0) 155 | { 156 | 157 | /* Careful! Calling print_error will try to display a filename 158 | that is zero characters! In theory this should never happen 159 | as our call to realpath should avoid this. But we'll play it safe. 
*/ 160 | print_error(s, "(output_directory)", "Output directory name unknown"); 161 | return TRUE; 162 | } 163 | 164 | return (make_new_directory(s, get_output_directory(s))); 165 | } 166 | 167 | /*Create file type sub dirs, can get tricky when multiple types use one 168 | extraction algorithm (OLE)*/ 169 | int create_sub_dirs(f_state *s) 170 | { 171 | int i = 0; 172 | int j = 0; 173 | char dir_name[MAX_STRING_LENGTH]; 174 | char ole_types[7][4] = { "ppt", "doc", "xls", "sdw", "mbd", "vis", "ole" }; 175 | char riff_types[2][4] = { "avi", "wav" }; 176 | char zip_types[8][5] = { "sxc", "sxw", "sxi", "sx", "jar","docx","pptx","xlsx" }; 177 | 178 | for (i = 0; i < s->num_builtin; i++) 179 | { 180 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 181 | strcpy(dir_name, get_output_directory(s)); 182 | strcat(dir_name, "/"); 183 | strcat(dir_name, search_spec[i].suffix); 184 | make_new_directory(s, dir_name); 185 | 186 | if (search_spec[i].type == OLE) 187 | { 188 | for (j = 0; j < 7; j++) 189 | { 190 | if (strstr(ole_types[j], search_spec[i].suffix)) 191 | continue; 192 | 193 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 194 | strcpy(dir_name, get_output_directory(s)); 195 | strcat(dir_name, "/"); 196 | strcat(dir_name, ole_types[j]); 197 | make_new_directory(s, dir_name); 198 | } 199 | } 200 | else if (get_mode(s, mode_write_all)) 201 | { 202 | for (j = 0; j < 7; j++) 203 | { 204 | if (strstr(search_spec[i].suffix, ole_types[j])) 205 | { 206 | for (j = 0; j < 7; j++) 207 | { 208 | if (strstr(ole_types[j], search_spec[i].suffix)) 209 | continue; 210 | 211 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 212 | strcpy(dir_name, get_output_directory(s)); 213 | strcat(dir_name, "/"); 214 | strcat(dir_name, ole_types[j]); 215 | make_new_directory(s, dir_name); 216 | } 217 | break; 218 | } 219 | 220 | } 221 | } 222 | 223 | if (search_spec[i].type == EXE) 224 | { 225 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 226 | strcpy(dir_name, get_output_directory(s)); 227 | strcat(dir_name, 
"/"); 228 | strcat(dir_name, "dll"); 229 | make_new_directory(s, dir_name); 230 | } 231 | 232 | if (search_spec[i].type == RIFF) 233 | { 234 | for (j = 0; j < 2; j++) 235 | { 236 | if (strstr(ole_types[j], search_spec[i].suffix)) 237 | continue; 238 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 239 | strcpy(dir_name, get_output_directory(s)); 240 | strcat(dir_name, "/"); 241 | strcat(dir_name, riff_types[j]); 242 | make_new_directory(s, dir_name); 243 | } 244 | } 245 | else if (get_mode(s, mode_write_all)) 246 | { 247 | for (j = 0; j < 2; j++) 248 | { 249 | if (strstr(search_spec[i].suffix, riff_types[j])) 250 | { 251 | for (j = 0; j < 2; j++) 252 | { 253 | if (strstr(ole_types[j], search_spec[i].suffix)) 254 | continue; 255 | 256 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 257 | strcpy(dir_name, get_output_directory(s)); 258 | strcat(dir_name, "/"); 259 | strcat(dir_name, riff_types[j]); 260 | make_new_directory(s, dir_name); 261 | } 262 | break; 263 | } 264 | 265 | } 266 | } 267 | 268 | if (search_spec[i].type == ZIP) 269 | { 270 | for (j = 0; j < 8; j++) 271 | { 272 | if (strstr(ole_types[j], search_spec[i].suffix)) 273 | continue; 274 | 275 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 276 | strcpy(dir_name, get_output_directory(s)); 277 | strcat(dir_name, "/"); 278 | strcat(dir_name, zip_types[j]); 279 | make_new_directory(s, dir_name); 280 | } 281 | } 282 | else if (get_mode(s, mode_write_all)) 283 | { 284 | for (j = 0; j < 8; j++) 285 | { 286 | if (strstr(search_spec[i].suffix, zip_types[j])) 287 | { 288 | for (j = 0; j < 5; j++) 289 | { 290 | if (strstr(ole_types[j], search_spec[i].suffix)) 291 | continue; 292 | 293 | memset(dir_name, 0, MAX_STRING_LENGTH - 1); 294 | strcpy(dir_name, get_output_directory(s)); 295 | strcat(dir_name, "/"); 296 | strcat(dir_name, zip_types[j]); 297 | make_new_directory(s, dir_name); 298 | } 299 | break; 300 | } 301 | } 302 | } 303 | 304 | } 305 | 306 | return TRUE; 307 | } 308 | 309 | /*We have found a file so write to 
disk*/
/* Record a carved file in the audit log and, unless running in audit-only
   mode, write it to "<output dir>/<suffix>/<block>.<suffix>".

   s         program state (block size, output directory, running file count)
   needle    search spec that matched; its written flag and found counter
             are updated here
   len       number of bytes of buf to write
   buf       start of the carved data
   t_offset  offset of the data in the input, used to derive the block
             number that names the file

   Always returns TRUE; open/write/close failures go through fatal_error().

   Fixes relative to the previous version:
     - a NULL needle->comment was "repaired" with strcpy() into that same
       NULL pointer; a local fallback string is used instead;
     - the audit-only line printed the 64-bit block number with %ld;
     - 64-bit values are cast to unsigned long long for %llu;
     - the fwrite() result is kept in a size_t rather than a long. */
int write_to_disk(f_state *s, s_spec *needle, u_int64_t len, unsigned char *buf, u_int64_t t_offset)
{
    char fn[MAX_STRING_LENGTH];
    char temp[32];
    FILE *f;
    FILE *test;
    size_t byteswritten = 0;
    unsigned long long block = (unsigned long long)(t_offset / s->block_size);
    const char *comment = (needle->comment != NULL) ? needle->comment : " ";
    int i = 1;

    //Name files based on their block offset
    needle->written = TRUE;

    if (get_mode(s, mode_write_audit))
    {
        /* Audit-only mode: log the hit, write nothing. */
        audit_msg(s,
                  "%d:\t%10llu.%s \t %10s \t %10llu \t %s",
                  s->fileswritten,
                  block,
                  needle->suffix,
                  human_readable(len, temp),
                  (unsigned long long)t_offset,
                  comment);
        s->fileswritten++;
        needle->found++;
        return TRUE;
    }

    snprintf(fn, sizeof fn, "%s/%s/%0*llu.%s",
             s->output_directory, needle->suffix, 8, block, needle->suffix);

    /* Several headers can fall inside the same block, so probe for an
       existing file and append _1, _2, ... until the name is unused. */
    test = fopen(fn, "r");
    while (test)
    {
        fclose(test);
        snprintf(fn, sizeof fn, "%s/%s/%0*llu_%d.%s",
                 s->output_directory, needle->suffix, 8, block, i, needle->suffix);
        i++;
        test = fopen(fn, "r");
    }

    /* NOTE(review): fatal_error() is presumably non-returning -- confirm;
       the code below relies on f being valid. */
    if (!(f = fopen(fn, "wb"))) /* added b so it can extract correctly for windows */
    {
        printf("fn = %s failed\n", fn);
        fatal_error(s, "Can't open file for writing \n");
    }

    byteswritten = fwrite(buf, sizeof(char), len, f);
    if ((u_int64_t)byteswritten != len)
    {
        fprintf(stderr, "fn=%s bytes=%lu\n", fn, (unsigned long)byteswritten);
        fatal_error(s, "Error writing file\n");
    }

    if (fclose(f))
    {
        fatal_error(s, "Error closing file\n");
    }

    if (i == 1)
    {
        audit_msg(s, "%d:\t%08llu.%s \t %10s \t %10llu \t %s",
                  s->fileswritten,
                  block,
                  needle->suffix,
                  human_readable(len, temp),
                  (unsigned long long)t_offset,
                  comment);
    }
    else
    {
        /* i was advanced once past the suffix that was actually used. */
        audit_msg(s, "%d:\t%08llu_%d.%s \t %10s \t %10llu \t %s",
                  s->fileswritten,
                  block,
                  i - 1,
                  needle->suffix,
                  human_readable(len, temp),
                  (unsigned long long)t_offset,
                  comment);
    }

    s->fileswritten++;
    needle->found++;
    return TRUE;
}
422 | -------------------------------------------------------------------------------- /engine.c: -------------------------------------------------------------------------------- 1 | 2 | /* FOREMOST 3 | * 4 | * By Jesse Kornblum, Kris Kendall, & Nick Mikus 5 | * 6 | * This is a work of the US Government. In accordance with 17 USC 105, 7 | * copyright protection is not available for any work of the US Government. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 | * 13 | */ 14 | 15 | #include "main.h" 16 | 17 | int user_interrupt (f_state * s, f_info * i) 18 | { 19 | audit_msg(s, "Interrupt received at %s", current_time()); 20 | 21 | /* RBF - Write user_interrupt */ 22 | fclose(i->handle); 23 | free(s); 24 | free(i); 25 | cleanup_output(s); 26 | exit(-1); 27 | return FALSE; 28 | } 29 | 30 | unsigned char *read_from_disk(u_int64_t offset, f_info *i, u_int64_t length) 31 | { 32 | 33 | u_int64_t bytesread = 0; 34 | unsigned char *newbuf = (unsigned char *)malloc(length * sizeof(char)); 35 | if (!newbuf) { 36 | fprintf(stderr, "Ran out of memory in read_from_disk()\n"); 37 | exit(1); 38 | } 39 | 40 | fseeko(i->handle, offset, SEEK_SET); 41 | bytesread = fread(newbuf, 1, length, i->handle); 42 | if (bytesread != length) 43 | { 44 | free(newbuf); 45 | return NULL; 46 | } 47 | else 48 | { 49 | return newbuf; 50 | } 51 | } 52 | 53 | /* 54 | Perform a modified boyer-moore string search (w/ support for wildcards and case-insensitive searches) 55 | and allows the starting position in the buffer to be manually set, which allows data to be skipped 56 | */ 57 | unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len, unsigned char *haystack, 58 | size_t haystack_len, size_t table[UCHAR_MAX + 1], int casesensitive, 59 | int searchtype, int start_pos) 60 | { 61 | register size_t shift = 0; 62 | register size_t pos = start_pos; 63 | unsigned char *here; 64 | 65 | if (needle_len == 0) 66 | return haystack; 67 | 68 | if (searchtype == SEARCHTYPE_FORWARD || searchtype == SEARCHTYPE_FORWARD_NEXT) 69 | { 70 | while (pos < haystack_len) 71 | { 72 | while (pos < haystack_len && (shift = table[(unsigned char)haystack[pos]]) > 0) 73 | { 74 | pos += shift; 75 | } 76 | 77 | if (0 == shift) 78 | { 79 | here = (unsigned char *) &haystack[pos - needle_len + 1]; 80 | if (0 == memwildcardcmp(needle, here, needle_len, casesensitive)) 81 | { 82 | return (here); 83 | } 84 | else 85 | pos++; 86 | } 87 | } 88 | 89 | return NULL; 90 | } 91 | 
else if (searchtype == SEARCHTYPE_REVERSE) //Run our search backwards 92 | { 93 | while (pos < haystack_len) 94 | { 95 | while 96 | ( 97 | pos < haystack_len && 98 | (shift = table[(unsigned char)haystack[haystack_len - pos - 1]]) > 0 99 | ) 100 | { 101 | pos += shift; 102 | } 103 | 104 | if (0 == shift) 105 | { 106 | if (0 == memwildcardcmp(needle, here = (unsigned char *) &haystack[haystack_len - pos - 1], 107 | needle_len, casesensitive)) 108 | { 109 | return (here); 110 | } 111 | else 112 | pos++; 113 | } 114 | } 115 | 116 | return NULL; 117 | } 118 | 119 | return NULL; 120 | } 121 | 122 | /* 123 | Perform a modified boyer-moore string search (w/ support for wildcards and case-insensitive searches) 124 | and allows the starting position in the buffer to be manually set, which allows data to be skipped 125 | */ 126 | unsigned char *bm_search(unsigned char *needle, size_t needle_len, unsigned char *haystack, 127 | size_t haystack_len, size_t table[UCHAR_MAX + 1], int case_sen, 128 | int searchtype) 129 | { 130 | 131 | //printf("The needle2 is:\t"); 132 | //printx(needle,0,needle_len); 133 | return bm_search_skipn(needle, 134 | needle_len, 135 | haystack, 136 | haystack_len, 137 | table, 138 | case_sen, 139 | searchtype, 140 | needle_len - 1); 141 | 142 | } 143 | 144 | void setup_stream(f_state *s, f_info *i) 145 | { 146 | char buffer[MAX_STRING_LENGTH]; 147 | u_int64_t skip = (((u_int64_t) s->skip) * ((u_int64_t) s->block_size)); 148 | #ifdef DEBUG 149 | printf("s->skip=%d s->block_size=%d total=%llu\n", 150 | s->skip, 151 | s->block_size, 152 | (((u_int64_t) s->skip) * ((u_int64_t) s->block_size))); 153 | #endif 154 | i->bytes_read = 0; 155 | i->total_megs = i->total_bytes / ONE_MEGABYTE; 156 | 157 | if (i->total_bytes != 0) 158 | { 159 | audit_msg(s, 160 | "Length: %s (%llu bytes)", 161 | human_readable(i->total_bytes, buffer), 162 | i->total_bytes); 163 | } 164 | else 165 | audit_msg(s, "Length: Unknown"); 166 | 167 | if (s->skip != 0) 168 | { 169 | 
audit_msg(s, "Skipping: %s (%llu bytes)", human_readable(skip, buffer), skip); 170 | fseeko(i->handle, skip, SEEK_SET); 171 | if (i->total_bytes != 0) 172 | i->total_bytes -= skip; 173 | } 174 | 175 | audit_msg(s, " "); 176 | 177 | #ifdef __WIN32 178 | i->last_read = 0; 179 | i->overflow_count = 0; 180 | #endif 181 | 182 | } 183 | 184 | void audit_layout(f_state *s) 185 | { 186 | audit_msg(s, 187 | "Num\t %s (bs=%d)\t %10s\t %s\t %s \n", 188 | "Name", 189 | s->block_size, 190 | "Size", 191 | "File Offset", 192 | "Comment"); 193 | 194 | } 195 | 196 | void dumpInd(unsigned char *ind, int bs) 197 | { 198 | int i = 0; 199 | printf("\n/*******************************/\n"); 200 | 201 | while (bs > 0) 202 | { 203 | if (i % 10 == 0) 204 | printf("\n"); 205 | 206 | //printx(ind,0,10); 207 | printf("%4u ", htoi(ind, FOREMOST_LITTLE_ENDIAN)); 208 | 209 | bs -= 4; 210 | ind += 4; 211 | i++; 212 | } 213 | 214 | printf("\n/*******************************/\n"); 215 | } 216 | 217 | /******************************************************************************** 218 | *Function: ind_block 219 | *Description: check if the block foundat is pointing to looks like an indirect 220 | * block 221 | *Return: TRUE/FALSE 222 | **********************************************************************************/ 223 | int ind_block(unsigned char *foundat, u_int64_t buflen, int bs) 224 | { 225 | 226 | unsigned char *temp = foundat; 227 | int jump = 12 * bs; 228 | unsigned int block = 0; 229 | unsigned int block2 = 0; 230 | unsigned int dif = 0; 231 | int i = 0; 232 | unsigned int one = 1; 233 | unsigned int numbers = (bs / 4) - 1; 234 | 235 | //int reconstruct=FALSE; 236 | 237 | /*Make sure we don't jump past the end of the buffer*/ 238 | if (buflen < jump + 16) 239 | return FALSE; 240 | 241 | while (i < numbers) 242 | { 243 | block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN); 244 | 245 | if (block < 0) 246 | return FALSE; 247 | 248 | if (block == 0) 249 | { 250 | break; 251 | } 252 | 
253 | i++; 254 | block2 = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN); 255 | if (block2 < 0) 256 | return FALSE; 257 | 258 | if (block2 == 0) 259 | { 260 | break; 261 | } 262 | 263 | dif = block2 - block; 264 | 265 | if (dif == one) 266 | { 267 | 268 | #ifdef DEBUG 269 | printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif); 270 | #endif 271 | } 272 | else 273 | { 274 | 275 | #ifdef DEBUG 276 | printf("Failure, dif!=1\n"); 277 | printf("\tblock1:=%u, block2:=%u dif=%u\n", block, block2, dif); 278 | #endif 279 | 280 | return FALSE; 281 | } 282 | 283 | #ifdef DEBUG 284 | printf("block1:=%u, block2:=%u dif=%u\n", block, block2, dif); 285 | #endif 286 | } 287 | 288 | if (i == 0) 289 | return FALSE; 290 | 291 | /*Check if the rest of the bytes are zero'd out */ 292 | for (i = i + 1; i < numbers; i++) 293 | { 294 | block = htoi(&temp[jump + (i * 4)], FOREMOST_LITTLE_ENDIAN); 295 | if (block != 0) 296 | { 297 | 298 | //printf("Failure, 0 test\n"); 299 | return FALSE; 300 | } 301 | } 302 | 303 | return TRUE; 304 | } 305 | 306 | /******************************************************************************** 307 | *Function: search_chunk 308 | *Description: Analyze the given chunk by running each defined search spec on it 309 | *Return: TRUE/FALSE 310 | **********************************************************************************/ 311 | int search_chunk(f_state *s, unsigned char *buf, f_info *i, u_int64_t chunk_size, u_int64_t f_offset) 312 | { 313 | 314 | u_int64_t c_offset = 0; 315 | //u_int64_t foundat_off = 0; 316 | //u_int64_t buf_off = 0; 317 | 318 | unsigned char *foundat = buf; 319 | unsigned char *current_pos = NULL; 320 | unsigned char *header_pos = NULL; 321 | unsigned char *newbuf = NULL; 322 | unsigned char *ind_ptr = NULL; 323 | u_int64_t current_buflen = chunk_size; 324 | int tryBS[3] = { 4096, 1024, 512 }; 325 | unsigned char *extractbuf = NULL; 326 | u_int64_t file_size = 0; 327 | s_spec *needle = NULL; 328 | int j = 0; 329 | int bs = 
0; 330 | int rem = 0; 331 | int x = 0; 332 | int found_ind = FALSE; 333 | off_t saveme; 334 | //char comment[32]; 335 | for (j = 0; j < s->num_builtin; j++) 336 | { 337 | needle = &search_spec[j]; 338 | foundat = buf; /*reset the buffer for the next search spec*/ 339 | #ifdef DEBUG 340 | printf(" SEARCHING FOR %s's\n", needle->suffix); 341 | #endif 342 | bs = 0; 343 | current_buflen = chunk_size; 344 | while (foundat) 345 | { 346 | needle->written = FALSE; 347 | found_ind = FALSE; 348 | memset(needle->comment, 0, COMMENT_LENGTH - 1); 349 | if (chunk_size <= (foundat - buf)) { 350 | #ifdef DEBUG 351 | printf("avoided seg fault in search_chunk()\n"); 352 | #endif 353 | foundat = NULL; 354 | break; 355 | } 356 | current_buflen = chunk_size - (foundat - buf); 357 | 358 | //if((foundat-buf)< 1 ) break; 359 | #ifdef DEBUG 360 | //foundat_off=foundat; 361 | //buf_off=buf; 362 | //printf("current buf:=%llu (foundat-buf)=%llu \n", current_buflen, (u_int64_t) (foundat_off - buf_off)); 363 | #endif 364 | if (signal_caught == SIGTERM || signal_caught == SIGINT) 365 | { 366 | user_interrupt(s, i); 367 | printf("Cleaning up.\n"); 368 | signal_caught = 0; 369 | } 370 | 371 | if (get_mode(s, mode_quick)) /*RUN QUICK SEARCH*/ 372 | { 373 | #ifdef DEBUG 374 | 375 | //printf("quick mode is on\n"); 376 | #endif 377 | 378 | /*Check if we are not on a block head, adjust if so*/ 379 | rem = (foundat - buf) % s->block_size; 380 | if (rem != 0) 381 | { 382 | foundat += (s->block_size - rem); 383 | } 384 | 385 | if (memwildcardcmp(needle->header, foundat, needle->header_len, needle->case_sen 386 | ) != 0) 387 | { 388 | 389 | /*No match, jump to the next block*/ 390 | if (current_buflen > s->block_size) 391 | { 392 | foundat += s->block_size; 393 | continue; 394 | } 395 | else /*We are out of buffer lets go to the next search spec*/ 396 | { 397 | foundat = NULL; 398 | break; 399 | } 400 | } 401 | 402 | header_pos = foundat; 403 | } 404 | else /**********RUN STANDARD 
SEARCH********************/ 405 | { 406 | foundat = bm_search(needle->header, 407 | needle->header_len, 408 | foundat, 409 | current_buflen, //How much to search through 410 | needle->header_bm_table, 411 | needle->case_sen, //casesensative 412 | SEARCHTYPE_FORWARD); 413 | 414 | header_pos = foundat; 415 | } 416 | 417 | if (foundat != NULL && foundat >= 0) /*We got something, run the appropriate heuristic to find the EOF*/ 418 | { 419 | current_buflen = chunk_size - (foundat - buf); 420 | 421 | if (get_mode(s, mode_ind_blk)) 422 | { 423 | #ifdef DEBUG 424 | printf("ind blk detection on\n"); 425 | #endif 426 | 427 | //dumpInd(foundat+12*1024,1024); 428 | for (x = 0; x < 3; x++) 429 | { 430 | bs = tryBS[x]; 431 | 432 | if (ind_block(foundat, current_buflen, bs)) 433 | { 434 | if (get_mode(s, mode_verbose)) 435 | { 436 | sprintf(needle->comment, " (IND BLK bs:=%d)", bs); 437 | } 438 | 439 | //dumpInd(foundat+12*bs,bs); 440 | #ifdef DEBUG 441 | printf("performing mem move\n"); 442 | #endif 443 | if(current_buflen > 13 * bs)//Make sure we have enough buffer 444 | { 445 | if (!memmove(foundat + 12 * bs, foundat + 13 * bs, current_buflen - 13 * bs)) 446 | break; 447 | 448 | found_ind = TRUE; 449 | #ifdef DEBUG 450 | printf("performing mem move complete\n"); 451 | #endif 452 | ind_ptr = foundat + 12 * bs; 453 | current_buflen -= bs; 454 | chunk_size -= bs; 455 | break; 456 | } 457 | } 458 | 459 | } 460 | 461 | } 462 | 463 | c_offset = (foundat - buf); 464 | current_pos = foundat; 465 | 466 | /*Now lets analyze the file and see if we can determine its size*/ 467 | 468 | // printf("c_offset=%llu %x %x %llx\n", c_offset,foundat,buf,c_offset); 469 | foundat = extract_file(s, c_offset, foundat, current_buflen, needle, f_offset); 470 | #ifdef DEBUG 471 | if (foundat == NULL) 472 | { 473 | printf("Foundat == NULL!!!\n"); 474 | } 475 | #endif 476 | if (get_mode(s, mode_write_all)) 477 | { 478 | if (needle->written == FALSE) 479 | { 480 | 481 | /*write every header we find*/ 482 | 
if (current_buflen >= needle->max_len) 483 | { 484 | file_size = needle->max_len; 485 | } 486 | else 487 | { 488 | file_size = current_buflen; 489 | } 490 | 491 | sprintf(needle->comment, " (Header dump)"); 492 | extractbuf = (unsigned char *)malloc(file_size * sizeof(char)); 493 | memcpy(extractbuf, header_pos, file_size); 494 | write_to_disk(s, needle, file_size, extractbuf, c_offset + f_offset); 495 | free(extractbuf); 496 | } 497 | } 498 | else if (!foundat) /*Should we search further?*/ 499 | { 500 | 501 | /*We couldn't determine where the file ends, now lets check to see 502 | * if we should try again 503 | */ 504 | if (current_buflen < needle->max_len) /*We need to bridge the gap*/ 505 | { 506 | #ifdef DEBUG 507 | printf(" Bridge the gap\n"); 508 | #endif 509 | saveme = ftello(i->handle); 510 | /*grow the buffer and try to extract again*/ 511 | newbuf = read_from_disk(c_offset + f_offset, i, needle->max_len); 512 | if (newbuf == NULL) 513 | break; 514 | current_pos = extract_file(s, 515 | c_offset, 516 | newbuf, 517 | needle->max_len, 518 | needle, 519 | f_offset); 520 | 521 | /*Lets put the fp back*/ 522 | fseeko(i->handle, saveme, SEEK_SET); 523 | 524 | 525 | free(newbuf); 526 | } 527 | else 528 | { 529 | foundat = header_pos; /*reset the foundat pointer to the location of the last header*/ 530 | foundat += needle->header_len + 1; /*jump past the header*/ 531 | } 532 | } 533 | 534 | 535 | } 536 | 537 | if (found_ind) 538 | { 539 | 540 | /*Put the ind blk back in, re-arrange the buffer so that the future blks names come out correct*/ 541 | #ifdef DEBUG 542 | printf("Replacing the ind block\n"); 543 | #endif 544 | /*This is slow, should we do this??????*/ 545 | if (!memmove(ind_ptr + 1 * bs, ind_ptr, current_buflen - 13 * bs)) 546 | break; 547 | memset(ind_ptr, 0, bs - 1); 548 | chunk_size += bs; 549 | memset(needle->comment, 0, COMMENT_LENGTH - 1); 550 | } 551 | } //end while 552 | } 553 | 554 | return TRUE; 555 | } 556 | 557 | 
/******************************************************************************** 558 | *Function: search_stream 559 | *Description: Analyze the file by reading 1 chunk (default: 100MB) at a time and 560 | *passing it to search_chunk 561 | *Return: TRUE/FALSE 562 | **********************************************************************************/ 563 | int search_stream(f_state *s, f_info *i) 564 | { 565 | u_int64_t bytesread = 0; 566 | u_int64_t f_offset = 0; 567 | u_int64_t chunk_size = ((u_int64_t) s->chunk_size) * MEGABYTE; 568 | unsigned char *buf = (unsigned char *)malloc(sizeof(char) * chunk_size); 569 | 570 | setup_stream(s, i); 571 | 572 | audit_layout(s); 573 | #ifdef DEBUG 574 | printf("\n\t READING THE FILE INTO MEMORY\n"); 575 | #endif 576 | 577 | while ((bytesread = fread(buf, 1, chunk_size, i->handle)) > 0) 578 | { 579 | if (signal_caught == SIGTERM || signal_caught == SIGINT) 580 | { 581 | user_interrupt(s, i); 582 | printf("Cleaning up.\n"); 583 | signal_caught = 0; 584 | } 585 | 586 | #ifdef DEBUG 587 | printf("\n\tbytes_read:=%llu\n", bytesread); 588 | #endif 589 | search_chunk(s, buf, i, bytesread, f_offset); 590 | f_offset += bytesread; 591 | if (!get_mode(s, mode_quiet)) 592 | { 593 | fprintf(stderr, "*"); 594 | 595 | //displayPosition(s,i,f_offset); 596 | } 597 | 598 | /*FIX ME*** 599 | * We should jump back and make sure we didn't miss any headers that are 600 | * bridged between chunks. 
What is the best way to do this?\ 601 | */ 602 | } 603 | 604 | if (!get_mode(s, mode_quiet)) 605 | { 606 | fprintf(stderr, "|\n"); 607 | } 608 | 609 | #ifdef DEBUG 610 | printf("\n\tDONE READING bytes_read:=%llu\n", bytesread); 611 | #endif 612 | if (signal_caught == SIGTERM || signal_caught == SIGINT) 613 | { 614 | user_interrupt(s, i); 615 | printf("Cleaning up.\n"); 616 | signal_caught = 0; 617 | } 618 | 619 | free(buf); 620 | return FALSE; 621 | } 622 | 623 | void audit_start(f_state *s, f_info *i) 624 | { 625 | if (!get_mode(s, mode_quiet)) 626 | { 627 | fprintf(stderr, "Processing: %s\n|", i->file_name); 628 | } 629 | 630 | audit_msg(s, FOREMOST_DIVIDER); 631 | audit_msg(s, "File: %s", i->file_name); 632 | audit_msg(s, "Start: %s", current_time()); 633 | } 634 | 635 | void audit_finish(f_state *s, f_info *i) 636 | { 637 | audit_msg(s, "Finish: %s", current_time()); 638 | } 639 | 640 | int process_file(f_state *s) 641 | { 642 | 643 | //printf("processing file\n"); 644 | f_info *i = (f_info *)malloc(sizeof(f_info)); 645 | char temp[PATH_MAX]; 646 | 647 | if ((realpath(s->input_file, temp)) == NULL) 648 | { 649 | print_error(s, s->input_file, strerror(errno)); 650 | return TRUE; 651 | } 652 | 653 | i->file_name = strdup(s->input_file); 654 | i->is_stdin = FALSE; 655 | audit_start(s, i); 656 | 657 | // printf("opening file %s\n",i->file_name); 658 | #if defined(__LINUX) 659 | #ifdef DEBUG 660 | printf("Using 64 bit fopen\n"); 661 | #endif 662 | i->handle = fopen64(i->file_name, "rb"); 663 | #elif defined(__WIN32) 664 | 665 | /*I would like to be able to read from 666 | * physical devices in Windows, have played 667 | * with different options to fopen and the 668 | * dd src says you need write access on WinXP 669 | * but nothing seems to work*/ 670 | i->handle = fopen(i->file_name, "rb"); 671 | #else 672 | i->handle = fopen(i->file_name, "rb"); 673 | #endif 674 | if (i->handle == NULL) 675 | { 676 | print_error(s, s->input_file, strerror(errno)); 677 | 
audit_msg(s, "Error: %s", strerror(errno)); 678 | return TRUE; 679 | } 680 | 681 | i->total_bytes = find_file_size(i->handle); 682 | search_stream(s, i); 683 | audit_finish(s, i); 684 | 685 | fclose(i->handle); 686 | free(i); 687 | return FALSE; 688 | } 689 | 690 | int process_stdin(f_state *s) 691 | { 692 | f_info *i = (f_info *)malloc(sizeof(f_info)); 693 | 694 | i->file_name = strdup("stdin"); 695 | s->input_file = "stdin"; 696 | i->handle = stdin; 697 | i->is_stdin = TRUE; 698 | 699 | /* We can't compute the size of this stream, we just ignore it*/ 700 | i->total_bytes = 0; 701 | audit_start(s, i); 702 | 703 | search_stream(s, i); 704 | 705 | free(i->file_name); 706 | free(i); 707 | return FALSE; 708 | } 709 | -------------------------------------------------------------------------------- /extract.h: -------------------------------------------------------------------------------- 1 | /* 2 | local file header signature 4 bytes (0x04034b50) 3 | version needed to extract 2 bytes 4 | general purpose bit flag 2 bytes 5 | compression method 2 bytes 6 | last mod file time 2 bytes 7 | last mod file date 2 bytes 8 | crc-32 4 bytes 9 | compressed size 4 bytes 10 | uncompressed size 4 bytes 11 | filename length 2 bytes 12 | extra field length 2 bytes 13 | */ 14 | 15 | /* 16 | central file header signature 4 bytes (0x02014b50) 17 | version made by 2 bytes 18 | version needed to extract 2 bytes 19 | general purpose bit flag 2 bytes 20 | compression method 2 bytes 21 | last mod file time 2 bytes 22 | last mod file date 2 bytes 23 | crc-32 4 bytes 24 | compressed size 4 bytes 25 | uncompressed size 4 bytes 26 | filename length 2 bytes 27 | extra field length 2 bytes 28 | file comment length 2 bytes 29 | disk number start 2 bytes 30 | internal file attributes 2 bytes 31 | external file attributes 4 bytes 32 | relative offset of local header 4 bytes 33 | */ 34 | 35 | /* end of central dir signature 4 bytes (0x06054b50) 36 | number of this disk 2 bytes 37 | number of the disk 
with the 38 | start of the central directory 2 bytes 39 | total number of entries in 40 | the central dir on this disk 2 bytes 41 | total number of entries in 42 | the central dir 2 bytes 43 | size of the central directory 4 bytes 44 | offset of start of central 45 | directory with respect to 46 | the starting disk number 4 bytes 47 | zipfile comment length 2 bytes 48 | zipfile comment (variable size) 49 | */ 50 | struct zipLocalFileHeader 51 | { 52 | unsigned int signature; //0 53 | unsigned short version; //4 54 | unsigned short genFlag; //6 55 | signed short compression; //8 56 | unsigned short last_mod_time; //10 57 | unsigned short last_mod_date; //12 58 | unsigned int crc; //14 59 | unsigned int compressed; //18 60 | unsigned int uncompressed; //22 61 | unsigned short filename_length; //26 62 | unsigned short extra_length; //28 63 | }; 64 | struct zipCentralFileHeader 65 | { 66 | unsigned int signature; //0 67 | unsigned char version_extract[2]; //4 68 | unsigned char version_madeby[2]; //6 69 | unsigned short genFlag; //8 70 | unsigned short compression; //10 71 | unsigned short last_mod_time; //12 72 | unsigned short last_mod_date; //14 73 | unsigned int crc; //16 74 | unsigned int compressed; //20 75 | unsigned int uncompressed; //24 76 | unsigned short filename_length; //28 77 | unsigned short extra_length; //30 78 | unsigned short filecomment_length; //32 79 | unsigned short disk_number_start; //34 80 | }; 81 | struct zipEndCentralFileHeader 82 | { 83 | unsigned int signature; //0 84 | unsigned short numOfdisk; //4 85 | unsigned short compression; //6 86 | unsigned short start_of_central_dir; //8 87 | unsigned short num_entries_in_central_dir; //10 88 | unsigned int size_of_central_dir; //12 89 | unsigned int offset; //16 90 | unsigned short comment_length; //20 91 | }; 92 | 93 | void print_zip(struct zipLocalFileHeader *fileHeader, struct zipCentralFileHeader *centralHeader) 94 | { 95 | printf("\n Local Header Data\n"); 96 | 
printf("GenFlag:=%d,compressed:=%d,uncompressed:=%d\n", 97 | fileHeader->genFlag, 98 | fileHeader->compressed, 99 | fileHeader->uncompressed); 100 | printf("Compression:=%d, filename_len:=%d,extralen:=%d\n", 101 | fileHeader->compression, 102 | fileHeader->filename_length, 103 | fileHeader->extra_length); 104 | 105 | printf(" Central Header Data\n"); 106 | printf("GenFlag:=%d,compressed:=%d,uncompressed:=%d\n", 107 | centralHeader->genFlag, 108 | centralHeader->compressed, 109 | centralHeader->uncompressed); 110 | printf("Compression:=%d, Version Madeby:=%x%x\n", 111 | centralHeader->compression, 112 | centralHeader->version_madeby[0], 113 | centralHeader->version_madeby[1]); 114 | } 115 | -------------------------------------------------------------------------------- /foremost.8.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jin-stuff/foremost/f6813bcc29fa2aef8f04dafd0843e698a13cc9d0/foremost.8.gz -------------------------------------------------------------------------------- /foremost.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Foremost configuration file 3 | #------------------------------------------------------------------------- 4 | # Note the foremost configuration file is provided to support formats which 5 | # don't have built-in extraction functions. If the format is built-in to foremost 6 | # simply run foremost with -t and provide the format you wish to extract. 7 | # 8 | # The configuration file is used to control what types of files foremost 9 | # searches for. A sample configuration file, foremost.conf, is included with 10 | # this distribution. For each file type, the configuration file describes 11 | # the file's extension, whether the header and footer are case sensitive, 12 | # the maximum file size, and the header and footer for the file. 
The footer 13 | # field is optional, but header, size, case sensitivity, and extension are 14 | # not! 15 | # 16 | # Any line that begins with a '#' is considered a comment and ignored. Thus, 17 | # to skip a file type just put a '#' at the beginning of that line 18 | # 19 | 20 | # Headers and footers are decoded before use. To specify a value in 21 | # hexadecimal use \x[0-f][0-f], and for octal use \[0-3][0-7][0-7]. Spaces 22 | # can be represented by \s. Example: "\x4F\123\I\sCCI" decodes to "OSI CCI". 23 | # 24 | # To match any single character (aka a wildcard) use a '?'. If you need to 25 | # search for the '?' character, you will need to change the 'wildcard' line 26 | # *and* every occurrence of the old wildcard character in the configuration 27 | # file. Don't forget those hex and octal values! '?' is equal to 0x3f and 28 | # \077. 29 | # 30 | # If you would like to extract files without an extension enter the value 31 | # "NONE" in the extension column (note: you can change the value of this 32 | # "no suffix" flag by setting the variable FOREMOST_NOEXTENSION_SUFFIX 33 | # in foremost.h and recompiling). 34 | # 35 | # The ASCII option will extract all ASCII printable characters before and after 36 | # the keyword provided. 37 | # 38 | # The NEXT keyword after a footer instructs foremost to search forwards for data 39 | # that starts with the header provided and terminates or is followed by data in 40 | # the footer -- the footer data is not included in the output. The data in the 41 | # footer, when used with the NEXT keyword effectively allows you to search for 42 | # data that you know for sure should not be in the output file. This method for 43 | # example, lets you search for two 'starting' headers in a document that doesn't 44 | # have a good ending footer and you can't say exactly what the footer is, but 45 | # you know if you see another header, that should end the search and an output 46 | # file should be written.
47 | 48 | # To redefine the wildcard character, change the setting below and all 49 | # occurrences in the foremost.conf file. 50 | # 51 | #wildcard ? 52 | # 53 | # case size header footer 54 | #extension sensitive 55 | # 56 | #--------------------------------------------------------------------- 57 | # EXAMPLE WITH NO SUFFIX 58 | #--------------------------------------------------------------------- 59 | # 60 | # Here is an example of how to use the no extension option. Any files 61 | # containing the string "FOREMOST" would be extracted to a file without 62 | # an extension (eg: 00000000,00000001) 63 | # NONE y 1000 FOREMOST 64 | # 65 | #--------------------------------------------------------------------- 66 | # GRAPHICS FILES 67 | #--------------------------------------------------------------------- 68 | # 69 | # 70 | # AOL ART files 71 | # art y 150000 \x4a\x47\x04\x0e \xcf\xc7\xcb 72 | # art y 150000 \x4a\x47\x03\x0e \xd0\xcb\x00\x00 73 | # 74 | # GIF and JPG files (very common) 75 | # (NOTE THESE FORMATS HAVE BUILTIN EXTRACTION FUNCTION) 76 | # gif y 155000000 \x47\x49\x46\x38\x37\x61 \x00\x3b 77 | # gif y 155000000 \x47\x49\x46\x38\x39\x61 \x00\x00\x3b 78 | # jpg y 20000000 \xff\xd8\xff\xe0\x00\x10 \xff\xd9 79 | # jpg y 20000000 \xff\xd8\xff\xe1 \xff\xd9 80 | # jpg y 20000000 \xff\xd8 \xff\xd9 81 | # 82 | # PNG (used in web pages) 83 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 84 | # png y 200000 \x50\x4e\x47?
\xff\xfc\xfd\xfe 85 | # 86 | # 87 | # BMP 88 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 89 | # bmp y 100000 BM??\x00\x00\x00 90 | # 91 | # TIF 92 | # tif y 200000000 \x49\x49\x2a\x00 93 | # 94 | #--------------------------------------------------------------------- 95 | # ANIMATION FILES 96 | #--------------------------------------------------------------------- 97 | # 98 | # AVI (Windows animation and DiVX/MPEG-4 movies) 99 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 100 | # avi y 4000000 RIFF????AVI 101 | # 102 | # Apple Quicktime 103 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 104 | # mov y 4000000 ????????\x6d\x6f\x6f\x76 105 | # mov y 4000000 ????????\x6d\x64\x61\x74 106 | # 107 | # MPEG Video 108 | # mpg y 4000000 mpg eof 109 | # mpg y 20000000 \x00\x00\x01\xba \x00\x00\x01\xb9 110 | # mpg y 20000000 \x00\x00\x01\xb3 \x00\x00\x01\xb7 111 | # 112 | # Macromedia Flash 113 | # fws y 4000000 FWS 114 | # 115 | #--------------------------------------------------------------------- 116 | # MICROSOFT OFFICE 117 | #--------------------------------------------------------------------- 118 | # 119 | # Word documents 120 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 121 | # doc y 12500000 \xd0\xcf\x11\xe0\xa1\xb1 122 | # 123 | # Outlook files 124 | # pst y 400000000 \x21\x42\x4e\xa5\x6f\xb5\xa6 125 | # ost y 400000000 \x21\x42\x44\x4e 126 | # 127 | # Outlook Express 128 | # dbx y 4000000 \xcf\xad\x12\xfe\xc5\xfd\x74\x6f 129 | # idx y 4000000 \x4a\x4d\x46\x39 130 | # mbx y 4000000 \x4a\x4d\x46\x36 131 | # 132 | #--------------------------------------------------------------------- 133 | # WORDPERFECT 134 | #--------------------------------------------------------------------- 135 | # 136 | # wpc y 100000 ?WPC 137 | # 138 | #--------------------------------------------------------------------- 139 | # HTML (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 140 | 
#--------------------------------------------------------------------- 141 | # 142 | # htm n 50000 143 | # 144 | #--------------------------------------------------------------------- 145 | # ADOBE PDF (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 146 | #--------------------------------------------------------------------- 147 | # 148 | # pdf y 5000000 %PDF- %EOF 149 | # 150 | # 151 | #--------------------------------------------------------------------- 152 | # AOL (AMERICA ONLINE) 153 | #--------------------------------------------------------------------- 154 | # 155 | # AOL Mailbox 156 | # mail y 500000 \x41\x4f\x4c\x56\x4d 157 | # 158 | # 159 | # 160 | #--------------------------------------------------------------------- 161 | # PGP (PRETTY GOOD PRIVACY) 162 | #--------------------------------------------------------------------- 163 | # 164 | # PGP Disk Files 165 | # pgd y 500000 \x50\x47\x50\x64\x4d\x41\x49\x4e\x60\x01 166 | # 167 | # Public Key Ring 168 | # pgp y 100000 \x99\x00 169 | # Security Ring 170 | # pgp y 100000 \x95\x01 171 | # pgp y 100000 \x95\x00 172 | # Encrypted Data or ASCII armored keys 173 | # pgp y 100000 \xa6\x00 174 | # (there should be a trailer for this...) 
175 | # txt y 100000 -----BEGIN\040PGP 176 | # 177 | # 178 | #--------------------------------------------------------------------- 179 | # RPM (Linux package format) 180 | #--------------------------------------------------------------------- 181 | # rpm y 1000000 \xed\xab 182 | # 183 | # 184 | #--------------------------------------------------------------------- 185 | # SOUND FILES 186 | #--------------------------------------------------------------------- 187 | # (NOTE THIS FORMAT HAS A BUILTIN EXTRACTION FUNCTION) 188 | # wav y 200000 RIFF????WAVE 189 | # 190 | # Real Audio Files 191 | # ra y 1000000 \x2e\x72\x61\xfd 192 | # ra y 1000000 .RMF 193 | # 194 | # asf y 8000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C 195 | # 196 | # wmv y 20000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C 197 | # 198 | # wma y 8000000 \x30\x26\xB2\x75 \x00\x00\x00\xFF 199 | # 200 | # wma y 8000000 \x30\x26\xB2\x75 \x52\x9A\x12\x46 201 | # 202 | # mp3 y 8000000 \xFF\xFB??\x44\x00\x00 203 | # mp3 y 8000000 \x57\x41\x56\45 \x00\x00\xFF\ 204 | # mp3 y 8000000 \xFF\xFB\xD0\ \xD1\x35\x51\xCC\ 205 | # mp3 y 8000000 \x49\x44\x33\ 206 | # mp3 y 8000000 \x4C\x41\x4D\x45\ 207 | #--------------------------------------------------------------------- 208 | # WINDOWS REGISTRY FILES 209 | #--------------------------------------------------------------------- 210 | # 211 | # Windows NT registry 212 | # dat y 4000000 regf 213 | # Windows 95 registry 214 | # dat y 4000000 CREG 215 | # 216 | # lnk y 5000 \x4C\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xC0\x00\x00 217 | # chm y 100000 \x49\x54\x53\x46\x03\x00\x00\x00\x60\x00\x00\x00\x01\x00\x00 218 | # cookie n 4096 id= 219 | # rdp y 4096 \xFF\xFE\x73\x00\x63\x00\x72\x00\x65\x00\x65\x00\x6E\x00\x20\x00\x6D 220 | # 221 | #--------------------------------------------------------------------- 222 | # MISCELLANEOUS 223 | #--------------------------------------------------------------------- 224 | # (NOTE 
THIS FORMAT HAS BUILTIN EXTRACTION FUNCTION) 225 | # zip y 10000000 PK\x03\x04 \x3c\xac 226 | # (NOTE THIS FORMAT HAS BUILTIN EXTRACTION FUNCTION) 227 | # rar y 10000000 Rar! 228 | # 229 | # java y 1000000 \xca\xfe\xba\xbe 230 | # 231 | # cpp y 20000 #include #include ASCII 232 | #--------------------------------------------------------------------- 233 | # ScanSoft PaperPort "Max" files 234 | #--------------------------------------------------------------------- 235 | # max y 1000000 \x56\x69\x47\x46\x6b\x1a\x00\x00\x00\x00 \x00\x00\x05\x80\x00\x00 236 | #--------------------------------------------------------------------- 237 | # PINs Password Manager program 238 | #--------------------------------------------------------------------- 239 | # pins y 8000 \x50\x49\x4e\x53\x20\x34\x2e\x32\x30\x0d 240 | -------------------------------------------------------------------------------- /helpers.c: -------------------------------------------------------------------------------- 1 | 2 | /* MD5DEEP - helpers.c 3 | * 4 | * By Jesse Kornblum 5 | * 6 | * This is a work of the US Government. In accordance with 17 USC 105, 7 | * copyright protection is not available for any work of the US Government. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | * 13 | */ 14 | 15 | #include "main.h" 16 | 17 | /* Removes any newlines at the end of the string buf. 18 | Works for both *nix and Windows styles of newlines. 19 | Returns the new length of the string. 
*/ 20 | unsigned int chop (char *buf) 21 | { 22 | 23 | /* Windows newlines are 0x0d 0x0a, *nix are 0x0a */ 24 | unsigned int len = strlen(buf); 25 | if (buf[len - 1] == 0x0a) 26 | { 27 | if (buf[len - 2] == 0x0d) 28 | { 29 | buf[len - 2] = buf[len - 1]; 30 | } 31 | buf[len - 1] = buf[len]; 32 | } 33 | return strlen(buf); 34 | } 35 | 36 | char *units(unsigned int c) 37 | { 38 | switch (c) 39 | { 40 | case 0: return "B"; 41 | case 1: return "KB"; 42 | case 2: return "MB"; 43 | case 3: return "GB"; 44 | case 4: return "TB"; 45 | case 5: return "PB"; 46 | case 6: return "EB"; 47 | /* Steinbach's Guideline for Systems Programming: 48 | Never test for an error condition you don't know how to handle. 49 | 50 | Granted, given that no existing system can handle anything 51 | more than 18 exabytes, this shouldn't be an issue. But how do we 52 | communicate that 'this shouldn't happen' to the user? */ 53 | default: return "??"; 54 | } 55 | } 56 | 57 | char *human_readable(off_t size, char *buffer) 58 | { 59 | unsigned int count = 0; 60 | while (size > 1024) 61 | { 62 | size /= 1024; 63 | ++count; 64 | } 65 | 66 | /* The size will be, at most, 1023, and the units will be 67 | two characters no matter what. Thus, the maximum length of 68 | this string is six characters. e.g. 
strlen("1023 EB") = 6 */ 69 | if (sizeof(off_t) == 4) 70 | { 71 | snprintf(buffer, 8, "%u %s", (unsigned int)size, units(count)); 72 | } 73 | else if (sizeof(off_t) == 8) 74 | { 75 | snprintf(buffer, 8, "%llu %s", (u_int64_t) size, units(count)); 76 | } 77 | 78 | return buffer; 79 | } 80 | 81 | char *current_time(void) 82 | { 83 | time_t now = time(NULL); 84 | char *ascii_time = ctime(&now); 85 | chop(ascii_time); 86 | return ascii_time; 87 | } 88 | 89 | /* Shift the contents of a string so that the values after 'new_start' 90 | will now begin at location 'start' */ 91 | void shift_string(char *fn, int start, int new_start) 92 | { 93 | if (start < 0 || start > strlen(fn) || new_start < 0 || new_start < start) 94 | return; 95 | 96 | while (new_start < strlen(fn)) 97 | { 98 | fn[start] = fn[new_start]; 99 | new_start++; 100 | start++; 101 | } 102 | 103 | fn[start] = 0; 104 | } 105 | 106 | void make_magic(void) 107 | { 108 | printf("%s%s", 109 | "\x53\x41\x4E\x20\x44\x49\x4D\x41\x53\x20\x48\x49\x47\x48\x20\x53\x43\x48\x4F\x4F\x4C\x20\x46\x4F\x4F\x54\x42\x41\x4C\x4C\x20\x52\x55\x4C\x45\x53\x21", 110 | NEWLINE); 111 | } 112 | 113 | #if defined(__UNIX) 114 | 115 | /* Return the size, in bytes of an open file stream. 
On error, return 0 */ 116 | #if defined(__LINUX) 117 | 118 | off_t find_file_size(FILE *f) 119 | { 120 | off_t num_sectors = 0; 121 | int fd = fileno(f); 122 | struct stat sb; 123 | 124 | if (fstat(fd, &sb)) 125 | { 126 | return 0; 127 | } 128 | 129 | if (S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode)) 130 | return sb.st_size; 131 | else if (S_ISCHR(sb.st_mode) || S_ISBLK(sb.st_mode)) 132 | { 133 | if (ioctl(fd, BLKGETSIZE, &num_sectors)) 134 | { 135 | #if defined(__DEBUG) 136 | fprintf(stderr, "%s: ioctl call to BLKGETSIZE failed.%s", __progname, NEWLINE); 137 | #endif 138 | } 139 | else 140 | return (num_sectors * 512); 141 | } 142 | 143 | return 0; 144 | } 145 | 146 | #elif defined(__MACOSX) 147 | 148 | #include 149 | #include 150 | #include 151 | 152 | off_t find_file_size(FILE *f) 153 | { 154 | #ifdef DEBUG 155 | printf(" FIND MAC file size\n"); 156 | #endif 157 | return 0; /*FIX ME this function causes strange problems on MACOSX, so for now return 0*/ 158 | struct stat info; 159 | off_t total = 0; 160 | off_t original = ftello(f); 161 | int ok = TRUE, fd = fileno(f); 162 | 163 | /* I'd prefer not to use fstat as it will follow symbolic links. We don't 164 | follow symbolic links. That being said, all symbolic links *should* 165 | have been caught before we got here. */ 166 | fstat(fd, &info); 167 | 168 | /* Block devices, like /dev/hda, don't return a normal filesize. 169 | If we are working with a block device, we have to ask the operating 170 | system to tell us the true size of the device. 171 | 172 | The following only works on Linux as far as I know. If you know 173 | how to port this code to another operating system, please contact 174 | the current maintainer of this program! 
*/ 175 | if (S_ISBLK(info.st_mode)) 176 | { 177 | daddr_t blocksize = 0; 178 | daddr_t blockcount = 0; 179 | 180 | /* Get the block size */ 181 | if (ioctl(fd, DKIOCGETBLOCKSIZE, blocksize) < 0) 182 | { 183 | ok = FALSE; 184 | #if defined(__DEBUG) 185 | perror("DKIOCGETBLOCKSIZE failed"); 186 | #endif 187 | } 188 | 189 | /* Get the number of blocks */ 190 | if (ok) 191 | { 192 | if (ioctl(fd, DKIOCGETBLOCKCOUNT, blockcount) < 0) 193 | { 194 | #if defined(__DEBUG) 195 | perror("DKIOCGETBLOCKCOUNT failed"); 196 | #endif 197 | } 198 | } 199 | 200 | total = blocksize * blockcount; 201 | 202 | } 203 | 204 | else 205 | { 206 | 207 | /* I don't know why, but if you don't initialize this value you'll 208 | get wildly innacurate results when you try to run this function */ 209 | if ((fseeko(f, 0, SEEK_END))) 210 | return 0; 211 | total = ftello(f); 212 | if ((fseeko(f, original, SEEK_SET))) 213 | return 0; 214 | } 215 | 216 | return (total - original); 217 | } 218 | 219 | #else 220 | 221 | /* This is code for general UNIX systems 222 | (e.g. 
NetBSD, FreeBSD, OpenBSD, etc) */ 223 | static off_t midpoint(off_t a, off_t b, long blksize) 224 | { 225 | off_t aprime = a / blksize; 226 | off_t bprime = b / blksize; 227 | off_t c, cprime; 228 | 229 | cprime = (bprime - aprime) / 2 + aprime; 230 | c = cprime * blksize; 231 | 232 | return c; 233 | } 234 | 235 | off_t find_dev_size(int fd, int blk_size) 236 | { 237 | 238 | off_t curr = 0, amount = 0; 239 | void *buf; 240 | 241 | if (blk_size == 0) 242 | return 0; 243 | 244 | buf = malloc(blk_size); 245 | 246 | for (;;) 247 | { 248 | ssize_t nread; 249 | 250 | lseek(fd, curr, SEEK_SET); 251 | nread = read(fd, buf, blk_size); 252 | if (nread < blk_size) 253 | { 254 | if (nread <= 0) 255 | { 256 | if (curr == amount) 257 | { 258 | free(buf); 259 | lseek(fd, 0, SEEK_SET); 260 | return amount; 261 | } 262 | 263 | curr = midpoint(amount, curr, blk_size); 264 | } 265 | else 266 | { /* 0 < nread < blk_size */ 267 | free(buf); 268 | lseek(fd, 0, SEEK_SET); 269 | return amount + nread; 270 | } 271 | } 272 | else 273 | { 274 | amount = curr + blk_size; 275 | curr = amount * 2; 276 | } 277 | } 278 | 279 | free(buf); 280 | lseek(fd, 0, SEEK_SET); 281 | return amount; 282 | } 283 | 284 | off_t find_file_size(FILE *f) 285 | { 286 | int fd = fileno(f); 287 | struct stat sb; 288 | return 0; /*FIX ME SOLARIS FILE SIZE CAUSES SEG FAULT, for now just return 0*/ 289 | 290 | if (fstat(fd, &sb)) 291 | return 0; 292 | 293 | if (S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode)) 294 | return sb.st_size; 295 | else if (S_ISCHR(sb.st_mode) || S_ISBLK(sb.st_mode)) 296 | return find_dev_size(fd, sb.st_blksize); 297 | 298 | return 0; 299 | } 300 | 301 | #endif /* UNIX Flavors */ 302 | #endif /* ifdef __UNIX */ 303 | 304 | #if defined(__WIN32) 305 | off_t find_file_size(FILE *f) 306 | { 307 | off_t total = 0, original = ftello(f); 308 | 309 | if ((fseeko(f, 0, SEEK_END))) 310 | return 0; 311 | 312 | total = ftello(f); 313 | if ((fseeko(f, original, SEEK_SET))) 314 | return 0; 315 | 316 | return 
total; 317 | } 318 | 319 | #endif /* ifdef __WIN32 */ 320 | 321 | void print_search_specs(f_state *s) 322 | { 323 | int i = 0; 324 | int j = 0; 325 | printf("\nDUMPING BUILTIN SEARCH INFO\n\t"); 326 | for (i = 0; i < s->num_builtin; i++) 327 | { 328 | 329 | printf("%s:\n\t footer_len:=%d, header_len:=%d, max_len:=%llu ", 330 | search_spec[i].suffix, 331 | search_spec[i].footer_len, 332 | search_spec[i].header_len, 333 | search_spec[i].max_len); 334 | printf("\n\t header:\t"); 335 | printx(search_spec[i].header, 0, search_spec[i].header_len); 336 | printf("\t footer:\t"); 337 | printx(search_spec[i].footer, 0, search_spec[i].footer_len); 338 | for (j = 0; j < search_spec[i].num_markers; j++) 339 | { 340 | printf("\tmarker: \t"); 341 | printx(search_spec[i].markerlist[j].value, 0, search_spec[i].markerlist[j].len); 342 | } 343 | 344 | } 345 | 346 | } 347 | 348 | void print_stats(f_state *s) 349 | { 350 | int i = 0; 351 | audit_msg(s, "\n%d FILES EXTRACTED\n\t", s->fileswritten); 352 | for (i = 0; i < s->num_builtin; i++) 353 | { 354 | 355 | if (search_spec[i].found != 0) 356 | { 357 | if (search_spec[i].type == OLE) 358 | search_spec[i].suffix = "ole"; 359 | else if (search_spec[i].type == RIFF) 360 | search_spec[i].suffix = "rif"; 361 | else if (search_spec[i].type == ZIP) 362 | search_spec[i].suffix = "zip"; 363 | audit_msg(s, "%s:= %d", search_spec[i].suffix, search_spec[i].found); 364 | } 365 | } 366 | } 367 | 368 | int charactersMatch(char a, char b, int caseSensitive) 369 | { 370 | 371 | //if(a==b) return 1; 372 | if (a == wildcard || a == b) 373 | return 1; 374 | if (caseSensitive || (a < 'A' || a > 'z' || b < 'A' || b > 'z')) 375 | return 0; 376 | 377 | /* This line is equivalent to (abs(a-b)) == 'a' - 'A' */ 378 | return (abs(a - b) == 32); 379 | } 380 | 381 | int memwildcardcmp(const void *s1, const void *s2, size_t n, int caseSensitive) 382 | { 383 | if (n != 0) 384 | { 385 | register const unsigned char *p1 = s1, *p2 = s2; 386 | do 387 | { 388 | if 
(!charactersMatch(*p1++, *p2++, caseSensitive)) 389 | return (*--p1 -*--p2); 390 | } 391 | while (--n != 0); 392 | } 393 | 394 | return (0); 395 | } 396 | 397 | void printx(unsigned char *buf, int start, int end) 398 | { 399 | int i = 0; 400 | for (i = start; i < end; i++) 401 | { 402 | printf("%x ", buf[i]); 403 | } 404 | 405 | printf("\n"); 406 | } 407 | 408 | char *reverse_string(char *to, char *from, int startLocation, int endLocation) 409 | { 410 | int i = endLocation; 411 | int j = 0; 412 | for (j = startLocation; j < endLocation; j++) 413 | { 414 | i--; 415 | to[j] = from[i]; 416 | } 417 | 418 | return to; 419 | } 420 | 421 | unsigned short htos(unsigned char s[], int endian) 422 | { 423 | 424 | unsigned char *bytes = (unsigned char *)malloc(sizeof(unsigned short) * sizeof(char)); 425 | unsigned short size = 0; 426 | char temp = 'x'; 427 | bytes = memcpy(bytes, s, sizeof(short)); 428 | 429 | if (endian == FOREMOST_BIG_ENDIAN && BYTE_ORDER == LITTLE_ENDIAN) 430 | { 431 | 432 | //printf("switching the byte order\n"); 433 | temp = bytes[0]; 434 | bytes[0] = bytes[1]; 435 | bytes[1] = temp; 436 | 437 | } 438 | else if (endian == FOREMOST_LITTLE_ENDIAN && BYTE_ORDER == BIG_ENDIAN) 439 | { 440 | temp = bytes[0]; 441 | bytes[0] = bytes[1]; 442 | bytes[1] = temp; 443 | } 444 | 445 | size = *((unsigned short *)bytes); 446 | free(bytes); 447 | return size; 448 | } 449 | 450 | unsigned int htoi(unsigned char s[], int endian) 451 | { 452 | 453 | int length = sizeof(int); 454 | unsigned char *bytes = (unsigned char *)malloc(length * sizeof(char)); 455 | unsigned int size = 0; 456 | 457 | bytes = memcpy(bytes, s, length); 458 | 459 | if (endian == FOREMOST_BIG_ENDIAN && BYTE_ORDER == LITTLE_ENDIAN) 460 | { 461 | 462 | bytes = (unsigned char *)reverse_string((char *)bytes, (char *)s, 0, length); 463 | } 464 | else if (endian == FOREMOST_LITTLE_ENDIAN && BYTE_ORDER == BIG_ENDIAN) 465 | { 466 | 467 | bytes = (unsigned char *)reverse_string((char *)bytes, (char *)s, 0, 
length); 468 | } 469 | 470 | size = *((unsigned int *)bytes); 471 | 472 | free(bytes); 473 | return size; 474 | } 475 | 476 | u_int64_t htoll(unsigned char s[], int endian) 477 | { 478 | int length = sizeof(u_int64_t); 479 | unsigned char *bytes = (unsigned char *)malloc(length * sizeof(char)); 480 | u_int64_t size = 0; 481 | bytes = memcpy(bytes, s, length); 482 | #ifdef DEBUG 483 | printf("htoll len=%d endian=%d\n",length,endian); 484 | #endif 485 | if (endian == FOREMOST_BIG_ENDIAN && BYTE_ORDER == LITTLE_ENDIAN) 486 | { 487 | #ifdef DEBUG 488 | printf("reverse0\n"); 489 | #endif 490 | bytes = (unsigned char *)reverse_string((char *)bytes, (char *)s, 0, length); 491 | } 492 | else if (endian == FOREMOST_LITTLE_ENDIAN && BYTE_ORDER == BIG_ENDIAN) 493 | { 494 | #ifdef DEBUG 495 | printf("reverse1\n"); 496 | #endif 497 | bytes = (unsigned char *)reverse_string((char *)bytes, (char *)s, 0, length); 498 | } 499 | 500 | size = *((u_int64_t *)bytes); 501 | #ifdef DEBUG 502 | printf("htoll size=%llu\n",size); 503 | printx(bytes,0,length); 504 | #endif 505 | 506 | 507 | free(bytes); 508 | return size; 509 | } 510 | 511 | /* display Position: Tell the user how far through the infile we are */ 512 | int displayPosition(f_state *s, f_info *i, u_int64_t pos) 513 | { 514 | 515 | int percentDone = 0; 516 | static int last_val = 0; 517 | int count; 518 | int flag = FALSE; 519 | int factor = 4; 520 | int multiplier = 25; 521 | int number_of_stars = 0; 522 | char buffer[256]; 523 | long double skip = s->skip * s->block_size; 524 | 525 | long double tot_bytes = (long double)((i->total_bytes)); 526 | tot_bytes -= skip; 527 | if (i->total_bytes > 0) 528 | { 529 | percentDone = (((long double)pos) / ((long double)tot_bytes)) * 100; 530 | if (percentDone != last_val) 531 | flag = TRUE; 532 | last_val = percentDone; 533 | } 534 | else 535 | { 536 | flag = TRUE; 537 | factor = 4; 538 | multiplier = 25; 539 | } 540 | 541 | if (flag) 542 | { 543 | number_of_stars = percentDone / factor; 
544 | 545 | printf("%s: |", s->input_file); 546 | for (count = 0; count < number_of_stars; count++) 547 | { 548 | printf("*"); 549 | } 550 | 551 | for (count = 0; count < (multiplier - number_of_stars); count++) 552 | { 553 | printf(" "); 554 | } 555 | 556 | if (i->total_bytes > 0) 557 | { 558 | printf("|\t %d%% done\n", percentDone); 559 | } 560 | else 561 | { 562 | printf("|\t %s done\n", human_readable(pos, buffer)); 563 | 564 | } 565 | } 566 | 567 | if (percentDone == 100) 568 | { 569 | last_val = 0; 570 | } 571 | 572 | return TRUE; 573 | } 574 | -------------------------------------------------------------------------------- /library/libiberty.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jin-stuff/foremost/f6813bcc29fa2aef8f04dafd0843e698a13cc9d0/library/libiberty.a -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /* FOREMOST 5 | * 6 | * By Jesse Kornblum and Kris Kendall 7 | * 8 | * This is a work of the US Government. In accordance with 17 USC 105, 9 | * copyright protection is not available for any work of the US Government. 10 | * 11 | * This program is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
14 | * 15 | * 16 | */ 17 | #include "main.h" 18 | 19 | #ifdef __WIN32 20 | 21 | /* Allows us to open standard input in binary mode by default 22 | See http://gnuwin32.sourceforge.net/compile.html for more */ 23 | int _CRT_fmode = _O_BINARY; 24 | #endif 25 | 26 | void catch_alarm(int signum) 27 | { 28 | signal_caught = signum; 29 | signal(signum, catch_alarm); 30 | } 31 | 32 | void register_signal_handler(void) 33 | { 34 | signal_caught = 0; 35 | 36 | if (signal(SIGINT, catch_alarm) == SIG_IGN) 37 | signal(SIGINT, SIG_IGN); 38 | if (signal(SIGTERM, catch_alarm) == SIG_IGN) 39 | signal(SIGTERM, SIG_IGN); 40 | 41 | #ifndef __WIN32 42 | 43 | /* Note: I haven't found a way to get notified of 44 | console resize events in Win32. Right now the statusbar 45 | will be too long or too short if the user decides to resize 46 | their console window while foremost runs.. */ 47 | 48 | /* RBF - Handle TTY events */ 49 | 50 | // The function setttywidth is in the old helpers.c 51 | // signal(SIGWINCH, setttywidth); 52 | #endif 53 | } 54 | 55 | void try_msg(void) 56 | { 57 | fprintf(stderr, "Try `%s -h` for more information.%s", __progname, NEWLINE); 58 | } 59 | 60 | /* The usage function should, at most, display 22 lines of text to fit 61 | on a single screen */ 62 | void usage(void) 63 | { 64 | fprintf(stderr, "%s version %s by %s.%s", __progname, VERSION, AUTHOR, NEWLINE); 65 | fprintf(stderr, 66 | "%s %s [-v|-V|-h|-T|-Q|-q|-a|-w-d] [-t ] [-s ] [-k ] \n\t[-b ] [-c ] [-o ] [-i time_stamp = TRUE; 162 | break; 163 | 164 | case 't': 165 | 166 | /*See if we have multiple file types to define*/ 167 | ptr1 = ptr2 = optarg; 168 | while (1) 169 | { 170 | if (!*ptr2) 171 | { 172 | if (!set_search_def(s, ptr1, 0)) 173 | { 174 | usage(); 175 | exit(EXIT_SUCCESS); 176 | } 177 | break; 178 | } 179 | 180 | if (*ptr2 == ',') 181 | { 182 | *ptr2 = '\0'; 183 | if (!set_search_def(s, ptr1, 0)) 184 | { 185 | usage(); 186 | exit(EXIT_SUCCESS); 187 | } 188 | 189 | *ptr2++ = ','; 190 | ptr1 = ptr2; 191 
| 192 | else 193 | { 194 | ptr2++; 195 | } 196 | } 197 | break; 198 | 199 | case 'h': 200 | usage(); 201 | exit(EXIT_SUCCESS); 202 | 203 | case 'V': 204 | printf("%s%s", VERSION, NEWLINE); 205 | 206 | /* We could just say printf(COPYRIGHT), but that's a good way 207 | to introduce a format string vulnerability. Better to always 208 | use good programming practice... */ 209 | printf("%s", COPYRIGHT); 210 | exit(EXIT_SUCCESS); 211 | 212 | default: 213 | try_msg(); 214 | exit(EXIT_FAILURE); 215 | 216 | } 217 | 218 | } 219 | 220 | #ifdef __DEBUG 221 | dump_state(s); 222 | #endif 223 | 224 | } 225 |

/* Returns TRUE when path should be treated as an input file: it can be
   opened for reading, it is not a directory, and it does not occur as a
   substring of the configuration file name stored in s.

   NOTE(review): the substring test is inherited from the original code;
   a short input name that happens to occur inside the config file name
   is rejected as well. */
static int is_input_candidate(f_state *s, const char *path)
{
  FILE *probe = fopen(path, "rb");
  DIR *dir;
  int candidate;

  if (probe == NULL)
    return FALSE;
  fclose(probe);

  dir = opendir(path);
  candidate = (dir == NULL) &&
              (s->config_file == NULL || strstr(s->config_file, path) == NULL);
  if (dir)
    closedir(dir);

  return candidate;
}

/* Program entry point: initialises the state, parses the command line,
   loads the configuration, prepares the output directory and audit file,
   then processes every command-line argument that names a readable
   non-directory file. Standard input is processed when no such argument
   exists.

   Changes from the previous version:
   - the state allocation is checked;
   - arguments are walked by index, so skipping the value of "-c" can no
     longer step past the end of argv (which led to fopen(NULL));
   - the counting pass and the processing pass apply the same rules and
     both start at argv[1]. Previously the count started at argv[0], so an
     invocation such as "./foremost < image" counted the executable as an
     input and never read standard input. */
int main(int argc, char **argv)
{
  f_state *s = malloc(sizeof(*s));
  int input_files = 0;
  int idx;

  if (s == NULL)
  {
    fprintf(stderr, "Unable to allocate program state%s", NEWLINE);
    return EXIT_FAILURE;
  }

#ifndef __GLIBC__
  __progname = basename(argv[0]);
#endif

  /*Initialize the global state struct*/
  if (initialize_state(s, argc, argv))
    fatal_error(s, "Unable to initialize state");

  register_signal_handler();
  process_command_line(argc, argv, s);

  load_config_file(s);

  if (s->num_builtin == 0)
  {
    /*Nothing specified via the command line or the conf
      file so default to all builtin search types*/
    set_search_def(s, "all", 0);
  }

  if (create_output_directory(s))
    fatal_error(s, "Unable to open output directory");

  if (!get_mode(s, mode_write_audit))
  {
    create_sub_dirs(s);
  }

  if (open_audit_file(s))
    fatal_error(s, "Can't open audit file");

  /* Scan for valid files to open */
  for (idx = 1; idx < argc; idx++)
  {
    if (strcmp(argv[idx], "-c") == 0)
    {
      /*jump past the conf file so we don't process it.*/
      idx++;
      continue;
    }

    if (is_input_candidate(s, argv[idx]))
      input_files++;
  }

  if (input_files > 1)
  {
    set_mode(s, mode_multi_file);
  }

  for (idx = 1; idx < argc; idx++)
  {
    if (strcmp(argv[idx], "-c") == 0)
    {
      idx++;
      continue;
    }

    if (is_input_candidate(s, argv[idx]))
    {
      set_input_file(s, argv[idx]);
      process_file(s);
    }
  }

  if (input_files == 0)
  {
    //printf("using stdin\n");
    process_stdin(s);
  }

  print_stats(s);

  /*Lets try to clean up some of the extra sub_dirs*/
  cleanup_output(s);

  if (close_audit_file(s))
  {
    /* Hells bells. This is bad, but really, what can we do about it?
       Let's just report the error and try to get out of here! */
    print_error(s, AUDIT_FILE_NAME, "Error closing audit file");
  }

  free_state(s);
  free(s);
  return EXIT_SUCCESS;
}
342 | -------------------------------------------------------------------------------- /main.h: -------------------------------------------------------------------------------- 1 | 2 | /* FOREMOST 3 | * 4 | * By Jesse Kornblum 5 | * 6 | * This is a work of the US Government. In accordance with 17 USC 105, 7 | * copyright protection is not available for any work of the US Government. 8 | * 9 | * This program is distributed in the hope that it will be useful, but 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
12 | * 13 | */ 14 | 15 | //#define DEBUG 1 16 | 17 | #ifndef __FOREMOST_H 18 | #define __FOREMOST_H 19 | 20 | /* Version information is defined in the Makefile */ 21 | 22 | #define AUTHOR "Jesse Kornblum, Kris Kendall, and Nick Mikus" 23 | 24 | /* We use \r\n for newlines as this has to work on Win32. It's redundant for 25 | everybody else, but shouldn't cause any harm. */ 26 | #define COPYRIGHT "This program is a work of the US Government. "\ 27 | "In accordance with 17 USC 105,\r\n"\ 28 | "copyright protection is not available for any work of the US Government.\r\n"\ 29 | "This is free software; see the source for copying conditions. There is NO\r\n"\ 30 | "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\r\n" 31 | 32 | #define _GNU_SOURCE 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | 47 | /* For va_arg */ 48 | #include 49 | 50 | #ifdef __LINUX 51 | #include 52 | #include 53 | #define u_int64_t unsigned long long 54 | #endif 55 | 56 | 57 | #ifdef __LINUX 58 | 59 | #ifndef __USE_BSD 60 | #define __USE_BSD 61 | #endif 62 | #include 63 | 64 | #elif defined (__SOLARIS) 65 | 66 | #define BIG_ENDIAN 4321 67 | #define LITTLE_ENDIAN 1234 68 | 69 | #include 70 | #ifdef _BIG_ENDIAN 71 | #define BYTE_ORDER BIG_ENDIAN 72 | #else 73 | #define BYTE_ORDER LITTLE_ENDIAN 74 | #endif 75 | 76 | #elif defined (__WIN32) 77 | #include 78 | #define __U16_TYPE unsigned short 79 | 80 | #elif defined (__MACOSX) 81 | #include 82 | #define __U16_TYPE unsigned short 83 | #endif 84 | 85 | 86 | #define TRUE 1 87 | #define FALSE 0 88 | #define ONE_MEGABYTE 1048576 89 | 90 | 91 | /* RBF - Do we need these type definitions? 
*/ 92 | #ifdef __SOLARIS 93 | #define u_int32_t unsigned int 94 | #define u_int64_t unsigned long long 95 | #endif 96 | 97 | 98 | /* The only time we're *not* on a UNIX system is when we're on Windows */ 99 | #ifndef __WIN32 100 | #ifndef __UNIX 101 | #define __UNIX 102 | #endif /* ifndef __UNIX */ 103 | #endif /* ifndef __WIN32 */ 104 | 105 | 106 | #ifdef __UNIX 107 | 108 | #ifndef __U16_TYPE 109 | #define __U16_TYPE unsigned short 110 | #endif 111 | 112 | #include 113 | 114 | #ifndef BYTE_ORDER 115 | 116 | #define BIG_ENDIAN 4321 117 | #define LITTLE_ENDIAN 1234 118 | 119 | #define BYTE_ORDER LITTLE_ENDIAN 120 | 121 | #endif 122 | /* This avoids compiler warnings on older systems */ 123 | int fseeko(FILE *stream, off_t offset, int whence); 124 | off_t ftello(FILE *stream); 125 | 126 | 127 | #define CMD_PROMPT "$" 128 | #define DIR_SEPARATOR '/' 129 | #define NEWLINE "\n" 130 | #define LINE_LENGTH 74 131 | #define BLANK_LINE \ 132 | " " 133 | 134 | #endif /* #ifdef __UNIX */ 135 | 136 | /* This allows us to open standard input in binary mode by default 137 | See http://gnuwin32.sourceforge.net/compile.html for more */ 138 | #include 139 | 140 | /* Code specific to Microsoft Windows */ 141 | #ifdef __WIN32 142 | 143 | /* By default, Windows uses long for off_t. This won't do. We 144 | need an unsigned number at minimum. Windows doesn't have 64 bit 145 | numbers though. */ 146 | #ifdef off_t 147 | #undef off_t 148 | #endif 149 | #define off_t unsigned long 150 | 151 | #define CMD_PROMPT "c:\\>" 152 | #define DIR_SEPARATOR '\\' 153 | #define NEWLINE "\r\n" 154 | #define LINE_LENGTH 72 155 | #define BLANK_LINE \ 156 | " " 157 | 158 | 159 | /* It would be nice to use 64-bit file lengths in Windows */ 160 | #define ftello ftell 161 | #define fseeko fseek 162 | 163 | #ifndef __CYGWIN 164 | #define snprintf _snprintf 165 | #endif 166 | 167 | #define u_int32_t unsigned long 168 | 169 | /* We create macros for the Windows equivalent UNIX functions. 
170 | No worries about lstat to stat; Windows doesn't have symbolic links */ 171 | #define lstat(A,B) stat(A,B) 172 | 173 | #define u_int64_t unsigned __int64 174 | 175 | #ifndef __CYGWIN 176 | #define realpath(A,B) _fullpath(B,A,PATH_MAX) 177 | #endif 178 | /* Not used in md5deep anymore, but left in here in case I 179 | ever need it again. Win32 documentation searches are evil. 180 | int asprintf(char **strp, const char *fmt, ...); 181 | */ 182 | 183 | char *basename(char *a); 184 | extern char *optarg; 185 | extern int optind; 186 | int getopt(int argc, char *const argv[], const char *optstring); 187 | 188 | #endif /* ifdef _WIN32 */ 189 | 190 | 191 | /* On non-glibc systems we have to manually set the __progname variable */ 192 | #ifdef __GLIBC__ 193 | extern char *__progname; 194 | #else 195 | char *__progname; 196 | #endif /* ifdef __GLIBC__ */ 197 | 198 | /* ----------------------------------------------------------------- 199 | Program Defaults 200 | ----------------------------------------------------------------- */ 201 | #define MAX_STRING_LENGTH 1024 202 | #define COMMENT_LENGTH 64 203 | 204 | /* Modes refer to options that can be set by the user. */ 205 | 206 | #define mode_none 0 207 | #define mode_verbose 1<<1 208 | #define mode_quiet 1<<2 209 | #define mode_ind_blk 1<<3 210 | #define mode_quick 1<<4 211 | #define mode_write_all 1<<5 212 | #define mode_write_audit 1<<6 213 | #define mode_multi_file 1<<7 214 | 215 | #define MAX_NEEDLES 254 216 | #define NUM_SEARCH_SPEC_ELEMENTS 6 217 | #define MAX_SUFFIX_LENGTH 8 218 | #define MAX_FILE_TYPES 100 219 | #define FOREMOST_NOEXTENSION_SUFFIX "NONE" 220 | /* Modes 3 to 31 are reserved for future use. We shouldn't use 221 | modes higher than 31 as Win32 can't go that high. 
*/ 222 | 223 | #define DEFAULT_MODE mode_none 224 | #define DEFAULT_CONFIG_FILE "foremost.conf" 225 | #define DEFAULT_OUTPUT_DIRECTORY "output" 226 | #define AUDIT_FILE_NAME "audit.txt" 227 | #define FOREMOST_DIVIDER "------------------------------------------------------------------" 228 | 229 | #define JPEG 0 230 | #define GIF 1 231 | #define BMP 2 232 | #define MPG 3 233 | #define PDF 4 234 | #define DOC 5 235 | #define AVI 6 236 | #define WMV 7 237 | #define HTM 8 238 | #define ZIP 9 239 | #define MOV 10 240 | #define XLS 11 241 | #define PPT 12 242 | #define WPD 13 243 | #define CPP 14 244 | #define OLE 15 245 | #define GZIP 16 246 | #define RIFF 17 247 | #define WAV 18 248 | #define VJPEG 19 249 | #define SXW 20 250 | #define SXC 21 251 | #define SXI 22 252 | #define CONF 23 253 | #define PNG 24 254 | #define RAR 25 255 | #define EXE 26 256 | #define ELF 27 257 | #define REG 28 258 | #define DOCX 29 259 | #define XLSX 30 260 | #define PPTX 31 261 | #define MP4 32 262 | 263 | 264 | #define KILOBYTE 1024 265 | #define MEGABYTE 1024 * KILOBYTE 266 | #define GIGABYTE 1024 * MEGABYTE 267 | #define TERABYTE 1024 * GIGABYTE 268 | #define PETABYTE 1024 * TERABYTE 269 | #define EXABYTE 1024 * PETABYTE 270 | 271 | #define UNITS_BYTES 0 272 | #define UNITS_KILOB 1 273 | #define UNITS_MEGAB 2 274 | #define UNITS_GIGAB 3 275 | #define UNITS_TERAB 4 276 | #define UNITS_PETAB 5 277 | #define UNITS_EXAB 6 278 | 279 | #define SEARCHTYPE_FORWARD 0 280 | #define SEARCHTYPE_REVERSE 1 281 | #define SEARCHTYPE_FORWARD_NEXT 2 282 | #define SEARCHTYPE_ASCII 3 283 | 284 | #define FOREMOST_BIG_ENDIAN 0 285 | #define FOREMOST_LITTLE_ENDIAN 1 286 | /*DEFAULT CHUNK SIZE In MB*/ 287 | #define CHUNK_SIZE 100 288 | 289 | 290 | /* Wildcard is a global variable because it's used by very simple 291 | functions that don't need the whole state passed to them */ 292 | 293 | /* ----------------------------------------------------------------- 294 | State Variable and Global Variables 295 | 
   ----------------------------------------------------------------- */
/* Pattern byte that charactersMatch() treats as "matches any character".
   NOTE(review): wildcard, search_spec, signal_caught (and __progname on
   non-glibc systems) are defined, not merely declared, in this header, so
   every file that includes it carries its own tentative definition.
   Builds that disable common symbols (-fno-common) will fail to link. */
char wildcard;

/* Program-wide state that is passed to almost every function. */
typedef struct f_state 
{
  off_t mode;               /* bit set of the mode_* flags */
  char *config_file;        /* name of the configuration file */
  char *input_file;         /* name of the input being processed; shown in the progress bar */
  char *output_directory;
  char *start_time;
  char *invocation;
  char *audit_file_name;
  FILE *audit_file;
  int audit_file_open;
  int num_builtin;          /* number of entries in use in search_spec[] */
  int chunk_size; /*IN MB*/
  int fileswritten;         /* reported as "FILES EXTRACTED" by print_stats() */
  int block_size;           /* multiplied with skip to get the skipped byte count */
  int skip;                 /* number of blocks to skip, see displayPosition() */

  int time_stamp;           /* TRUE/FALSE flag set from the command line */
} f_state;

/* An additional byte pattern attached to a search specification, together
   with its length and a 256-entry table. The table is presumably the
   Boyer-Moore skip table filled by init_bm_table() -- confirm in the
   engine sources. */
typedef struct marker
{
    unsigned char* value;
    int len;
    size_t marker_bm_table[UCHAR_MAX+1];
}marker;

/* Description of one file type to search for. */
typedef struct s_spec
{
    char* suffix;           /* file-type label; print_stats() may overwrite it */
    int type;               /* one of the type identifiers (JPEG, GIF, ...) */
    u_int64_t max_len;  
    unsigned char* header;
    unsigned int header_len;
    size_t header_bm_table[UCHAR_MAX+1];

    unsigned char* footer;
    unsigned int footer_len;
    size_t footer_bm_table[UCHAR_MAX+1];
    marker markerlist[5];
    int num_markers;        /* number of entries in use in markerlist[] */
    int searchtype;         /* presumably one of the SEARCHTYPE_* values -- confirm */

    int case_sen;

    int found;              /* hit count reported by print_stats() */

    char comment[MAX_STRING_LENGTH];/*Used for audit*/
    int written; /*used for -a mode*/
}s_spec;

/* NOTE(review): fixed capacity of 50 entries; the helper code visible in
   helpers.c indexes it with 0 .. num_builtin - 1 without a bounds check. */
s_spec search_spec[50];  /*ARRAY OF BUILTIN SEARCH TYPES*/

/* Bookkeeping for one input stream. */
typedef struct f_info {
  char *file_name;
  off_t total_bytes;        /* input size; 0 means unknown to displayPosition() */

  /* We never use the total number of bytes in a file,
     only the number of megabytes when we display a time estimate */
  off_t total_megs;
  off_t bytes_read;

#ifdef __WIN32
  /* Win32 is a 32-bit operating system and can't handle file sizes
     larger than 4GB. We use this to keep track of overflows */
  off_t last_read;
  off_t overflow_count;
#endif

  FILE *handle;
  int is_stdin;
} f_info;

/* Set if the user hits ctrl-c */
int signal_caught;

/* -----------------------------------------------------------------
   Function definitions
   ----------------------------------------------------------------- */

/* State functions */

int initialize_state(f_state *s, int argc, char **argv);
void free_state(f_state *s);

char *get_invocation(f_state *s);
char *get_start_time(f_state *s);

int set_config_file(f_state *s, char *fn);
char* get_config_file(f_state *s);

int set_output_directory(f_state *s, char *fn);
char* get_output_directory(f_state *s);

void set_audit_file_open(f_state *s);
int get_audit_file_open(f_state *s);

void set_mode(f_state *s, off_t new_mode);
int get_mode(f_state *s, off_t check_mode);

int set_search_def(f_state *s,char* ft,u_int64_t max_file_size);
void get_search_def(f_state s);

void set_input_file(f_state *s,char* filename);
void get_input_file(f_state *s);

void set_chunk(f_state *s, int size);

void init_bm_table(unsigned char *needle, size_t table[UCHAR_MAX + 1], size_t len, int casesensitive,int searchtype);

void set_skip(f_state *s, int size);
void set_block(f_state *s, int size);


#ifdef __DEBUG
void dump_state(f_state *s);
#endif

/* The audit file */
int open_audit_file(f_state *s);
void audit_msg(f_state *s, char *format, ...);
int close_audit_file(f_state *s);


/* Set up our output directory */
int create_output_directory(f_state *s);
int write_to_disk(f_state *s,s_spec * needle,u_int64_t len,unsigned char* buf,  u_int64_t t_offset);
int create_sub_dirs(f_state *s);
void cleanup_output(f_state *s);

/* Configuration Files */
int load_config_file(f_state *s);


/* Helper functions (implemented in helpers.c) */
char *current_time(void);
off_t find_file_size(FILE *f);
char *human_readable(off_t size, char *buffer);
char *units(unsigned int c);
unsigned int chop(char *buf);
void print_search_specs(f_state *s);
int memwildcardcmp(const void *s1, const void *s2,size_t n,int caseSensitive);
int charactersMatch(char a, char b, int caseSensitive);
void printx(unsigned char* buf,int start, int end);
unsigned short htos(unsigned char s[],int endian);
unsigned int htoi(unsigned char s[],int endian);
u_int64_t htoll(unsigned char s[],int endian);
int displayPosition(f_state* s,f_info* i,u_int64_t pos);


/* Interface functions 
   These functions stay the same regardless if we're using a
   command line interface or a GUI */
void fatal_error(f_state *s, char *msg);
void print_error(f_state *s, char *fn, char *msg);
void print_message(f_state *s, char *format, va_list argp);
void print_stats(f_state *s);

/* Engine */
int process_file(f_state *s);
int process_stdin(f_state *s);
unsigned char *bm_search(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
	size_t table[UCHAR_MAX + 1], int case_sen,int searchtype);
unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
	size_t table[UCHAR_MAX + 1], int casesensitive,int searchtype, int start_pos) ;
#endif /* __FOREMOST_H */

/* BUILTIN */
/* NOTE(review): this prototype sits after the include guard's #endif, so
   it is repeated on every inclusion of the header. */
unsigned char* extract_file(f_state *s,  u_int64_t c_offset,unsigned char *foundat,  u_int64_t buflen, s_spec * needle, u_int64_t f_offset);
-------------------------------------------------------------------------------- /ole.h: -------------------------------------------------------------------------------- 1 | #define TRUE 1 2 | #define FALSE 0 3 | #define SPECIAL_BLOCK - 3 4 | #define END_OF_CHAIN - 2 5 | #define UNUSED - 1 6 | 7 | #define NO_ENTRY 0 8 | #define STORAGE 1 9 | #define STREAM 2 10 | #define ROOT 5 11 | #define SHORT_BLOCK 3 12 | 13 | #define FAT_START 0x4c 14 | #define OUR_BLK_SIZE 512 15 | #define DIRS_PER_BLK 4 16 | #ifndef __CYGWIN 17 | #define MIN(x, y) ((x) < (y) ? (x) : (y)) 18 | #endif 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | struct OLE_HDR 31 | { 32 | char magic[8]; /*0*/ 33 | char clsid[16]; /*8*/ 34 | __U16_TYPE uMinorVersion; /*24*/ 35 | __U16_TYPE uDllVersion; /*26*/ 36 | __U16_TYPE uByteOrder; /*28*/ 37 | __U16_TYPE uSectorShift; /*30*/ 38 | __U16_TYPE uMiniSectorShift; /*32*/ 39 | __U16_TYPE reserved; /*34*/ 40 | u_int32_t reserved1; /*36*/ 41 | u_int32_t reserved2; /*40*/ 42 | u_int32_t num_FAT_blocks; /*44*/ 43 | u_int32_t root_start_block; /*48*/ 44 | u_int32_t dfsignature; /*52*/ 45 | u_int32_t miniSectorCutoff; /*56*/ 46 | u_int32_t dir_flag; /*60 first sec in the mini fat chain*/ 47 | u_int32_t csectMiniFat; /*64 number of sectors in the minifat */ 48 | u_int32_t FAT_next_block; /*68*/ 49 | u_int32_t num_extra_FAT_blocks; /*72*/ 50 | /* FAT block list starts here !! 
first 109 entries */ 51 | }; 52 | 53 | struct OLE_DIR 54 | { 55 | char name[64]; 56 | unsigned short namsiz; 57 | char type; 58 | char bflags; //0 or 1 59 | unsigned long prev_dirent; 60 | unsigned long next_dirent; 61 | unsigned long dir_dirent; 62 | char clsid[16]; 63 | unsigned long userFlags; 64 | int secs1; 65 | int days1; 66 | int secs2; 67 | int days2; 68 | unsigned long start_block; //starting SECT of stream 69 | unsigned long size; 70 | short reserved; //must be 0 71 | }; 72 | 73 | struct DIRECTORY 74 | { 75 | char name[64]; 76 | int type; 77 | int level; 78 | int start_block; 79 | int size; 80 | int next; 81 | int prev; 82 | int dir; 83 | int s1; 84 | int s2; 85 | int d1; 86 | int d2; 87 | } 88 | *dirlist, *dl; 89 | 90 | int get_dir_block(unsigned char *fd, int blknum, int buffersize); 91 | int get_dir_info(unsigned char *src); 92 | void extract_stream(char *fd, int blknum, int size); 93 | void dump_header(struct OLE_HDR *h); 94 | int dump_dirent(int which_one); 95 | int get_block(unsigned char *fd, int blknum, unsigned char *dest, long long int buffersize); 96 | int get_FAT_block(unsigned char *fd, int blknum, int *dest, int buffersize); 97 | int reorder_dirlist(struct DIRECTORY *dir, int level); 98 | 99 | unsigned char *get_ole_block(unsigned char *fd, int blknum, unsigned long long buffersize); 100 | struct OLE_HDR *reverseBlock(struct OLE_HDR *dest, struct OLE_HDR *h); 101 | 102 | void dump_ole_header(struct OLE_HDR *h); 103 | void *Malloc(size_t bytes); 104 | void die(char *fmt, void *arg); 105 | void init_ole(); 106 | -------------------------------------------------------------------------------- /state.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "main.h" 4 | 5 | int initialize_state (f_state * s, int argc, char **argv) 6 | { 7 | char **argv_copy = argv; 8 | 9 | /* The routines in current_time return statically allocated memory. 
10 | We strdup the result so that we don't accidently free() the wrong 11 | thing later on. */ 12 | s->start_time = strdup(current_time()); 13 | wildcard = '?'; 14 | s->audit_file_open = FALSE; 15 | s->mode = DEFAULT_MODE; 16 | s->input_file = NULL; 17 | s->fileswritten = 0; 18 | s->block_size = 512; 19 | 20 | /* We use the setter fuctions here to call realpath */ 21 | set_config_file(s, DEFAULT_CONFIG_FILE); 22 | set_output_directory(s, DEFAULT_OUTPUT_DIRECTORY); 23 | 24 | s->invocation = (char *)malloc(sizeof(char) * MAX_STRING_LENGTH); 25 | s->invocation[0] = 0; 26 | s->chunk_size = CHUNK_SIZE; 27 | s->num_builtin = 0; 28 | s->skip = 0; 29 | s->time_stamp = FALSE; 30 | do 31 | { 32 | strncat(s->invocation, *argv_copy, MAX_STRING_LENGTH - strlen(s->invocation)); 33 | strncat(s->invocation, " ", MAX_STRING_LENGTH - strlen(s->invocation)); 34 | ++argv_copy; 35 | } 36 | while (*argv_copy); 37 | 38 | return FALSE; 39 | } 40 | 41 | void free_state(f_state *s) 42 | { 43 | free(s->start_time); 44 | free(s->output_directory); 45 | free(s->config_file); 46 | } 47 | 48 | int get_audit_file_open(f_state *s) 49 | { 50 | return (s->audit_file_open); 51 | } 52 | 53 | char *get_invocation(f_state *s) 54 | { 55 | return (s->invocation); 56 | } 57 | 58 | char *get_start_time(f_state *s) 59 | { 60 | return (s->start_time); 61 | } 62 | 63 | char *get_config_file(f_state *s) 64 | { 65 | return (s->config_file); 66 | } 67 | 68 | int set_config_file(f_state *s, char *fn) 69 | { 70 | char temp[PATH_MAX]; 71 | 72 | /* If the configuration file doesn't exist, this realpath will return 73 | NULL. We don't error check here as the user may specify a file 74 | that doesn't currently exist */ 75 | realpath(fn, temp); 76 | 77 | /* RBF - Does this create a memory leak? What happens to the old value? 
*/ 78 | s->config_file = strdup(temp); 79 | return FALSE; 80 | } 81 | 82 | char *get_output_directory(f_state *s) 83 | { 84 | return (s->output_directory); 85 | } 86 | 87 | int set_output_directory(f_state *s, char *fn) 88 | { 89 | char temp[PATH_MAX]; 90 | int fullpathlen=0; 91 | /* We don't error check here as it's quite possible that the 92 | output directory doesn't exist yet. If it doesn't, realpath 93 | resolves the path correctly, but still returns NULL. */ 94 | //strncpy(s->output_directory,fn,PATH_MAX); 95 | 96 | realpath(fn, temp); 97 | fullpathlen=strlen(temp); 98 | 99 | if(fullpathlen!=0) 100 | { 101 | s->output_directory = strdup(temp); 102 | } 103 | else 104 | { 105 | /*Realpath failed just use cwd*/ 106 | s->output_directory = strdup(fn); 107 | } 108 | return FALSE; 109 | } 110 | 111 | int get_mode(f_state *s, off_t check_mode) 112 | { 113 | return (s->mode & check_mode); 114 | } 115 | 116 | void set_mode(f_state *s, off_t new_mode) 117 | { 118 | s->mode |= new_mode; 119 | } 120 | 121 | void set_chunk(f_state *s, int size) 122 | { 123 | s->chunk_size = size; 124 | } 125 | 126 | void set_skip(f_state *s, int size) 127 | { 128 | s->skip = size; 129 | } 130 | 131 | void set_block(f_state *s, int size) 132 | { 133 | s->block_size = size; 134 | } 135 | 136 | void write_audit_header(f_state *s) 137 | { 138 | audit_msg(s, "Foremost version %s by %s", VERSION, AUTHOR); 139 | audit_msg(s, "Audit File"); 140 | audit_msg(s, ""); 141 | audit_msg(s, "Foremost started at %s", get_start_time(s)); 142 | audit_msg(s, "Invocation: %s", get_invocation(s)); 143 | audit_msg(s, "Output directory: %s", get_output_directory(s)); 144 | audit_msg(s, "Configuration file: %s", get_config_file(s)); 145 | } 146 | 147 | int open_audit_file(f_state *s) 148 | { 149 | char fn[MAX_STRING_LENGTH]; 150 | 151 | snprintf(fn, 152 | MAX_STRING_LENGTH, 153 | "%s%c%s", 154 | get_output_directory(s), 155 | DIR_SEPARATOR, 156 | AUDIT_FILE_NAME); 157 | 158 | if ((s->audit_file = fopen(fn, "w")) 
== NULL) 159 | { 160 | print_error(s, fn, strerror(errno)); 161 | fatal_error(s, "Can't open audit file"); 162 | } 163 | 164 | s->audit_file_open = TRUE; 165 | write_audit_header(s); 166 | 167 | return FALSE; 168 | } 169 | 170 | int close_audit_file(f_state *s) 171 | { 172 | audit_msg(s, FOREMOST_DIVIDER); 173 | audit_msg(s, ""); 174 | audit_msg(s, "Foremost finished at %s", current_time()); 175 | 176 | if (fclose(s->audit_file)) 177 | { 178 | print_error(s, AUDIT_FILE_NAME, strerror(errno)); 179 | return TRUE; 180 | } 181 | 182 | return FALSE; 183 | } 184 | 185 | void audit_msg(f_state *s, char *format, ...) 186 | { 187 | va_list argp; 188 | va_start(argp, format); 189 | 190 | if (get_mode(s, mode_verbose)) { 191 | print_message(s, format, argp); 192 | va_end(argp); 193 | va_start(argp, format); 194 | } 195 | 196 | vfprintf(s->audit_file, format, argp); 197 | va_end(argp); 198 | 199 | fprintf(s->audit_file, "%s", NEWLINE); 200 | fflush(stdout); 201 | } 202 | 203 | void set_input_file(f_state *s, char *filename) 204 | { 205 | s->input_file = (char *)malloc((strlen(filename) + 1) * sizeof(char)); 206 | strncpy(s->input_file, filename, strlen(filename) + 1); 207 | } 208 | 209 | /*Initialize any search specs*/ 210 | int init_builtin(f_state *s, int type, char *suffix, char *header, char *footer, int header_len, 211 | int footer_len, u_int64_t max_len, int case_sen) 212 | { 213 | 214 | int i = s->num_builtin; 215 | 216 | search_spec[i].type = type; 217 | search_spec[i].suffix = (char *)malloc((strlen(suffix)+1) * sizeof(char)); 218 | search_spec[i].num_markers = 0; 219 | strcpy(search_spec[i].suffix, suffix); 220 | 221 | search_spec[i].header_len = header_len; 222 | search_spec[i].footer_len = footer_len; 223 | 224 | search_spec[i].max_len = max_len; 225 | search_spec[i].found = 0; 226 | search_spec[i].header = (unsigned char *)malloc(search_spec[i].header_len * sizeof(unsigned char)); 227 | search_spec[i].footer = (unsigned char *)malloc(search_spec[i].footer_len * 
sizeof(unsigned char)); 228 | search_spec[i].case_sen = case_sen; 229 | memset(search_spec[i].comment, 0, COMMENT_LENGTH - 1); 230 | 231 | memcpy(search_spec[i].header, header, search_spec[i].header_len); 232 | memcpy(search_spec[i].footer, footer, search_spec[i].footer_len); 233 | 234 | init_bm_table(search_spec[i].header, 235 | search_spec[i].header_bm_table, 236 | search_spec[i].header_len, 237 | search_spec[i].case_sen, 238 | SEARCHTYPE_FORWARD); 239 | init_bm_table(search_spec[i].footer, 240 | search_spec[i].footer_bm_table, 241 | search_spec[i].footer_len, 242 | search_spec[i].case_sen, 243 | SEARCHTYPE_FORWARD); 244 | s->num_builtin++; 245 | 246 | return i; 247 | } 248 | 249 | /*Markers are a method to search for any unique information besides just the header and the footer*/ 250 | void add_marker(f_state *s, int index, char *marker, int markerlength) 251 | { 252 | int i = search_spec[index].num_markers; 253 | if (marker == NULL) 254 | { 255 | search_spec[index].num_markers = 0; 256 | return; 257 | } 258 | 259 | search_spec[index].markerlist[i].len = markerlength; 260 | search_spec[index].markerlist[i].value = (unsigned char *)malloc(search_spec[index].markerlist[i].len * sizeof(unsigned char)); 261 | 262 | memcpy(search_spec[index].markerlist[i].value, marker, search_spec[index].markerlist[i].len); 263 | init_bm_table(search_spec[index].markerlist[i].value, 264 | search_spec[index].markerlist[i].marker_bm_table, 265 | search_spec[index].markerlist[i].len, 266 | TRUE, 267 | SEARCHTYPE_FORWARD); 268 | search_spec[index].num_markers++; 269 | } 270 | 271 | /*Initial every search spec we know about*/ 272 | void init_all(f_state *state) 273 | { 274 | int index = 0; 275 | init_builtin(state, JPEG, "jpg", "\xff\xd8\xff", "\xff\xd9", 3, 2, 20 * MEGABYTE, TRUE); 276 | index = init_builtin(state, GIF, "gif", "\x47\x49\x46\x38", "\x00\x3b", 4, 2, MEGABYTE, TRUE); 277 | add_marker(state, index, "\x00\x00\x3b", 3); 278 | init_builtin(state, BMP, "bmp", "BM", NULL, 2, 0, 
2 * MEGABYTE, TRUE); 279 | init_builtin(state, 280 | WMV, 281 | "wmv", 282 | "\x30\x26\xB2\x75\x8E\x66\xCF\x11", 283 | "\xA1\xDC\xAB\x8C\x47\xA9", 284 | 8, 285 | 6, 286 | 40 * MEGABYTE, 287 | TRUE); 288 | init_builtin(state, MOV, "mov", "moov", NULL, 4, 0, 40 * MEGABYTE, TRUE); 289 | init_builtin(state, MP4, "mp4", "\x00\x00\x00\x1c\x66\x74\x79\x70", NULL, 8, 0, 600 * MEGABYTE, TRUE); 290 | init_builtin(state, RIFF, "rif", "RIFF", "INFO", 4, 4, 20 * MEGABYTE, TRUE); 291 | init_builtin(state, HTM, "htm", "", 5, 7, MEGABYTE, FALSE); 292 | init_builtin(state, 293 | OLE, 294 | "ole", 295 | "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00\x00\x00\x00\x00\x00\x00", 296 | NULL, 297 | 16, 298 | 0, 299 | 5 * MEGABYTE, 300 | TRUE); 301 | init_builtin(state, 302 | ZIP, 303 | "zip", 304 | "\x50\x4B\x03\x04", 305 | "\x4b\x05\x06\x00", 306 | 4, 307 | 4, 308 | 100 * MEGABYTE, 309 | TRUE); 310 | init_builtin(state, 311 | RAR, 312 | "rar", 313 | "\x52\x61\x72\x21\x1A\x07\x00", 314 | "\x00\x00\x00\x00\x00\x00\x00\x00", 315 | 7, 316 | 8, 317 | 100 * MEGABYTE, 318 | TRUE); 319 | init_builtin(state, EXE, "exe", "MZ", NULL, 2, 0, 1 * MEGABYTE, TRUE); 320 | 321 | index = init_builtin(state, 322 | PNG, 323 | "png", 324 | "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", 325 | "IEND", 326 | 8, 327 | 4, 328 | 1 * MEGABYTE, 329 | TRUE); 330 | index = init_builtin(state, 331 | MPG, 332 | "mpg", 333 | "\x00\x00\x01\xba", 334 | "\x00\x00\x01\xb9", 335 | 4, 336 | 4, 337 | 50 * MEGABYTE, 338 | TRUE); 339 | add_marker(state, index, "\x00\x00\x01", 3); 340 | 341 | index = init_builtin(state, PDF, "pdf", "%PDF-1.", "%%EOF", 7, 5, 40 * MEGABYTE, TRUE); 342 | add_marker(state, index, "/L ", 3); 343 | add_marker(state, index, "obj", 3); 344 | add_marker(state, index, "/Linearized", 11); 345 | add_marker(state, index, "/Length", 7); 346 | } 347 | 348 | /*Process any command line args following the -t switch)*/ 349 | int set_search_def(f_state *s, char *ft, u_int64_t max_file_size) 350 | { 351 | int index = 0; 352 | 353 | if 
(strcmp(ft, "jpg") == 0 || strcmp(ft, "jpeg") == 0) 354 | { 355 | if (max_file_size == 0) 356 | max_file_size = 20 * MEGABYTE; 357 | init_builtin(s, JPEG, "jpg", "\xff\xd8\xff", "\xff\xd9", 3, 2, max_file_size, TRUE); 358 | } 359 | else if (strcmp(ft, "gif") == 0) 360 | { 361 | if (max_file_size == 0) 362 | max_file_size = 1 * MEGABYTE; 363 | index = init_builtin(s, 364 | GIF, 365 | "gif", 366 | "\x47\x49\x46\x38", 367 | "\x00\x3b", 368 | 4, 369 | 2, 370 | max_file_size, 371 | TRUE); 372 | 373 | add_marker(s, index, "\x00\x00\x3b", 3); 374 | } 375 | else if (strcmp(ft, "bmp") == 0) 376 | { 377 | 378 | if (max_file_size == 0) 379 | max_file_size = 2 * MEGABYTE; 380 | 381 | init_builtin(s, BMP, "bmp", "BM", NULL, 2, 0, max_file_size, TRUE); 382 | } 383 | else if (strcmp(ft, "mp4") == 0) 384 | { 385 | init_builtin(s, MP4, "mp4", "\x00\x00\x00\x1c\x66\x74\x79\x70", NULL, 8, 0, 600 * MEGABYTE, TRUE); 386 | } 387 | else if (strcmp(ft, "exe") == 0) 388 | { 389 | 390 | if (max_file_size == 0) 391 | max_file_size = 1 * MEGABYTE; 392 | 393 | init_builtin(s, EXE, "exe", "MZ", NULL, 2, 0, max_file_size, TRUE); 394 | } 395 | else if (strcmp(ft, "elf") == 0) 396 | { 397 | 398 | if (max_file_size == 0) 399 | max_file_size = 1 * MEGABYTE; 400 | 401 | init_builtin(s, ELF, "elf", "0x7fELF", NULL, 4, 0, max_file_size, TRUE); 402 | } 403 | else if (strcmp(ft, "reg") == 0) 404 | { 405 | 406 | if (max_file_size == 0) 407 | max_file_size = 2 * MEGABYTE; 408 | 409 | init_builtin(s, REG, "reg", "regf", NULL, 4, 0, max_file_size, TRUE); 410 | 411 | } 412 | else if (strcmp(ft, "mpg") == 0 || strcmp(ft, "mpeg") == 0) 413 | { 414 | if (max_file_size == 0) 415 | max_file_size = 50 * MEGABYTE; 416 | 417 | //20000000 \x00\x00\x01\xb3 \x00\x00\x01\xb7 //system data 418 | index = init_builtin(s, 419 | MPG, 420 | "mpg", 421 | "\x00\x00\x01\xba", 422 | "\x00\x00\x01\xb9", 423 | 4, 424 | 4, 425 | max_file_size, 426 | TRUE); 427 | add_marker(s, index, "\x00\x00\x01", 3); 428 | 429 | /* 430 | 
add_marker(s,index,"\x00\x00\x01\xBB",4); 431 | add_marker(s,index,"\x00\x00\x01\xBE",4); 432 | add_marker(s,index,"\x00\x00\x01\xB3",4); 433 | */ 434 | } 435 | else if (strcmp(ft, "wmv") == 0) 436 | { 437 | 438 | if (max_file_size == 0) 439 | max_file_size = 20 * MEGABYTE; 440 | 441 | init_builtin(s, 442 | WMV, 443 | "wmv", 444 | "\x30\x26\xB2\x75\x8E\x66\xCF\x11", 445 | "\xA1\xDC\xAB\x8C\x47\xA9", 446 | 8, 447 | 6, 448 | max_file_size, 449 | TRUE); 450 | } 451 | else if (strcmp(ft, "avi") == 0) 452 | { 453 | 454 | if (max_file_size == 0) 455 | max_file_size = 20 * MEGABYTE; 456 | 457 | init_builtin(s, AVI, "avi", "RIFF", "INFO", 4, 4, max_file_size, TRUE); 458 | } 459 | 460 | else if (strcmp(ft, "rif") == 0) 461 | { 462 | 463 | if (max_file_size == 0) 464 | max_file_size = 20 * MEGABYTE; 465 | init_builtin(s, RIFF, "rif", "RIFF", "INFO", 4, 4, max_file_size, TRUE); 466 | } 467 | else if (strcmp(ft, "wav") == 0) 468 | { 469 | 470 | if (max_file_size == 0) 471 | max_file_size = 20 * MEGABYTE; 472 | init_builtin(s, WAV, "wav", "RIFF", "INFO", 4, 4, max_file_size, TRUE); 473 | 474 | } 475 | else if (strcmp(ft, "html") == 0 || strcmp(ft, "htm") == 0) 476 | { 477 | 478 | if (max_file_size == 0) 479 | max_file_size = 1 * MEGABYTE; 480 | init_builtin(s, HTM, "htm", "", 5, 7, max_file_size, FALSE); 481 | } 482 | 483 | else if (strcmp(ft, "ole") == 0 || strcmp(ft, "office") == 0) 484 | { 485 | 486 | if (max_file_size == 0) 487 | max_file_size = 10 * MEGABYTE; 488 | init_builtin(s, 489 | OLE, 490 | "ole", 491 | "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00\x00\x00\x00\x00\x00\x00", 492 | NULL, 493 | 16, 494 | 0, 495 | max_file_size, 496 | TRUE); 497 | } 498 | else if (strcmp(ft, "doc") == 0) 499 | { 500 | if (max_file_size == 0) 501 | max_file_size = 20 * MEGABYTE; 502 | init_builtin(s, 503 | DOC, 504 | "doc", 505 | "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00\x00\x00\x00\x00\x00\x00", 506 | NULL, 507 | 16, 508 | 0, 509 | max_file_size, 510 | TRUE); 511 | } 512 | else if 
(strcmp(ft, "xls") == 0) 513 | { 514 | if (max_file_size == 0) 515 | max_file_size = 10 * MEGABYTE; 516 | 517 | init_builtin(s, 518 | XLS, 519 | "xls", 520 | "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00\x00\x00\x00\x00\x00\x00", 521 | NULL, 522 | 16, 523 | 0, 524 | max_file_size, 525 | TRUE); 526 | 527 | } 528 | else if (strcmp(ft, "ppt") == 0) 529 | { 530 | 531 | if (max_file_size == 0) 532 | max_file_size = 10 * MEGABYTE; 533 | init_builtin(s, 534 | PPT, 535 | "ppt", 536 | "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00\x00\x00\x00\x00\x00\x00", 537 | NULL, 538 | 16, 539 | 0, 540 | max_file_size, 541 | TRUE); 542 | } 543 | else if (strcmp(ft, "zip") == 0) 544 | { 545 | if (max_file_size == 0) 546 | max_file_size = 100 * MEGABYTE; 547 | 548 | init_builtin(s, 549 | ZIP, 550 | "zip", 551 | "\x50\x4B\x03\x04", 552 | "\x50\x4b\x05\x06", 553 | 4, 554 | 4, 555 | max_file_size, 556 | TRUE); 557 | 558 | } 559 | else if (strcmp(ft, "rar") == 0) 560 | { 561 | if (max_file_size == 0) 562 | max_file_size = 100 * MEGABYTE; 563 | 564 | init_builtin(s, 565 | RAR, 566 | "rar", 567 | "\x52\x61\x72\x21\x1A\x07\x00", 568 | "\x00\x00\x00\x00\x00\x00\x00\x00", 569 | 7, 570 | 8, 571 | max_file_size, 572 | TRUE); 573 | 574 | } 575 | else if (strcmp(ft, "sxw") == 0) 576 | { 577 | if (max_file_size == 0) 578 | max_file_size = 10 * MEGABYTE; 579 | 580 | init_builtin(s, 581 | SXW, 582 | "sxw", 583 | "\x50\x4B\x03\x04", 584 | "\x4b\x05\x06\x00", 585 | 4, 586 | 4, 587 | max_file_size, 588 | TRUE); 589 | 590 | } 591 | else if (strcmp(ft, "sxc") == 0) 592 | { 593 | if (max_file_size == 0) 594 | max_file_size = 10 * MEGABYTE; 595 | 596 | init_builtin(s, 597 | SXC, 598 | "sxc", 599 | "\x50\x4B\x03\x04", 600 | "\x4b\x05\x06\x00", 601 | 4, 602 | 4, 603 | max_file_size, 604 | TRUE); 605 | 606 | } 607 | else if (strcmp(ft, "sxi") == 0) 608 | { 609 | if (max_file_size == 0) 610 | max_file_size = 10 * MEGABYTE; 611 | 612 | init_builtin(s, 613 | SXI, 614 | "sxi", 615 | "\x50\x4B\x03\x04", 616 | 
"\x4b\x05\x06\x00", 617 | 4, 618 | 4, 619 | max_file_size, 620 | TRUE); 621 | 622 | } 623 | else if (strcmp(ft, "docx") == 0) 624 | { 625 | if (max_file_size == 0) 626 | max_file_size = 10 * MEGABYTE; 627 | 628 | init_builtin(s, 629 | DOCX, 630 | "docx", 631 | "\x50\x4B\x03\x04", 632 | "\x4b\x05\x06\x00", 633 | 4, 634 | 4, 635 | max_file_size, 636 | TRUE); 637 | 638 | } 639 | else if (strcmp(ft, "pptx") == 0) 640 | { 641 | if (max_file_size == 0) 642 | max_file_size = 10 * MEGABYTE; 643 | 644 | init_builtin(s, 645 | PPTX, 646 | "pptx", 647 | "\x50\x4B\x03\x04", 648 | "\x4b\x05\x06\x00", 649 | 4, 650 | 4, 651 | max_file_size, 652 | TRUE); 653 | 654 | } 655 | else if (strcmp(ft, "xlsx") == 0) 656 | { 657 | if (max_file_size == 0) 658 | max_file_size = 10 * MEGABYTE; 659 | 660 | init_builtin(s, 661 | XLSX, 662 | "xlsx", 663 | "\x50\x4B\x03\x04", 664 | "\x4b\x05\x06\x00", 665 | 4, 666 | 4, 667 | max_file_size, 668 | TRUE); 669 | 670 | } 671 | else if (strcmp(ft, "gzip") == 0 || strcmp(ft, "gz") == 0) 672 | { 673 | if (max_file_size == 0) 674 | max_file_size = 100 * MEGABYTE; 675 | 676 | init_builtin(s, GZIP, "gz", "\x1F\x8B", "\x00\x00\x00\x00", 2, 4, max_file_size, TRUE); 677 | } 678 | else if (strcmp(ft, "pdf") == 0) 679 | { 680 | if (max_file_size == 0) 681 | max_file_size = 20 * MEGABYTE; 682 | 683 | index = init_builtin(s, PDF, "pdf", "%PDF-1.", "%%EOF", 7, 5, max_file_size, TRUE); 684 | add_marker(s, index, "/L ", 3); 685 | add_marker(s, index, "obj", 3); 686 | add_marker(s, index, "/Linearized", 11); 687 | add_marker(s, index, "/Length", 7); 688 | } 689 | else if (strcmp(ft, "vjpeg") == 0) 690 | { 691 | if (max_file_size == 0) 692 | max_file_size = 40 * MEGABYTE; 693 | init_builtin(s, VJPEG, "mov", "pnot", NULL, 4, 0, max_file_size, TRUE); 694 | } 695 | else if (strcmp(ft, "mov") == 0) 696 | { 697 | if (max_file_size == 0) 698 | max_file_size = 40 * MEGABYTE; 699 | 700 | init_builtin(s, MOV, "mov", "moov", NULL, 4, 0, max_file_size, TRUE); 701 | } 702 | else if 
(strcmp(ft, "wpd") == 0) 703 | { 704 | if (max_file_size == 0) 705 | max_file_size = 1 * MEGABYTE; 706 | 707 | init_builtin(s, WPD, "wpd", "\xff\x57\x50\x43", NULL, 4, 0, max_file_size, TRUE); 708 | } 709 | else if (strcmp(ft, "cpp") == 0) 710 | { 711 | if (max_file_size == 0) 712 | max_file_size = 1 * MEGABYTE; 713 | 714 | index = init_builtin(s, CPP, "cpp", "#include", "char", 8, 4, max_file_size, TRUE); 715 | add_marker(s, index, "int", 3); 716 | } 717 | else if (strcmp(ft, "png") == 0) 718 | { 719 | if (max_file_size == 0) 720 | max_file_size = 1 * MEGABYTE; 721 | index = init_builtin(s, 722 | PNG, 723 | "png", 724 | "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A", 725 | "IEND", 726 | 8, 727 | 4, 728 | max_file_size, 729 | TRUE); 730 | } 731 | else if (strcmp(ft, "all") == 0) 732 | { 733 | init_all(s); 734 | } 735 | else 736 | { 737 | return FALSE; 738 | } 739 | 740 | return TRUE; 741 | 742 | } 743 | 744 | void init_bm_table(unsigned char *needle, size_t table[UCHAR_MAX + 1], size_t len, int casesensitive, 745 | int searchtype) 746 | { 747 | size_t i = 0, j = 0, currentindex = 0; 748 | 749 | for (i = 0; i <= UCHAR_MAX; i++) 750 | table[i] = len; 751 | for (i = 0; i < len; i++) 752 | { 753 | if (searchtype == SEARCHTYPE_REVERSE) 754 | { 755 | 756 | currentindex = i; //If we are running our searches backwards 757 | //we count from the beginning of the string 758 | } 759 | else 760 | { 761 | currentindex = len - i - 1; //Count from the back of string 762 | } 763 | 764 | if (needle[i] == wildcard) //No skip entry can advance us past the last wildcard in the string 765 | { 766 | for (j = 0; j <= UCHAR_MAX; j++) 767 | table[j] = currentindex; 768 | } 769 | 770 | table[(unsigned char)needle[i]] = currentindex; 771 | if (!casesensitive) 772 | { 773 | 774 | //RBF - this is a little kludgy but it works and this isn't the part 775 | //of the code we really need to worry about optimizing... 
776 | //If we aren't case sensitive we just set both the upper and lower case 777 | //entries in the jump table. 778 | table[tolower(needle[i])] = currentindex; 779 | table[toupper(needle[i])] = currentindex; 780 | } 781 | } 782 | } 783 | 784 | #ifdef __DEBUG 785 | void dump_state(f_state *s) 786 | { 787 | printf("Current state:\n"); 788 | printf("Config file: %s\n", s->config_file); 789 | printf("Output directory: %s\n", s->output_directory); 790 | printf("Mode: %llu\n", s->mode); 791 | 792 | } 793 | #endif 794 | --------------------------------------------------------------------------------