├── .dockerignore ├── .gitignore ├── Dockerfile ├── README.md ├── analyze_fails.py ├── compare_logs.py ├── database-config-example.json ├── ghcc ├── __init__.py ├── compile.py ├── database.py ├── parse │ ├── __init__.py │ ├── lexer.py │ ├── parser.py │ └── serialize.py ├── repo.py └── utils │ ├── __init__.py │ └── docker.py ├── main.py ├── match_functions.py ├── mypy.ini ├── purge_folder.py ├── requirements.txt ├── run_decompiler.py ├── scripts ├── decompiler_scripts │ ├── collect.py │ ├── dump_trees.py │ └── util.py ├── entrypoint.sh ├── fake_libc_include │ ├── X11 │ │ ├── Intrinsic.h │ │ ├── Xlib.h │ │ ├── _X11_fake_defines.h │ │ └── _X11_fake_typedefs.h │ ├── _ansi.h │ ├── _fake_defines.h │ ├── _fake_gcc_ext.h │ ├── _fake_typedefs.h │ ├── _syslist.h │ ├── aio.h │ ├── alloca.h │ ├── ar.h │ ├── argz.h │ ├── arpa │ │ └── inet.h │ ├── asm-generic │ │ └── int-ll64.h │ ├── assert.h │ ├── complex.h │ ├── cpio.h │ ├── ctype.h │ ├── dirent.h │ ├── dlfcn.h │ ├── emmintrin.h │ ├── endian.h │ ├── envz.h │ ├── errno.h │ ├── fastmath.h │ ├── fcntl.h │ ├── features.h │ ├── fenv.h │ ├── float.h │ ├── fmtmsg.h │ ├── fnmatch.h │ ├── ftw.h │ ├── getopt.h │ ├── glob.h │ ├── grp.h │ ├── iconv.h │ ├── ieeefp.h │ ├── immintrin.h │ ├── inttypes.h │ ├── iso646.h │ ├── langinfo.h │ ├── libgen.h │ ├── libintl.h │ ├── limits.h │ ├── linux │ │ ├── socket.h │ │ └── version.h │ ├── locale.h │ ├── malloc.h │ ├── math.h │ ├── mir_toolkit │ │ └── client_types.h │ ├── monetary.h │ ├── mqueue.h │ ├── ndbm.h │ ├── net │ │ └── if.h │ ├── netdb.h │ ├── netinet │ │ ├── in.h │ │ └── tcp.h │ ├── newlib.h │ ├── nl_types.h │ ├── openssl │ │ ├── err.h │ │ ├── evp.h │ │ ├── hmac.h │ │ ├── ssl.h │ │ └── x509v3.h │ ├── paths.h │ ├── poll.h │ ├── process.h │ ├── pthread.h │ ├── pwd.h │ ├── reent.h │ ├── regdef.h │ ├── regex.h │ ├── sched.h │ ├── search.h │ ├── semaphore.h │ ├── setjmp.h │ ├── signal.h │ ├── smmintrin.h │ ├── spawn.h │ ├── stdarg.h │ ├── stdbool.h │ ├── stddef.h │ ├── stdint.h │ ├── stdio.h │ ├── stdlib.h │ ├── string.h │ ├── strings.h │ ├── stropts.h │ ├── sys │ │ ├── ioctl.h │ │ ├── ipc.h │ │ ├── mman.h │ │ ├── msg.h │ │ ├── poll.h │ │ ├── resource.h │ │ ├── select.h │ │ ├── sem.h │ │ ├── shm.h │ │ ├── socket.h │ │ ├── stat.h │ │ ├── statvfs.h │ │ ├── sysctl.h │ │ ├── time.h │ │ ├── times.h │ │ ├── types.h │ │ ├── uio.h │ │ ├── un.h │ │ ├── utsname.h │ │ └── wait.h │ ├── syslog.h │ ├── tar.h │ ├── termios.h │ ├── tgmath.h │ ├── time.h │ ├── trace.h │ ├── ulimit.h │ ├── unctrl.h │ ├── unistd.h │ ├── utime.h │ ├── utmp.h │ ├── utmpx.h │ ├── wchar.h │ ├── wctype.h │ ├── wordexp.h │ ├── xcb │ │ └── xcb.h │ └── zlib.h └── mock_path │ ├── batch_make.py │ ├── cc │ ├── clang │ ├── gcc │ ├── install_libraries.py │ ├── pkg-config │ └── sudo └── tests ├── compile_test.py ├── parse_test.py └── repo_test.py /.dockerignore: -------------------------------------------------------------------------------- 1 | */ 2 | !ghcc/ 3 | !scripts/ 4 | !*.py 5 | !requirements.txt 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Example user template 3 | 4 | database-config.json 5 | 6 | # IntelliJ project files 7 | .idea 8 | *.iml 9 | out 10 | gen 11 | 12 | ### Python template 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | 
build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | pip-wheel-metadata/ 36 | share/python-wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # celery beat schedule file 106 | celerybeat-schedule 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcc:10.3-buster 2 | 3 | # Install necessary packages. 4 | RUN apt-get update && apt-get install -y --no-install-recommends \ 5 | ca-certificates \ 6 | curl \ 7 | python3-dev 8 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ 9 | python3 get-pip.py 10 | 11 | # Credit: https://denibertovic.com/posts/handling-permissions-with-docker-volumes/ 12 | # Install `gosu` to avoid running as root. 13 | RUN gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 14 | RUN curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/1.4/gosu-$(dpkg --print-architecture)" \ 15 | && curl -o /usr/local/bin/gosu.asc -SL "https://github.com/tianon/gosu/releases/download/1.4/gosu-$(dpkg --print-architecture).asc" \ 16 | && gpg --verify /usr/local/bin/gosu.asc \ 17 | && rm /usr/local/bin/gosu.asc \ 18 | && chmod +x /usr/local/bin/gosu 19 | 20 | # Install packages for compilation & ease-of-use.
21 | RUN apt-get install -y --no-install-recommends \ 22 | less \ 23 | vim \ 24 | bmake 25 | RUN apt-get install -y --no-install-recommends \ 26 | binutils-dev \ 27 | bison \ 28 | check \ 29 | dialog \ 30 | flex \ 31 | flite1-dev \ 32 | freeglut3-dev \ 33 | guile-2.0-dev \ 34 | lib3ds-dev \ 35 | liba52-0.7.4-dev \ 36 | libaa1-dev \ 37 | libacl1-dev \ 38 | libaio-dev \ 39 | libao-dev \ 40 | libargtable2-dev \ 41 | libasound2-dev \ 42 | libatlas-base-dev \ 43 | libatm1-dev \ 44 | libattr1-dev \ 45 | libaubio-dev \ 46 | libaudio-dev \ 47 | libaudit-dev \ 48 | libauparse-dev \ 49 | libavcodec-dev \ 50 | libavdevice-dev \ 51 | libavfilter-dev \ 52 | libavresample-dev \ 53 | libavutil-dev \ 54 | libbam-dev \ 55 | libbdd-dev \ 56 | libbluetooth-dev \ 57 | libbluray-dev \ 58 | libboost-regex-dev \ 59 | libboost-serialization-dev \ 60 | libboost-system-dev \ 61 | libboost-thread-dev \ 62 | libbrlapi-dev \ 63 | libbs2b-dev \ 64 | libbsd-dev \ 65 | libbtbb-dev \ 66 | libbwa-dev \ 67 | libcaca-dev \ 68 | libcap-dev \ 69 | libcap-ng-dev \ 70 | libcdb-dev \ 71 | libcdio-cdda-dev \ 72 | libcdio-dev \ 73 | libcdio-paranoia-dev \ 74 | libcfg-dev \ 75 | libcfitsio-dev \ 76 | libchewing3-dev \ 77 | libcjson-dev \ 78 | libcmap-dev \ 79 | libcmph-dev \ 80 | libcodec2-dev \ 81 | libcomedi-dev \ 82 | libconfig-dev \ 83 | libconfuse-dev \ 84 | libcpg-dev \ 85 | libcpufreq-dev \ 86 | libcrack2-dev \ 87 | libcrmcommon-dev \ 88 | libcunit1-dev \ 89 | libcups2-dev \ 90 | libczmq-dev \ 91 | libdbi-dev \ 92 | libdca-dev \ 93 | libdebconfclient0-dev \ 94 | libdebian-installer4-dev \ 95 | libdirectfb-dev \ 96 | libdlm-dev \ 97 | libdlmcontrol-dev \ 98 | libdnet-dev \ 99 | libdrm-dev \ 100 | libdts-dev \ 101 | libdv4-dev \ 102 | libdw-dev \ 103 | libdwarf-dev \ 104 | libedit-dev \ 105 | libelf-dev \ 106 | libenca-dev \ 107 | libepoxy-dev \ 108 | libev-dev \ 109 | libewf-dev \ 110 | libext2fs-dev \ 111 | libf2c2-dev \ 112 | libfaad-dev \ 113 | libfcgi-dev \ 114 | libfdt-dev \ 115 | libfftw3-dev \ 116 | libfiu-dev \ 117 | libflac-dev \ 118 | libfluidsynth-dev \ 119 | libforms-dev \ 120 | libfreecell-solver-dev \ 121 | libfreeimage-dev \ 122 | libfreenect-dev \ 123 | libftdi-dev \ 124 | libftdi1-dev \ 125 | libftgl-dev \ 126 | libftp-dev \ 127 | libfuse-dev \ 128 | libgadu-dev \ 129 | libgbm-dev \ 130 | libgc-dev \ 131 | libgcrypt20-dev \ 132 | libgd-dev \ 133 | libgenometools0-dev \ 134 | libgeoip-dev \ 135 | libgif-dev \ 136 | libgit2-dev \ 137 | libglew-dev \ 138 | libglfw3-dev \ 139 | libgnustep-base-dev \ 140 | libgpac-dev \ 141 | libgpm-dev \ 142 | libgps-dev \ 143 | libgraphicsmagick1-dev \ 144 | libgsl-dev \ 145 | libgsm1-dev \ 146 | libgtkdatabox-dev \ 147 | libharfbuzz-dev \ 148 | libhiredis-dev \ 149 | libiberty-dev \ 150 | libibmad-dev \ 151 | libibnetdisc-dev \ 152 | libibumad-dev \ 153 | libibverbs-dev \ 154 | libidn11-dev \ 155 | libigraph0-dev \ 156 | libiksemel-dev \ 157 | libimlib2-dev \ 158 | libimobiledevice-dev \ 159 | libiniparser-dev \ 160 | libiodbc2-dev \ 161 | libiptc-dev \ 162 | libircclient-dev \ 163 | libiscsi-dev \ 164 | libisl-dev \ 165 | libisns-dev \ 166 | libiso9660-dev \ 167 | libiw-dev \ 168 | libixp-dev \ 169 | libjack-dev \ 170 | libjansson-dev \ 171 | libjbig2dec0-dev \ 172 | libjemalloc-dev \ 173 | libjim-dev \ 174 | libjpgalleg4-dev \ 175 | libjson-c-dev \ 176 | libjudy-dev \ 177 | libkaz-dev \ 178 | libkmod-dev \ 179 | liblapack-dev \ 180 | libldap2-dev \ 181 | libldns-dev \ 182 | libleveldb-dev \ 183 | liblivemedia-dev \ 184 | liblo-dev \ 185 | liblua5.1-0-dev \ 186 | liblua5.2-dev 
\ 187 | liblua50-dev \ 188 | liblualib50-dev \ 189 | liblz4-dev \ 190 | liblzo2-dev \ 191 | libmad0-dev \ 192 | libmagic-dev \ 193 | libmarkdown2-dev \ 194 | libmatheval-dev \ 195 | libmbedtls-dev \ 196 | libmcrypt-dev \ 197 | libmd-dev \ 198 | libmemcached-dev \ 199 | libmetis-dev \ 200 | libmhash-dev \ 201 | libmicrohttpd-dev \ 202 | libminiupnpc-dev \ 203 | libmlt-dev \ 204 | libmng-dev \ 205 | libmnl-dev \ 206 | libmodbus-dev \ 207 | libmodplug-dev \ 208 | libmowgli-2-dev \ 209 | libmp3lame-dev \ 210 | libmpc-dev \ 211 | libmpcdec-dev \ 212 | libmpfr-dev \ 213 | libmpg123-dev \ 214 | libmtp-dev \ 215 | libmunge-dev \ 216 | libneon27-dev \ 217 | libnet1-dev \ 218 | libnetcdf-dev \ 219 | libnetfilter-conntrack-dev \ 220 | libnetfilter-queue-dev \ 221 | libnetpbm10-dev \ 222 | libnewt-dev \ 223 | libnfnetlink-dev \ 224 | libnids-dev \ 225 | libnl-3-dev \ 226 | libnl-genl-3-dev \ 227 | libnl-nf-3-dev \ 228 | libnlopt-dev \ 229 | libnorm-dev \ 230 | libnotify-dev \ 231 | libnuma-dev \ 232 | liboauth-dev \ 233 | libopenal-dev \ 234 | libopencc-dev \ 235 | libopencore-amrnb-dev \ 236 | libopencore-amrwb-dev \ 237 | libopencv-core-dev \ 238 | libopencv-flann-dev \ 239 | libopencv-imgproc-dev \ 240 | libopenhpi-dev \ 241 | libopenr2-dev \ 242 | libosip2-dev \ 243 | libpam0g-dev \ 244 | libpapi-dev \ 245 | libparted-dev \ 246 | libpcap-dev \ 247 | libpci-dev \ 248 | libpciaccess-dev \ 249 | libpcl1-dev \ 250 | libpcp-pmda3-dev \ 251 | libpcp3-dev \ 252 | libpcsclite-dev \ 253 | libperl-dev \ 254 | libpfm4-dev \ 255 | libpgm-dev \ 256 | libpopt-dev \ 257 | libportmidi-dev \ 258 | libpri-dev \ 259 | libproj-dev \ 260 | libpsl-dev \ 261 | libpth-dev \ 262 | libpulse-dev \ 263 | libpython2.7-dev \ 264 | libqrencode-dev \ 265 | libquicktime-dev \ 266 | libquorum-dev \ 267 | librabbitmq-dev \ 268 | librados-dev \ 269 | librbd-dev \ 270 | librdf0-dev \ 271 | librdkafka-dev \ 272 | librdmacm-dev \ 273 | librrd-dev \ 274 | librtmp-dev \ 275 | libs3-dev \ 276 | libsamplerate0-dev \ 277 | libsasl2-dev \ 278 | libsctp-dev \ 279 | libsdl-gfx1.2-dev \ 280 | libsdl-image1.2-dev \ 281 | libsdl-mixer1.2-dev \ 282 | libsdl-ttf2.0-dev \ 283 | libsdl2-mixer-dev \ 284 | libsdl2-net-dev \ 285 | libsgutils2-dev \ 286 | libshout3-dev \ 287 | libsigsegv-dev \ 288 | libslang2-dev \ 289 | libsmbclient-dev \ 290 | libsmi2-dev \ 291 | libsnappy-dev \ 292 | libsndfile1-dev \ 293 | libsndio-dev \ 294 | libsocks4 \ 295 | libsodium-dev \ 296 | libsoil-dev \ 297 | libspandsp-dev \ 298 | libspectrum-dev \ 299 | libspeex-dev \ 300 | libspeexdsp-dev \ 301 | libspiro-dev \ 302 | libsprng2-dev \ 303 | libsqlite0-dev \ 304 | libss7-dev \ 305 | libssh-dev \ 306 | libssh2-1-dev \ 307 | libst-dev \ 308 | libstrophe-dev \ 309 | libswresample-dev \ 310 | libswscale-dev \ 311 | libsysfs-dev \ 312 | libtalloc-dev \ 313 | libtar-dev \ 314 | libtcc-dev \ 315 | libtcl8.6 \ 316 | libtdb-dev \ 317 | libtheora-dev \ 318 | libtokyocabinet-dev \ 319 | libtokyotyrant-dev \ 320 | libtommath-dev \ 321 | libtonezone-dev \ 322 | libtpm-unseal-dev \ 323 | libtrace3-dev \ 324 | libtre-dev \ 325 | libtrio-dev \ 326 | libtspi-dev \ 327 | libtwolame-dev \ 328 | libucl-dev \ 329 | libudev-dev \ 330 | libunbound-dev \ 331 | libunwind-dev \ 332 | liburcu-dev \ 333 | libusb-1.0-0-dev \ 334 | libusb-dev \ 335 | libusbmuxd-dev \ 336 | libuv1-dev \ 337 | libvdeplug-dev \ 338 | libvdpau-dev \ 339 | libvirt-dev \ 340 | libvncserver-dev \ 341 | libvo-amrwbenc-dev \ 342 | libvorbis-dev \ 343 | libvpx-dev \ 344 | libwavpack-dev \ 345 | libwbclient-dev \ 346 | 
libwebsockets-dev \ 347 | libwrap0-dev \ 348 | libx264-dev \ 349 | libxaw7-dev \ 350 | libxcb-icccm4-dev \ 351 | libxcb-randr0-dev \ 352 | libxcb-xinerama0-dev \ 353 | libxerces-c-dev \ 354 | libxft-dev \ 355 | libxi-dev \ 356 | libxmltok1-dev \ 357 | libxmu-dev \ 358 | libxnvctrl-dev \ 359 | libxosd-dev \ 360 | libxpm-dev \ 361 | libxtables-dev \ 362 | libxtst-dev \ 363 | libxvidcore-dev \ 364 | libxxf86dga-dev \ 365 | libxxhash-dev \ 366 | libyajl-dev \ 367 | libzdb-dev \ 368 | libzip-dev \ 369 | libzmq3-dev \ 370 | libzstd-dev \ 371 | nasm \ 372 | ocl-icd-opencl-dev \ 373 | opt \ 374 | portaudio19-dev \ 375 | tcl-dev \ 376 | vstream-client-dev 377 | 378 | # Install Python libraries. 379 | COPY requirements.txt /usr/src/ 380 | RUN pip install -r /usr/src/requirements.txt && \ 381 | rm /usr/src/requirements.txt 382 | 383 | # Download convenience scripts. 384 | ENV CUSTOM_PATH="/usr/custom" 385 | RUN mkdir -p $CUSTOM_PATH 386 | RUN curl -sSL https://github.com/shyiko/commacd/raw/v1.0.0/commacd.sh -o $CUSTOM_PATH/.commacd.sh 387 | 388 | # Create entrypoint that sets appropriate group and user IDs. 389 | COPY scripts/entrypoint.sh /usr/local/bin/entrypoint.sh 390 | RUN chmod +x /usr/local/bin/entrypoint.sh 391 | ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] 392 | 393 | # Copy `ghcc` files into image, and set PYTHONPATH and PATH. 394 | COPY ghcc/ $CUSTOM_PATH/ghcc/ 395 | COPY scripts/ $CUSTOM_PATH/scripts/ 396 | ENV PATH="$CUSTOM_PATH/scripts/mock_path:$PATH" 397 | ENV PYTHONPATH="$CUSTOM_PATH/:$PYTHONPATH" 398 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GitHub Cloner & Compiler 2 | 3 | This project serves as the data collection process for training neural decompilers, such as 4 | [CMUSTRUDEL/DIRE](https://github.com/CMUSTRUDEL/DIRE). 5 | 6 | The code for compilation is adapted from 7 | [bvasiles/decompilationRenaming](https://github.com/bvasiles/decompilationRenaming). The code for decompilation is 8 | adapted from [CMUSTRUDEL/DIRE](https://github.com/CMUSTRUDEL/DIRE). 9 | 10 | 11 | ## Setup 12 | 13 | 1. Install [Docker](https://docs.docker.com/install/) and [MongoDB](https://docs.mongodb.com/manual/installation/). 14 | 2. Install required Python packages by: 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 3. Rename `database-config-example.json` to `database-config.json`, and fill in appropriate values. This will be used 19 | to connect to your MongoDB server. 20 | 4. Build the Docker image used for compiling programs by: 21 | ```bash 22 | docker build -t gcc-custom . 23 | ``` 24 | 25 | 26 | ## Usage 27 | 28 | ### Running the Compiler 29 | 30 | You will need a list of GitHub repository URLs to run the code. The current code expects one URL per line, for example: 31 | ``` 32 | https://github.com/huzecong/ghcc.git 33 | https://www.github.com/torvalds/linux 34 | FFmpeg/FFmpeg 35 | https://api.github.com/repos/pytorch/pytorch 36 | ``` 37 | 38 | To run, simply execute: 39 | ```bash 40 | python main.py --repo-list-file path/to/your/list [arguments...] 41 | ``` 42 | 43 | The following arguments are supported: 44 | 45 | - `--repo-list-file [path]`: Path to the list of repository URLs. 46 | - `--clone-folder [path]`: The temporary directory to store cloned repository files. Defaults to `repos/`. 47 | - `--binary-folder [path]`: The directory to store compiled binaries. Defaults to `binaries/`. 
48 | - `--archive-folder [path]`: The directory to store archived repository files. Defaults to `archives/`. 49 | - `--n-procs [int]`: Number of worker processes to spawn. Defaults to 0 (single-process execution). 50 | - `--log-file [path]`: Path to the log file. Defaults to `log.txt`. 51 | - `--clone-timeout [int]`: Maximum cloning time (seconds) for one repository. Defaults to 600 (10 minutes). 52 | - `--force-reclone`: If specified, all repositories are cloned regardless of whether they have been processed before or 53 | whether archived versions exist. 54 | - `--compile-timeout [int]`: Maximum compilation time (seconds) for all Makefiles under a repository. Defaults to 900 55 | (15 minutes). 56 | - `--force-recompile`: If specified, all repositories are compiled regardless of whether they have been processed before. 57 | - `--docker-batch-compile`: Batch compile all Makefiles in one repository using one Docker invocation. This is on by 58 | default, and you almost always want this. Use the `--no-docker-batch-compile` flag to disable it. 59 | - `--compression-type [str]`: Format of the repository archive; available options are `gzip` (faster) and `xz` 60 | (smaller). Defaults to `gzip`. 61 | - `--max-archive-size [int]`: Maximum size (bytes) of repositories to archive. Repositories with greater sizes will not 62 | be archived. Defaults to 104,857,600 (100MB). 63 | - `--record-libraries [path]`: If specified, a list of libraries used during failed compilations will be written to the 64 | specified path. See [Collecting and Installing Libraries](#collecting-and-installing-libraries) for details. 65 | - `--logging-level [str]`: The logging level. Defaults to `info`. 66 | - `--max-repos [int]`: If specified, only the first `max_repos` repositories from the list will be processed. 67 | - `--recursive-clone`: If specified, submodules in the repository will also be cloned if they exist. This is on by default. 68 | Use the `--no-recursive-clone` flag to disable it. 69 | - `--write-db`: If specified, compilation results will be written to the database. This is on by default. Use the 70 | `--no-write-db` flag to disable it. 71 | - `--record-metainfo`: If specified, additional statistics will be recorded. 72 | - `--gcc-override-flags`: If specified, these are passed as compiler flags to GCC. By default, `-O0` is used. 73 | 74 | ### Utilities 75 | 76 | - If compilation is interrupted, there may be leftovers that cannot be removed due to privilege issues. Purge them by: 77 | ```bash 78 | ./purge_folder.py /path/to/clone/folder 79 | ``` 80 | This is because intermediate files are created under different permissions, and we need root privileges (sneakily 81 | obtained via Docker) to purge those files. This is also performed at the beginning of the `main.py` script. 82 | - If something is seriously messed up, drop the database by: 83 | ```bash 84 | python -m ghcc.database clear 85 | ``` 86 | - If the code is modified, remember to rebuild the image, since the `batch_make.py` script (executed inside Docker to 87 | compile Makefiles) depends on the library code. If you don't do so, well, GHCC will remind you and refuse to proceed. 88 | 89 | ### Running the Decompiler 90 | 91 | Decompilation requires an active installation of IDA with the Hex-Rays plugin. To run, simply execute: 92 | ```bash 93 | python run_decompiler.py --ida path/to/idat64 [arguments...] 94 | ``` 95 | 96 | The following arguments are supported: 97 | 98 | - `--ida [path]`: Path to the `idat64` executable found under the IDA installation folder.
99 | - `--binaries-dir [path]`: The directory where binaries are stored, i.e., the same value as `--binary-folder` in the 100 | compilation arguments. Defaults to `binaries/`. 101 | - `--output-dir [path]`: The directory to store decompiled code. Defaults to `decompile_output/`. 102 | - `--log-file [path]`: Path to the log file. Defaults to `decompile-log.txt`. 103 | - `--timeout [int]`: Maximum decompilation time (seconds) for one binary. Defaults to 30. 104 | - `--n-procs [int]`: Number of worker processes to spawn. Defaults to 0 (single-process execution). 105 | 106 | 107 | ## Advanced Topics 108 | 109 | ### Heuristics for Compilation 110 | 111 | The following procedure happens when compiling a Makefile: 112 | 113 | 1. **Check if directory is "make"-able:** A directory is marked as "make"-able if it contains (case-insensitively) at 114 | least one set of files among the following: 115 | 116 | - *(Make)* `Makefile` 117 | - *(automake)* `Makefile.am` 118 | 119 | If the directory is not "make"-able, skip the following steps. 120 | 121 | 2. **Clean Git repository:** 122 | 123 | ```bash 124 | git reset --hard # reset modified files 125 | git clean -xffd # clean unversioned files 126 | # do the same for submodules 127 | git submodule foreach --recursive git reset --hard 128 | git submodule foreach --recursive git clean -xffd 129 | ``` 130 | 131 | If any command fails, ignore it and continue executing the rest. 132 | 133 | 3. **Build:** 134 | 135 | 1. If a file named `Makefile.am` exists, run `automake`: 136 | 137 | ```bash 138 | autoreconf && automake --add-missing 139 | ``` 140 | 141 | 2. If a file named `configure` exists, run the configuration script: 142 | 143 | ```bash 144 | chmod +x ./configure && ./configure --disable-werror 145 | ``` 146 | 147 | The `--disable-werror` flag prevents warnings from being treated as errors in cases where `-Werror` is specified. 148 | 149 | If the command fails within 2 seconds, try again without `--disable-werror`. 150 | 151 | 3. Run `make`: 152 | 153 | ```bash 154 | make --always-make --keep-going -j1 155 | ``` 156 | 157 | The `--always-make` flag rebuilds all dependent targets even if they exist. The `--keep-going` flag allows Make to 158 | continue for targets if errors occur in non-dependent targets. 159 | 160 | If the command fails within 2 seconds and the output contains `"Missing separator"`, try again with `bmake` 161 | *(BSD Make)*. 162 | 163 | **Note:** We override certain programs with our "wrapped" versions by modifying the `PATH` variable. The list of 164 | wrapped programs is: 165 | 166 | - **GCC:** (`gcc`, `cc`, `clang`) Swallows unnecessary and/or error-prone flags (`-Werror`, `-march`, 167 | `-mlittle-endian`), records libraries used (`-l`), overrides the optimization level (`-O0`), adds override flags 168 | specified in the arguments, and calls the real GCC. If the real GCC fails, writes the libraries to a predefined 169 | path. 170 | - **`sudo`:** Does not prompt for the password, but instead just tries to execute the command without privileges. 171 | - **`pkg-config`:** Records libraries used, and calls the real `pkg-config`. If it fails (meaning packages cannot 172 | be resolved), writes the libraries to a predefined path. 173 | 174 | ### Collecting and Installing Libraries 175 | 176 | Most repositories require linking to external libraries. To collect libraries that are linked to in Makefiles, run the 177 | script with the flag `--record-libraries path/to/library_log.txt`. Only libraries in commands that failed to execute 178 | (GCC return code is non-zero) are recorded in the log file.
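For illustration, here is a minimal, hypothetical sketch of how such a wrapper could work, written as a shell shim that shadows the real compiler via `PATH`. This is not the actual `scripts/mock_path/gcc`: the environment variable names `MOCK_GCC_LIBRARY_LOG` and `MOCK_GCC_OVERRIDE_FLAGS` are the ones used by `ghcc/compile.py`, while the location of the real GCC is an assumption:
```bash
#!/bin/bash
# Hypothetical sketch -- not the actual scripts/mock_path/gcc.
libs=()
args=()
for arg in "$@"; do
  case "$arg" in
    -Werror|-O*|-march=*|-mlittle-endian) ;;       # swallow error-prone flags; -O is re-added below
    -l*) libs+=("${arg#-l}"); args+=("$arg") ;;    # remember `-l` libraries, but keep the flag
    *) args+=("$arg") ;;
  esac
done
/usr/local/bin/gcc -O0 $MOCK_GCC_OVERRIDE_FLAGS "${args[@]}"   # assumed path of the real GCC
status=$?
if [ "$status" -ne 0 ] && [ -n "$MOCK_GCC_LIBRARY_LOG" ] && [ "${#libs[@]}" -gt 0 ]; then
  printf '%s\n' "${libs[@]}" >> "$MOCK_GCC_LIBRARY_LOG"        # record libraries only on failure
fi
exit "$status"
```
The key design point is that libraries are appended to the log only when the real compiler fails, matching the behavior described above.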
After gathering the library log, run `install_libraries.py path/to/library_log.txt` to resolve libraries to package 181 | names (based on `apt-cache`). This step requires actually installing packages, so it's recommended to run it in a Docker 182 | environment: 183 | ```bash 184 | docker run --rm \ 185 | -v /absolute/path/to/directory/:/usr/src/ \ 186 | gcc-custom \ 187 | "install_libraries.py /usr/src/library_log.txt" 188 | ``` 189 | This gives a list of packages to install. Add the list of packages to `Dockerfile` (the command that begins with 190 | `RUN apt-get install -y --no-install-recommends`) and rebuild the image to apply changes. 191 | 192 | ### Notes on Docker Safety 193 | 194 | Compiling random code from GitHub is basically equivalent to running `curl | bash`, and doing so in Docker would be like 195 | `curl | sudo bash`, as Docker (by default) doesn't protect you against kernel panics and fork bombs. The following notes 196 | describe what is done to (partly) ensure safety of the host machine when compiling code. 197 | 198 | 1. Never run Docker as root. This means two things: 1) don't use `sudo docker run ...`, and 2) don't execute commands in 199 | Docker as the root user (default). The first goal can be achieved by creating a `docker` user group, and the second 200 | can be achieved using a special entry-point: create a non-privileged user and use `gosu` to switch to that user and 201 | run commands. 202 | 203 | **Caveats:** When creating the non-privileged user, assign the same UID (user ID) or GID (group ID) as the host user, 204 | so files created inside the container can be accessed/modified by the host user. 205 | 206 | 2. Limit the number of processes. This is to prevent things like fork bombs or badly written recursive Makefiles from 207 | taking up the kernel memory. A simple solution is to use `ulimit -u <n>` to set the maximum allowed number of 208 | processes, but such limits are on a per-user basis instead of a per-container or per-process-tree basis. 209 | 210 | What we can do is: for each container we spawn, create a user that has the same GID as the host user, but with a 211 | distinct UID, and call `ulimit` for that user. This serves as a workaround for per-container limits. 212 | 213 | Don't forget to `chmod g+w` for files that need to be accessed from the host.
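Putting these two points together, a minimal sketch of such an entrypoint might look as follows. This is a hypothetical illustration rather than the actual `scripts/entrypoint.sh`; the `LOCAL_USER_ID` variable and the process cap of 512 are assumptions:
```bash
#!/bin/bash
# Hypothetical sketch -- not the actual scripts/entrypoint.sh.
# LOCAL_USER_ID is assumed to be passed in, e.g. `docker run -e LOCAL_USER_ID=$(id -u) ...`.
USER_ID=${LOCAL_USER_ID:-9001}
useradd --shell /bin/bash -u "$USER_ID" -o -c "" -m user   # mirror the host user's UID
export HOME=/home/user
ulimit -u 512        # cap the number of processes; the limit survives `exec`
exec gosu user "$@"  # drop root privileges before running the actual command
```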
214 | -------------------------------------------------------------------------------- /analyze_fails.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import csv 3 | import os 4 | import random 5 | import re 6 | from collections import defaultdict 7 | from typing import Dict, List, Tuple 8 | 9 | import flutes 10 | import numpy as np 11 | from IPython import embed 12 | from tqdm import tqdm 13 | 14 | import ghcc 15 | 16 | # tag -> (date_time, value_at_time) 17 | InfoDict = Dict[str, List[Tuple[str, int]]] 18 | TAGS = ["n_partial", "n_binaries", "n_total"] 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument("log_file") 22 | args = parser.parse_args() 23 | 24 | 25 | def all_equal(xs) -> bool: 26 | r"""Returns whether all elements in the list :attr:`xs` are equal.""" 27 | return not xs or all(x == xs[0] for x in xs[1:]) 28 | 29 | 30 | def changed_repos(repo_info: Dict[str, InfoDict]) -> Dict[str, InfoDict]: 31 | r"""Filters out the repositories in the InfoDict such that not all recorded values are the same.""" 32 | changed = {} 33 | for name, info in repo_info.items(): 34 | if any(not all_equal([v for _, v in vals]) for vals in info.values()): 35 | changed[name] = info 36 | return changed 37 | 38 | 39 | def analyze_logs(path: str) -> Dict[str, InfoDict]: 40 | r"""Reads and parses the compilation log generated by ``main.py``, and returns information for each repository.""" 41 | with open(path, "r") as f: 42 | logs = f.read().split("\n") 43 | 44 | repo_info: Dict[str, InfoDict] = defaultdict(lambda: {tag: [] for tag in TAGS}) 45 | regex = re.compile(r"(?P<date_time>[0-9-]{10} [0-9:]{8}),\d{3} \w+: " 46 | r"(?P<n_success>\d+) \((?P<n_partial>\d+)\) out of (?P<n_total>\d+) Makefile\(s\) in " 47 | r"(?P<repo_owner>\w+)/(?P<repo_name>\w+) compiled \(partially\), " 48 | r"yielding (?P<n_binaries>\d+) binaries") 49 | for idx, line in enumerate(logs): 50 | match = regex.search(line) 51 | if match is not None: 52 | repo_owner, repo_name = match.group("repo_owner"), match.group("repo_name") 53 | repo_full_name = f"{repo_owner}/{repo_name}" 54 | date_time = match.group("date_time") 55 | for tag in TAGS: 56 | value = int(match.group(tag)) 57 | repo_info[repo_full_name][tag].append((date_time, value)) 58 | return repo_info 59 | 60 | 61 | def main(): 62 | flutes.register_ipython_excepthook() 63 | random.seed(ghcc.__MAGIC__) 64 | np.random.seed(ghcc.__MAGIC__) 65 | 66 | repo_info = analyze_logs(args.log_file) 67 | changed = changed_repos(repo_info) 68 | 69 | # Sample 100 failed repositories. 70 | repos_with_fail = [repo for repo, info in repo_info.items() 71 | if info["n_partial"][-1] < info["n_total"][-1]] 72 | samples = np.random.choice(len(repos_with_fail), 100, replace=False) 73 | _repo_samples = [repos_with_fail[x] for x in samples] 74 | 75 | # Remove repositories with more than 50 Makefiles. 76 | repo_samples = [] 77 | for repo in _repo_samples: 78 | _, val = repo_info[repo]["n_total"][-1] 79 | if val <= 50: 80 | repo_samples.append(repo) 81 | else: 82 | print(f"{repo} contains {val} Makefiles, skipping") 83 | 84 | # Clone the repositories. 85 | for repo in tqdm(repo_samples, desc="Cloning repos"): 86 | owner, name = repo.split("/") 87 | ghcc.clone(owner, name, "test_compile") 88 | 89 | # Write repository information into a CSV file. 90 | # Each line is a separate Makefile.
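# Columns: repository name, Makefile directory, and compile status ("Failed" or blank for success);
# the "Failed Reason?" column is left empty here, presumably to be filled in by manual inspection.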
91 | db = ghcc.RepoDB() 92 | with open("repo_samples.csv", "w") as f: 93 | writer = csv.writer(f) 94 | writer.writerow(["Repo", "Makefile", "Status", "Failed Reason?"]) 95 | 96 | for repo in tqdm(repo_samples, desc="Writing CSV"): 97 | makefiles = ghcc.find_makefiles(os.path.join("test_compile", repo)) 98 | owner, name = repo.split("/") 99 | entry = db.get(owner, name) 100 | success_makefiles = set() 101 | for makefile_info in entry['makefiles']: 102 | directory = makefile_info["directory"] 103 | directory = "/".join([owner, name] + directory.split("/")[4:]) 104 | success_makefiles.add(directory) 105 | for makefile in makefiles: 106 | directory = "/".join(makefile.split("/")[1:]) 107 | status = "" if directory in success_makefiles else "Failed" 108 | writer.writerow([repo, directory, status]) 109 | print(repo, directory, status) 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /compare_logs.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | import re 3 | import sys 4 | from collections import defaultdict 5 | from typing import Dict, Tuple 6 | 7 | import flutes 8 | 9 | DiffDict = Dict[str, Tuple[int, int]] 10 | 11 | TAGS = ["n_success", "n_total"] 12 | 13 | 14 | def parse_logs(path: str) -> Dict[str, Dict[str, int]]: 15 | r"""Reads and parses the compilation log generated by ``main.py``, and returns information for each repository.""" 16 | with open(path, "r") as f: 17 | logs = f.read().split("\n") 18 | 19 | repo_info: Dict[str, Dict[str, int]] = defaultdict(dict) 20 | regex_success = re.compile(r"(?P<date_time>[0-9-]{10} [0-9:]{8}),\d{3} \w+: \(Worker \s*\d+\) " 21 | r"(?P<n_success>\d+) \((?P<n_partial>\d+)\) out of (?P<n_total>\d+) Makefile\(s\) in " 22 | r"(?P<repo_owner>\S+?)/(?P<repo_name>\S+?) compiled \(partially\), " 23 | r"yielding (?P<n_binaries>\d+) binaries") 24 | regex_no_mkfile = re.compile(r"(?P<date_time>[0-9-]{10} [0-9:]{8}),\d{3} \w+: \(Worker \s*\d+\) " 25 | r"No Makefiles found in (?P<repo_owner>\S+?)/(?P<repo_name>\S+?), " 26 | r"repository deleted") 27 | for idx, line in enumerate(logs): 28 | match = regex_success.search(line) 29 | if match is not None: 30 | repo_owner, repo_name = match.group("repo_owner"), match.group("repo_name") 31 | repo_full_name = f"{repo_owner}/{repo_name}" 32 | for tag in TAGS: 33 | value = int(match.group(tag)) 34 | repo_info[repo_full_name][tag] = value 35 | else: 36 | match = regex_no_mkfile.search(line) 37 | if match is not None: 38 | repo_owner, repo_name = match.group("repo_owner"), match.group("repo_name") 39 | repo_full_name = f"{repo_owner}/{repo_name}" 40 | for tag in TAGS: 41 | repo_info[repo_full_name][tag] = 0 42 | return repo_info 43 | 44 | 45 | def compare_logs(info_old: Dict[str, Dict[str, int]], info_new: Dict[str, Dict[str, int]]) -> Dict[str, DiffDict]: 46 | for repo_name in info_new: 47 | if repo_name not in info_old: 48 | flutes.log(f"{repo_name} missing in OLD log", "error") 49 | repo_diff: Dict[str, DiffDict] = defaultdict(dict) 50 | for repo_name in info_old: 51 | if repo_name not in info_new: 52 | flutes.log(f"{repo_name} missing in NEW log", "error") 53 | continue 54 | old_repo_info = info_old[repo_name] 55 | new_repo_info = info_new[repo_name] 56 | difference = [] 57 | for tag in TAGS: 58 | old_val = old_repo_info[tag] 59 | new_val = new_repo_info[tag] 60 | if old_val != new_val: 61 | difference.append(f"{tag} {old_val}->{new_val}") 62 | repo_diff[repo_name][tag] = (old_val, new_val) 63 | if len(difference) > 0: 64 | flutes.log(f"{repo_name}: {', '.join(difference)}") 65 | return repo_diff 66 | 67 | 68 | def main(): 69 | info_old = parse_logs(sys.argv[1]) 70 | info_new = parse_logs(sys.argv[2]) 71 | print(f"Old size: {len(info_old)}, New size: {len(info_new)}") 72 | repo_diff = compare_logs(info_old, info_new) 73 | 74 | for tag in TAGS: 75 | print(tag) 76 | pprint.pprint({ 77 | repo_name: diff[tag] 78 | for repo_name, diff in repo_diff.items() 79 | if tag in diff and diff[tag][0] > diff[tag][1] 80 | }) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /database-config-example.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "localhost", 3 | "port": 27017, 4 | "auth_db_name": "auth_db", 5 | "db_name": "my_db", 6 | "username": "username", 7 | "password": "password" 8 | } 9 | -------------------------------------------------------------------------------- /ghcc/__init__.py: -------------------------------------------------------------------------------- 1 | from .compile import * 2 | from .database import * 3 | from .repo import * 4 | from . import parse 5 | from . 
import utils 6 | 7 | __MAGIC__ = 19260817 8 | -------------------------------------------------------------------------------- /ghcc/compile.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import pickle 4 | import shutil 5 | import subprocess 6 | import time 7 | from enum import Enum, auto 8 | from typing import Callable, Dict, Iterator, List, NamedTuple, Optional 9 | 10 | from flutes.run import run_command 11 | 12 | from .database import RepoDB 13 | from .repo import clean 14 | from .utils.docker import run_docker_command 15 | 16 | MOCK_PATH = os.path.abspath(os.path.join(os.path.split(__file__)[0], "..", "..", "scripts", "mock_path")) 17 | 18 | ELF_FILE_TAG = b"ELF" # Linux 19 | 20 | __all__ = [ 21 | "contains_files", 22 | "find_makefiles", 23 | "CompileErrorType", 24 | "CompileResult", 25 | "unsafe_make", 26 | "docker_make", 27 | "compile_and_move", 28 | "docker_batch_compile", 29 | ] 30 | 31 | 32 | def contains_files(path: str, names: List[str]) -> bool: 33 | r"""Check (non-recursively) whether the directory contains at least one file with an acceptable name 34 | (case-insensitive). 35 | 36 | :param path: The directory to check for. 37 | :param names: List of acceptable names. Note that all names must be in lowercase! 38 | :return: Whether the check succeeded. 39 | """ 40 | for file in os.listdir(path): 41 | if file.lower() in names and os.path.isfile(os.path.join(path, file)): 42 | return True 43 | return False 44 | 45 | 46 | def find_makefiles(path: str) -> List[str]: 47 | r"""Find all subdirectories under the given directory that contains Makefiles. 48 | 49 | :param path: Path to the directory to scan. 50 | :return: A list of absolute paths to subdirectories that contain Makefiles. 51 | """ 52 | directories = [] 53 | for subdir, dirs, files in os.walk(path): 54 | if contains_files(subdir, ["makefile"]): 55 | directories.append(subdir) 56 | return directories 57 | 58 | 59 | class CompileErrorType(Enum): 60 | Timeout = auto() 61 | CompileFailed = auto() 62 | Unknown = auto() 63 | 64 | 65 | class CompileResult(NamedTuple): 66 | success: bool 67 | elf_files: List[str] # list of paths to ELF files 68 | error_type: Optional[CompileErrorType] = None 69 | captured_output: Optional[str] = None 70 | 71 | 72 | def _create_result(success: bool = False, elf_files: Optional[List[str]] = None, 73 | error_type: Optional[CompileErrorType] = None, 74 | captured_output: Optional[str] = None) -> CompileResult: 75 | if elf_files is None: 76 | elf_files = [] 77 | return CompileResult(success, elf_files=elf_files, error_type=error_type, captured_output=captured_output) 78 | 79 | 80 | def _check_elf_fn(directory: str, file: str) -> bool: 81 | r"""Checks whether the specified file is a binary file. 82 | 83 | :param directory: The directory containing the Makefile. 84 | :param file: The path to the file to check, relative to the directory. 85 | :return: If ``True``, the file is a binary (ELF) file. 86 | """ 87 | path = os.path.join(directory, file) 88 | output = subprocess.check_output(["file", path], timeout=10) 89 | output = output[len(path):] # first part is file name 90 | return ELF_FILE_TAG in output 91 | 92 | 93 | def _make_skeleton(directory: str, timeout: Optional[float] = None, 94 | env: Optional[Dict[str, str]] = None, 95 | verbose: bool = True, 96 | *, make_fn, 97 | check_file_fn: Callable[[str, str], bool] = _check_elf_fn) -> CompileResult: 98 | r"""A composable routine for different compilation methods. 
Different routines can be composed by specifying 99 | different ``make_fn``\ s and ``check_file_fn``\ s. 100 | 101 | :param directory: The directory containing the Makefile. 102 | :param timeout: Maximum compilation time. 103 | :param env: A dictionary of environment variables. 104 | :param verbose: If ``True``, print out executed commands and outputs. 105 | :param make_fn: The function to call for compilation. The function takes as input variables ``directory``, 106 | ``timeout``, and ``env``. 107 | :param check_file_fn: A function to determine whether a generated file should be collected, i.e., whether it is a 108 | binary file. The function takes as input variables ``directory`` and ``file``, where ``file`` is the path of the 109 | file to check, relative to ``directory``. Defaults to :meth:`_check_elf_fn`, which checks whether the file is an 110 | ELF file. 111 | """ 112 | directory = os.path.abspath(directory) 113 | 114 | try: 115 | # Clean unversioned files by previous compilations. 116 | clean(directory) 117 | 118 | # Call the actual function for `make`. 119 | make_fn(directory, timeout=timeout, env=env, verbose=verbose) 120 | result = _create_result(True) 121 | 122 | except subprocess.TimeoutExpired as e: 123 | # Even if exceptions occur, we still check for ELF files, just in case. 124 | result = _create_result(error_type=CompileErrorType.Timeout, captured_output=e.output) 125 | except subprocess.CalledProcessError as e: 126 | result = _create_result(error_type=CompileErrorType.CompileFailed, captured_output=e.output) 127 | except OSError as e: 128 | result = _create_result(error_type=CompileErrorType.Unknown, captured_output=str(e)) 129 | 130 | try: 131 | # Use Git to find all unversioned files -- these would be the products of compilation. 132 | output = run_command(["git", "ls-files", "--others"], cwd=directory, 133 | timeout=timeout, return_output=True).captured_output 134 | assert output is not None 135 | diff_files = [ 136 | # files containing escape characters are in quotes 137 | file if file[0] != '"' else file[1:-1] 138 | for file in output.decode('unicode_escape').split("\n") if file] # file names could contain spaces 139 | 140 | # Inspect each file and find ELF files. 141 | for file in diff_files: 142 | if check_file_fn(directory, file): 143 | result.elf_files.append(file) 144 | except subprocess.TimeoutExpired as e: 145 | return _create_result(elf_files=result.elf_files, error_type=CompileErrorType.Timeout, captured_output=e.output) 146 | except subprocess.CalledProcessError as e: 147 | return _create_result(elf_files=result.elf_files, error_type=CompileErrorType.Unknown, captured_output=e.output) 148 | except OSError as e: 149 | return _create_result(elf_files=result.elf_files, error_type=CompileErrorType.Unknown, captured_output=str(e)) 150 | 151 | return result 152 | 153 | 154 | def _unsafe_make(directory: str, timeout: Optional[float] = None, env: Optional[Dict[str, str]] = None, 155 | verbose: bool = False) -> None: 156 | env = {"PATH": f"{MOCK_PATH}:{os.environ['PATH']}", **(env or {})} 157 | # Try GNU Automake first. Note that errors are ignored because it's possible that the original files still work. 158 | if contains_files(directory, ["configure.ac", "configure.in"]): 159 | start_time = time.time() 160 | if os.path.isfile(os.path.join(directory, "autogen.sh")): 161 | # Some projects with non-trivial build instructions provide an "autogen.sh" script. 
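# If it exists, run it (ignoring errors) instead of calling `autoreconf` directly.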
162 | run_command(["chmod", "+x", "./autogen.sh"], env=env, cwd=directory, verbose=verbose) 163 | run_command(["./autogen.sh"], env=env, cwd=directory, timeout=timeout, verbose=verbose, ignore_errors=True) 164 | else: 165 | run_command(["autoreconf", "--force", "--install"], 166 | env=env, cwd=directory, timeout=timeout, ignore_errors=True, verbose=verbose) 167 | end_time = time.time() 168 | if timeout is not None: 169 | timeout = max(1.0, timeout - int(end_time - start_time)) 170 | 171 | # Try running `./configure` if it exists. 172 | if os.path.isfile(os.path.join(directory, "configure")): 173 | start_time = time.time() 174 | run_command(["chmod", "+x", "./configure"], env=env, cwd=directory, verbose=verbose) 175 | ret = run_command(["./configure", "--disable-werror"], env=env, cwd=directory, timeout=timeout, 176 | verbose=verbose, ignore_errors=True) 177 | end_time = time.time() 178 | if ret.return_code != 0 and end_time - start_time <= 2: 179 | # The configure file might not support `--disable-werror` and died instantly. Try again without the flag. 180 | run_command(["./configure"], env=env, cwd=directory, timeout=timeout, verbose=verbose) 181 | end_time = time.time() 182 | if timeout is not None: 183 | timeout = max(1.0, timeout - int(end_time - start_time)) 184 | 185 | # Make while ignoring errors. 186 | # `-B/--always-make` could give strange errors for certain Makefiles, e.g. ones containing "%:" 187 | try: 188 | run_command(["make", "--keep-going", "-j1"], env=env, cwd=directory, timeout=timeout, verbose=verbose) 189 | except subprocess.CalledProcessError as err: 190 | expected_msg = b"missing separator" 191 | if not (err.output is not None and expected_msg in err.output): 192 | raise err 193 | else: 194 | # Try again using BSD Make instead of GNU Make. Note BSD Make does not have a flag equivalent to 195 | # `-B/--always-make`. 196 | run_command(["bmake", "-k", "-j1"], env=env, cwd=directory, timeout=timeout, verbose=verbose) 197 | 198 | 199 | def unsafe_make(directory: str, timeout: Optional[float] = None, env: Optional[Dict[str, str]] = None, 200 | verbose: bool = False) -> CompileResult: 201 | r"""Run ``make`` in the given directory and collect compilation outputs. 202 | 203 | .. warning:: 204 | This will run ``make`` on your physical machine under the same privilege granted to the Python program. 205 | Never run programs from unvalidated sources as malicious programs could break your system. 206 | 207 | :param directory: Path to the directory containing the Makefile. 208 | :param timeout: Maximum time allowed for compilation, in seconds. Defaults to ``None`` (unlimited time). 209 | :param env: The environment variables to use when calling ``make``. 210 | :param verbose: If ``True``, print out executed commands and outputs. 211 | :return: An instance of :class:`CompileResult` indicating the result. Fields ``success`` and ``elf_files`` are not 212 | ``None``. 213 | 214 | - If compilation failed, the fields ``error_type`` and ``captured_output`` are also not ``None``. 215 | """ 216 | return _make_skeleton(directory, timeout, env, verbose, make_fn=_unsafe_make) 217 | 218 | 219 | def _docker_make(directory: str, timeout: Optional[float] = None, env: Optional[Dict[str, str]] = None, 220 | verbose: bool = False) -> None: 221 | if os.path.isfile(os.path.join(directory, "configure")): 222 | # Try running `./configure` if it exists. 
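# Only a single shell command can be issued per (non-interactive) container session,
# so `configure` and `make` are chained together with `&&`.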
223 | run_docker_command("chmod +x configure && ./configure && make --keep-going -j1", 224 | user=0, cwd="/usr/src", directory_mapping={directory: "/usr/src"}, 225 | timeout=timeout, shell=True, env=env, verbose=verbose) 226 | else: 227 | # Make while ignoring errors. 228 | # `-B/--always-make` could give strange errors for certain Makefiles, e.g. ones containing "%:" 229 | run_docker_command(["make", "--keep-going", "-j1"], 230 | user=0, cwd="/usr/src", directory_mapping={directory: "/usr/src"}, 231 | timeout=timeout, env=env, verbose=verbose) 232 | 233 | 234 | def docker_make(directory: str, timeout: Optional[float] = None, env: Optional[Dict[str, str]] = None, 235 | verbose: bool = False) -> CompileResult: 236 | r"""Run ``make`` within Docker and collect compilation outputs. 237 | 238 | .. note:: 239 | The ``gcc-custom`` Docker image is used. You can build this using the Dockerfile under the root directory. 240 | 241 | .. warning:: 242 | It is only possible to run one bash command in a (non-interactive) Docker session, the compilation heuristics 243 | used here is the original version. It is recommended to call `batch_make.py` instead of relying on this method. 244 | 245 | :param directory: Path to the directory containing the Makefile. 246 | :param timeout: Maximum time allowed for compilation, in seconds. Defaults to ``None`` (unlimited time). 247 | :param env: The environment variables to use when calling ``make``. 248 | :param verbose: If ``True``, print out executed commands and outputs. 249 | :return: An instance of :class:`CompileResult` indicating the result. Fields ``success`` and ``elf_files`` are not 250 | ``None``. 251 | 252 | - If compilation failed, the fields ``error_type`` and ``captured_output`` are also not ``None``. 253 | """ 254 | return _make_skeleton(directory, timeout, env, verbose, make_fn=_docker_make) 255 | 256 | 257 | def _hash_file_sha256(directory: str, path: str) -> str: 258 | r"""Generate the SHA256 hash signature of the file located at the specified path. 259 | 260 | :param path: Path to the file to compute signature for. 261 | :return: The SHA256 signature. 262 | """ 263 | path = os.path.join(directory, path) 264 | hash_obj = hashlib.sha256() 265 | with open(path, "rb") as f: 266 | hash_obj.update(f.read()) 267 | return hash_obj.hexdigest() 268 | 269 | 270 | def compile_and_move(repo_binary_dir: str, repo_path: str, makefile_dirs: List[str], 271 | compile_timeout: Optional[float] = None, record_libraries: bool = False, 272 | gcc_override_flags: Optional[str] = None, 273 | compile_fn=docker_make, hash_fn: Callable[[str, str], str] = _hash_file_sha256) \ 274 | -> Iterator[RepoDB.MakefileEntry]: 275 | r"""Compile all Makefiles as provided, and move generated binaries to the binary directory. 276 | 277 | :param repo_binary_dir: Path to the directory where generated binaries for the repository will be stored. 278 | :param repo_path: Path to the repository. 279 | :param makefile_dirs: A list of all subdirectories containing Makefiles. 280 | :param compile_timeout: Maximum time allowed for compilation of all Makefiles, in seconds. Defaults to ``None`` 281 | (unlimited time). 282 | :param record_libraries: If ``True``, A file named ``libraries.txt`` will be generated under 283 | :attr:`repo_binary_dir`, recording the libraries used in compilation. Defaults to ``False``. 284 | :param gcc_override_flags: If not ``None``, these flags will be appended to each invocation of GCC. 285 | :param compile_fn: The method to call for compilation. 
Possible values are :meth:`ghcc.unsafe_make` and 286 | :meth:`ghcc.docker_make` (default). 287 | :param hash_fn: The method to call to generate a hash signature for collected binaries. The binaries will be moved 288 | to :attr:`repo_binary_dir` and renamed to the generated hash signature. The function takes as input variables 289 | ``directory`` and ``file``, where ``directory`` is the path of the directory containing the Makefile, and 290 | ``file`` is the path of the binary, relative to ``directory``. 291 | :return: A list of Makefile compilation results. 292 | """ 293 | env = {} 294 | if record_libraries: 295 | env["MOCK_GCC_LIBRARY_LOG"] = os.path.join(repo_binary_dir, "libraries.txt") 296 | if gcc_override_flags is not None: 297 | env["MOCK_GCC_OVERRIDE_FLAGS"] = gcc_override_flags 298 | remaining_time = compile_timeout 299 | for make_dir in makefile_dirs: 300 | if remaining_time is not None and remaining_time <= 0.0: 301 | break 302 | start_time = time.time() 303 | compile_result = compile_fn(make_dir, timeout=remaining_time, env=env) 304 | elapsed_time = time.time() - start_time 305 | if remaining_time is not None: 306 | remaining_time -= elapsed_time 307 | # Only record Makefiles that either successfully compiled or yielded binaries. 308 | # Successful compilations might not generate binaries, while failed compilations may also yield binaries. 309 | if len(compile_result.elf_files) > 0 or compile_result.success: 310 | hashes: List[str] = [] 311 | for path in compile_result.elf_files: 312 | signature = hash_fn(make_dir, path) 313 | hashes.append(signature) 314 | full_path = os.path.join(make_dir, path) 315 | shutil.move(full_path, os.path.join(repo_binary_dir, signature)) 316 | yield { 317 | "directory": make_dir, 318 | "success": compile_result.success, 319 | "binaries": compile_result.elf_files, 320 | "sha256": hashes, 321 | } 322 | clean(repo_path) 323 | 324 | 325 | def docker_batch_compile(repo_binary_dir: str, repo_path: str, 326 | compile_timeout: Optional[float] = None, record_libraries: bool = False, 327 | gcc_override_flags: Optional[str] = None, 328 | use_makefile_info_pkl: bool = False, verbose: bool = False, 329 | user_id: Optional[int] = None, directory_mapping: Optional[Dict[str, str]] = None, 330 | exception_log_fn=None) -> List[RepoDB.MakefileEntry]: 331 | r"""Run batch compilation in Docker. 332 | 333 | :param repo_binary_dir: Path to store collected binaries. 334 | :param repo_path: Path to the code repository. 335 | :param compile_timeout: Timeout for compilation. 336 | :param record_libraries: If ``True``, libraries used during compilation are written under 337 | ``repo_binary_dir/libraries.txt``. 338 | :param gcc_override_flags: Additional flags to pass to GCC during compilation. 339 | :param use_makefile_info_pkl: If ``True``, the caller must prepare a file named ``makefiles.pkl`` under 340 | ``repo_binary_dir``, that contains a pickled object of type ``Dict[str, Dict[str, str]]``, that maps Makefile 341 | directories to a mapping from binary paths to SHA256 hashes. 342 | :param verbose: If ``True``, print out executed commands and outputs. 343 | :param user_id: The user ID to use inside the Docker container. See :meth:`ghcc.utils.docker.run_docker_command`. 344 | :param directory_mapping: Additional directory mappings for Docker. Optional. 345 | :param exception_log_fn: A function to log exceptions occurred in Docker. The function takes the exception object 346 | as input and returns nothing. 347 | :return: A list of Makefile entries. 
348 | """ 349 | start_time = time.time() 350 | try: 351 | # Don't rely on Docker timeout, but instead constrain running time in script run in Docker. Otherwise we won't 352 | # get the results file if any compilation task timeouts. 353 | cmd = [ 354 | "batch_make.py", 355 | *(["--record-libraries"] if record_libraries else []), 356 | *(["--compile-timeout", str(compile_timeout)] if compile_timeout is not None else []), 357 | # We use "--flag=value" instead of "--flag value" because the GCC flags are, you know, flags, which may be 358 | # incorrectly interpreted by `argparse`. 359 | *([f'--gcc-override-flags="{gcc_override_flags}"'] if gcc_override_flags is not None else []), 360 | *(["--use-makefile-info-pkl"] if use_makefile_info_pkl else []), 361 | *(["--verbose"] if verbose else []), 362 | ] 363 | ret = run_docker_command(cmd, user=user_id, return_output=True, 364 | directory_mapping={repo_path: "/usr/src/repo", repo_binary_dir: "/usr/src/bin", 365 | **(directory_mapping or {})}) 366 | except subprocess.CalledProcessError as e: 367 | end_time = time.time() 368 | if ((compile_timeout is not None and end_time - start_time > compile_timeout) or 369 | b"Resource temporarily unavailable" in e.output): 370 | # Usually exceptions at this stage are due to some badly written Makefiles that gets trapped in an infinite 371 | # recursion. We suppress the exception and proceed normally, so we won't have to deal with it again when 372 | # the program is rerun. 373 | if exception_log_fn is not None: 374 | exception_log_fn(e) 375 | else: 376 | # Otherwise, it might be because Docker broke down or something. 377 | raise e 378 | 379 | log_path = os.path.join(repo_binary_dir, "log.pkl") 380 | makefiles: List[RepoDB.MakefileEntry] = [] 381 | if os.path.exists(log_path): 382 | try: 383 | with open(log_path, "rb") as f: 384 | makefiles = pickle.load(f) 385 | except Exception: 386 | makefiles = [] 387 | os.remove(log_path) 388 | return makefiles 389 | -------------------------------------------------------------------------------- /ghcc/database.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import json 3 | import os 4 | import sys 5 | from typing import Any, Dict, Iterator, List, Optional, Set, Type 6 | 7 | import pymongo 8 | from mypy_extensions import TypedDict 9 | 10 | __all__ = [ 11 | "RepoDB", 12 | "BinaryDB", 13 | "MatchFuncDB", 14 | ] 15 | 16 | Index = Dict[str, Any] 17 | 18 | 19 | class BaseEntry(TypedDict, total=False): 20 | r"""The base class for MongoDB entries. Setting ``total=False`` allows not setting the ``_id`` key when creating a 21 | dictionary to be inserted. 22 | """ 23 | _id: Any # actually `bson.ObjectId`, but we don't care 24 | 25 | 26 | class Database(abc.ABC): 27 | r"""A wrapper over MongoDB that handles the connection and authentication. This is an abstract base class, concrete 28 | classes must override the :meth:`collection_name` property. 29 | """ 30 | 31 | # For some reason, mypy thinks `TypedDict` is a function... 32 | Entry: Type[TypedDict] # type: ignore 33 | 34 | class Config(TypedDict): 35 | host: str 36 | port: int 37 | auth_db_name: str 38 | db_name: str 39 | username: str 40 | password: str 41 | 42 | @property 43 | @abc.abstractmethod 44 | def collection_name(self) -> str: 45 | r"""Name of the collection that the DB object uses.""" 46 | raise NotImplementedError 47 | 48 | @property 49 | def index(self) -> List[Index]: 50 | r"""Sets of key(s) to use as index. 
Each key is represented as a tuple of (name, order), where order is 51 | ``pymongo.ASCENDING`` (1) or ``pymongo.DESCENDING`` (-1). 52 | 53 | By default, all indexes are unique. To add a non-unique index, include ``"$unique": False`` in the dictionary. 54 | """ 55 | return [] 56 | 57 | def __init__(self, config_file: str = "./database-config.json", **override_config): 58 | r"""Create a connection to the database. 59 | """ 60 | if not os.path.exists(config_file): 61 | raise ValueError(f"DB config file not found at '{config_file}'. " 62 | f"Please refer to 'database-config-example.json' for the format") 63 | with open(config_file) as f: 64 | config: Database.Config = json.load(f) 65 | config.update(override_config) 66 | missing_keys = [key for key in Database.Config.__annotations__ if key not in config] 67 | if len(missing_keys) > 0: 68 | raise ValueError(f"Keys {missing_keys} are missing from the DB config file at '{config_file}'. " 69 | f"Please refer to 'database-config-example.json' for the format") 70 | 71 | self.client = pymongo.MongoClient( 72 | config['host'], port=config['port'], authSource=config['auth_db_name'], 73 | username=config['username'], password=config['password']) 74 | self.collection = self.client[config['db_name']][self.collection_name] 75 | 76 | for new_index in self.index: 77 | new_index = new_index.copy() 78 | unique = True 79 | if "$unique" in new_index: 80 | unique = new_index["$unique"] 81 | del new_index["$unique"] 82 | for key in new_index: 83 | if key not in self.Entry.__annotations__: 84 | raise ValueError(f"Index contains key '{key}', which is not in Entry definition") 85 | if not any(index["key"].to_dict() == new_index for index in self.collection.list_indexes()): 86 | # Only create index if no such index exists. 87 | # This check is required because `create_index` seems not idempotent, although it should be. 88 | self.collection.create_index(list(new_index.items()), unique=unique, background=True) 89 | 90 | def count(self, estimate: bool = True) -> int: 91 | if estimate: 92 | return self.collection.estimated_document_count() 93 | return self.collection.count_documents({}) 94 | 95 | def close(self) -> None: 96 | del self.collection 97 | self.client.close() 98 | 99 | def safe_iter(self, batch_size: int = 1000, static: bool = False) -> Iterator['Entry']: # type: ignore 100 | r"""Safely iterate over all documents. The normal way (``for entry in collection.find()``) results in a cursor 101 | timeout if the iteration takes too long, and this is unavoidable unless configured on the server. 102 | 103 | The approach here follows that in the StackOverflow answer (https://stackoverflow.com/a/40434043/4909228). We 104 | choose a unique index, sort the entire collection according to the index, and then fetch a batch of entries. 105 | When the batch is depleted, a new batch is fetched. 106 | 107 | :param batch_size: The size (number of entries) of each fetched batch. A larger batch size reduces iteration 108 | overhead, but uses more memory. Defaults to 1000. 109 | :param static: Whether the DB is static, i.e., whether there will be no modifications during iteration. If not, 110 | a set of IDs for iterated entries will be recorded to prevent yielding the same entry twice, which incurs 111 | overhead. 112 | :return: An iterator over all entries. 113 | """ 114 | # Find a unique index.
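# `safe_iter` requires at least one unique index: it defines the stable sort order used for batched pagination.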
115 | if all(not index.get("$unique", True) for index in self.index): 116 | raise ValueError(f"`safe_iter` does not work for database {self.__class__.__name__} because there are no " 117 | f"unique indexes.") 118 | index = next(index for index in self.index if index.get("$unique", True)) 119 | if "$unique" in index: 120 | del index["$unique"] 121 | 122 | yielded_ids: Set[Any] = set() 123 | prev_index = 0 124 | while True: 125 | cursor = self.collection.find().sort(list(index.items())).skip(prev_index).limit(batch_size) 126 | if static: 127 | entries = list(cursor) 128 | else: 129 | entries = [] 130 | for entry in cursor: 131 | if entry["_id"] not in yielded_ids: 132 | entries.append(entry) 133 | yielded_ids.add(entry["_id"]) 134 | if len(entries) == 0: 135 | break 136 | yield from entries 137 | prev_index += batch_size 138 | 139 | 140 | class RepoDB(Database): 141 | r"""An abstraction over MongoDB that stores information about repositories. 142 | """ 143 | 144 | class MakefileEntry(BaseEntry): 145 | directory: str # directory containing the Makefile 146 | success: bool # whether compilation was successful (return code 0) 147 | binaries: List[str] # list of paths to binaries generated by make operation 148 | sha256: List[str] # SHA256 hashes for each binary 149 | 150 | class Entry(BaseEntry): 151 | repo_owner: str 152 | repo_name: str 153 | repo_size: int # size of the repo in bytes 154 | clone_successful: bool # whether the repo has been successfully cloned to the server 155 | compiled: bool # whether the repo has been tested for compilation 156 | num_makefiles: int # number of compilable Makefiles (required because MongoDB cannot aggregate list lengths) 157 | num_binaries: int # number of generated binaries (required because MongoDB cannot aggregate list lengths) 158 | makefiles: 'List[RepoDB.MakefileEntry]' # list of Makefiles 159 | 160 | @property 161 | def collection_name(self) -> str: 162 | return "repos" 163 | 164 | @property 165 | def index(self) -> List[Index]: 166 | return [{ 167 | "repo_owner": pymongo.ASCENDING, 168 | "repo_name": pymongo.ASCENDING, 169 | }] 170 | 171 | def get(self, repo_owner: str, repo_name: str) -> Optional[Entry]: 172 | r"""Get the DB entry corresponding to the specified repository. 173 | 174 | :return: If entry exists, it is returned as a dictionary; otherwise ``None`` is returned. 175 | """ 176 | return self.collection.find_one({"repo_owner": repo_owner, "repo_name": repo_name}) 177 | 178 | def add_repo(self, repo_owner: str, repo_name: str, clone_successful: bool, repo_size: int = -1) -> None: 179 | r"""Add a new DB entry for the specified repository. Arguments correspond to the first three fields in 180 | :class:`RepoEntry`. Other fields are set to sensible default values (``False`` and ``[]``). 181 | 182 | :param repo_owner: Owner of the repository. 183 | :param repo_name: Name of the repository. 184 | :param clone_successful: Whether the repository was successfully cloned. 185 | :param repo_size: Size (in bytes) of the cloned repository, or ``-1`` (default) if cloning failed. 
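A typical call (values illustrative): ``db.add_repo("torvalds", "linux", clone_successful=True, repo_size=1024)``.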
186 | """ 187 | record = self.get(repo_owner, repo_name) 188 | if record is None: 189 | record = { 190 | "repo_owner": repo_owner, 191 | "repo_name": repo_name, 192 | "clone_successful": clone_successful, 193 | "repo_size": repo_size, 194 | "compiled": False, 195 | "num_makefiles": 0, 196 | "num_binaries": 0, 197 | "makefiles": [], 198 | } 199 | self.collection.insert_one(record) 200 | else: 201 | self.collection.update_one({"_id": record["_id"]}, {"$set": { 202 | "clone_successful": clone_successful, 203 | "repo_size": repo_size, 204 | }}) 205 | 206 | def update_makefile(self, repo_owner: str, repo_name: str, makefiles: List[MakefileEntry], 207 | ignore_length_mismatch: bool = False) -> bool: 208 | r"""Update Makefile compilation results for a given repository. 209 | 210 | :param repo_owner: Owner of the repository. 211 | :param repo_name: Name of the repository. 212 | :param makefiles: List of Makefile compilation results. 213 | :param ignore_length_mismatch: If ``False``, a :exc:`ValueError` is raised if the number of Makefiles previously 214 | stored in the DB is different from the length of :attr:`makefiles` (unless there were no Makefiles 215 | previously). 216 | :return: A boolean value, indicating whether the write succeeded. Note that it is considered unsuccessful if 217 | the :attr:`makefiles` list was not stored due to Unicode encoding errors. 218 | """ 219 | entry = self.get(repo_owner, repo_name) 220 | if entry is None: 221 | raise ValueError(f"Specified repository {repo_owner}/{repo_name} does not exist") 222 | if not ignore_length_mismatch and len(entry["makefiles"]) not in [0, len(makefiles)]: 223 | raise ValueError(f"Number of makefiles stored in entry of {repo_owner}/{repo_name} " 224 | f"({len(entry['makefiles'])}) does not match provided list ({len(makefiles)})") 225 | update_entries = { 226 | "compiled": True, 227 | "num_makefiles": len(makefiles), 228 | "num_binaries": sum(len(makefile["binaries"]) for makefile in makefiles), 229 | "makefiles": makefiles, 230 | } 231 | try: 232 | result = self.collection.update_one({"_id": entry["_id"]}, {"$set": update_entries}) 233 | assert result.matched_count == 1 234 | return True 235 | except UnicodeEncodeError as e: 236 | update_entries["makefiles"] = [] # some path might contain strange characters; just don't store it 237 | result = self.collection.update_one({"_id": entry["_id"]}, {"$set": update_entries}) 238 | assert result.matched_count == 1 239 | return False 240 | 241 | def _aggregate_sum(self, field_name: str) -> int: 242 | cursor = self.collection.aggregate( 243 | [{"$match": {"compiled": True}}, 244 | {"$group": {"_id": None, "total": {"$sum": f"${field_name}"}}}]) 245 | return next(cursor)["total"] 246 | 247 | def count_makefiles(self) -> int: 248 | return self._aggregate_sum("num_makefiles") 249 | 250 | def count_binaries(self) -> int: 251 | return self._aggregate_sum("num_binaries") 252 | 253 | 254 | class BinaryDB(Database): 255 | class Entry(BaseEntry): 256 | repo_owner: str 257 | repo_name: str 258 | sha: str 259 | success: bool 260 | 261 | @property 262 | def collection_name(self) -> str: 263 | return "binaries" 264 | 265 | @property 266 | def index(self) -> List[Index]: 267 | return [ 268 | {"repo_owner": pymongo.ASCENDING, 269 | "repo_name": pymongo.ASCENDING, 270 | "$unique": False}, 271 | {"sha": pymongo.ASCENDING}, 272 | ] 273 | 274 | def get(self, sha: str) -> Optional[Entry]: 275 | r"""Get the DB entry corresponding to the specified binary hash. 
276 | 277 | :return: If entry exists, it is returned as a dictionary; otherwise ``None`` is returned. 278 | """ 279 | return self.collection.find_one({"sha": sha}) 280 | 281 | def get_binaries_by_repo(self, repo_owner: str, repo_name: str, success: bool = True) -> Iterator[Entry]: 282 | r"""Get all matching DB entries given a repository. 283 | 284 | :param repo_owner: Owner of the repository. 285 | :param repo_name: Name of the repository. 286 | :param success: The decompilation status of the binary. 287 | :return: An iterator (``pymongo.Cursor``) over matching entries. 288 | """ 289 | return self.collection.find({"repo_owner": repo_owner, "repo_name": repo_name, "success": success}) 290 | 291 | def add_binary(self, repo_owner: str, repo_name: str, sha: str, success: bool) -> None: 292 | r"""Add a new DB entry for the specified binary. 293 | 294 | :param repo_owner: Owner of the repository. 295 | :param repo_name: Name of the repository. 296 | :param sha: Hash of the binary. 297 | :param success: Whether the binary was successfully decompiled. 298 | """ 299 | record = self.get(sha) 300 | if record is None: 301 | record = { 302 | "repo_owner": repo_owner, 303 | "repo_name": repo_name, 304 | "sha": sha, 305 | "success": success, 306 | } 307 | self.collection.insert_one(record) 308 | else: 309 | self.collection.update_one({"_id": record["_id"]}, {"$set": { 310 | "success": success, 311 | }}) 312 | 313 | 314 | class MatchFuncDB(Database): 315 | class Entry(BaseEntry): 316 | repo_owner: str 317 | repo_name: str 318 | files_found: int 319 | funcs_found: int 320 | funcs_matched: int 321 | funcs_matched_without_ast: int 322 | 323 | @property 324 | def collection_name(self) -> str: 325 | return "match_func" 326 | 327 | @property 328 | def index(self) -> List[Index]: 329 | return [{ 330 | "repo_owner": pymongo.ASCENDING, 331 | "repo_name": pymongo.ASCENDING, 332 | }] 333 | 334 | def get(self, repo_owner: str, repo_name: str) -> Optional[Entry]: 335 | r"""Get the DB entry corresponding to the specified repository. 336 | 337 | :return: If entry exists, it is returned as a dictionary; otherwise ``None`` is returned. 338 | """ 339 | return self.collection.find_one({"repo_owner": repo_owner, "repo_name": repo_name}) 340 | 341 | def add_repo(self, repo_owner: str, repo_name: str, 342 | files_found: int, funcs_found: int, funcs_matched: int, funcs_matched_without_ast: int) -> None: 343 | r"""Add a new DB entry for the specified repository.""" 344 | record = self.get(repo_owner, repo_name) 345 | if record is None: 346 | record = { 347 | "repo_owner": repo_owner, 348 | "repo_name": repo_name, 349 | "files_found": files_found, 350 | "funcs_found": funcs_found, 351 | "funcs_matched": funcs_matched, 352 | "funcs_matched_without_ast": funcs_matched_without_ast, 353 | } 354 | self.collection.insert_one(record) 355 | else: 356 | self.collection.update_one({"_id": record["_id"]}, {"$set": { 357 | "files_found": files_found, 358 | "funcs_found": funcs_found, 359 | "funcs_matched": funcs_matched, 360 | "funcs_matched_without_ast": funcs_matched_without_ast, 361 | }}) 362 | 363 | 364 | if __name__ == '__main__': 365 | db = RepoDB() 366 | if len(sys.argv) > 1 and sys.argv[1] == "clear": 367 | confirm = input("This will drop the entire repository database. Confirm? 
[y/N] ") 368 | if confirm.lower() in ["y", "yes"]: 369 | db.collection.delete_many({}) 370 | db.close() 371 | print("Database dropped.") 372 | else: 373 | print("Operation cancelled.") 374 | else: 375 | print("Interact with the database using `db`, e.g.:\n" 376 | "> db.count_makefiles()\n") 377 | from IPython import embed 378 | 379 | embed() 380 | -------------------------------------------------------------------------------- /ghcc/parse/__init__.py: -------------------------------------------------------------------------------- 1 | from .lexer import * 2 | from .parser import * 3 | from .serialize import * 4 | -------------------------------------------------------------------------------- /ghcc/parse/lexer.py: -------------------------------------------------------------------------------- 1 | from typing import Iterator, List, NamedTuple 2 | 3 | from pycparser.c_lexer import CLexer 4 | 5 | __all__ = [ 6 | "Token", 7 | "LexToken", 8 | "CachedCLexer", 9 | "convert_to_tokens", 10 | "LexerWrapper", 11 | ] 12 | 13 | 14 | class Token(NamedTuple): 15 | name: str 16 | line: int 17 | column: int 18 | 19 | 20 | class LexToken: # stub 21 | type: str 22 | value: str 23 | lineno: int 24 | lexpos: int 25 | 26 | 27 | class CachedCLexer(CLexer): 28 | # `ply` uses reflection to build the lexer, which somehow requires accessing the `__module__` attribute. 29 | __module__ = CLexer.__module__ 30 | _cached_tokens: List[LexToken] 31 | 32 | def __init__(self, error_func, on_lbrace_func, on_rbrace_func, type_lookup_func) -> None: 33 | self._cached_tokens = [] 34 | super().__init__(error_func, on_lbrace_func, on_rbrace_func, type_lookup_func) 35 | 36 | def reset_lineno(self): 37 | self._cached_tokens = [] 38 | super().reset_lineno() 39 | 40 | def token(self) -> LexToken: 41 | tok = super().token() 42 | if tok is not None: 43 | self._cached_tokens.append(tok) 44 | return tok 45 | 46 | @property 47 | def cached_tokens(self) -> List[LexToken]: 48 | return self._cached_tokens 49 | 50 | 51 | def convert_to_tokens(code: str, lex_tokens: List[LexToken]) -> List[Token]: 52 | # `line_start[lineno - 1]` stores the `lexpos` right before the beginning of line `lineno`. 53 | # So `tok.lexpos - line_start[tok.lineno - 1]` gives the column of the token. 
54 | line_start = [-1] + [i for i, ch in enumerate(code) if ch == "\n"] 55 | tokens = [] 56 | for tok in lex_tokens: 57 | tokens.append(Token(tok.value, tok.lineno, tok.lexpos - line_start[tok.lineno - 1])) 58 | return tokens 59 | 60 | 61 | class LexerWrapper: 62 | @staticmethod 63 | def _error_func(msg, loc0, loc1): 64 | pass 65 | 66 | @staticmethod 67 | def _brace_func(): 68 | pass 69 | 70 | @staticmethod 71 | def _type_lookup_func(typ): 72 | return False 73 | 74 | def __init__(self) -> None: 75 | self.lexer = CLexer(self._error_func, self._brace_func, self._brace_func, self._type_lookup_func) 76 | self.lexer.build(optimize=True, lextab='pycparser.lextab') 77 | 78 | def lex_tokens(self, code: str) -> Iterator[LexToken]: 79 | self.lexer.reset_lineno() 80 | self.lexer.input(code) 81 | while True: 82 | token = self.lexer.token() 83 | if token is None: 84 | break 85 | yield token 86 | 87 | def lex(self, code: str) -> List[str]: 88 | return [token.value for token in self.lex_tokens(code)] 89 | -------------------------------------------------------------------------------- /ghcc/parse/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import tempfile 4 | from pathlib import Path 5 | from typing import Dict, Optional, Set, Tuple 6 | 7 | import pycparser 8 | from flutes.fs import remove_prefix 9 | from flutes.run import run_command 10 | from pycparser import c_ast 11 | from pycparser.c_ast import Node as ASTNode 12 | from pycparser.c_generator import CGenerator 13 | from pycparser.c_parser import CParser 14 | 15 | from .lexer import LexerWrapper 16 | 17 | __all__ = [ 18 | "FAKE_LIBC_PATH", 19 | "FAKE_LIBC_END_LINE", 20 | "FunctionExtractor", 21 | "FunctionReplacer", 22 | "parse_decompiled_code", 23 | "PreprocessError", 24 | "preprocess", 25 | "preprocess_file", 26 | ] 27 | 28 | FAKE_LIBC_PATH = str((Path(__file__).parent.parent.parent / "scripts" / "fake_libc_include").absolute()) 29 | FAKE_LIBC_END_LINE = "typedef int __end_of_fake_libc__;" 30 | 31 | 32 | class FunctionExtractor: 33 | r"""A wrapper around an AST visitor that extracts all function definitions from an AST.""" 34 | 35 | class FuncDefVisitor(c_ast.NodeVisitor): 36 | func_def_asts: Dict[str, ASTNode] 37 | 38 | def visit_FuncDef(self, node: c_ast.FuncDef): 39 | func_name = node.decl.name 40 | self.func_def_asts[func_name] = node 41 | 42 | def __init__(self): 43 | self._visitor = self.FuncDefVisitor() 44 | 45 | def find_functions(self, node: ASTNode) -> Dict[str, ASTNode]: 46 | r"""Find all function definitions given an AST. 47 | 48 | :param node: The ``pycparser`` AST node object. 49 | :return: A dictionary mapping function names to function definition AST subtrees. 50 | """ 51 | ret = self._visitor.func_def_asts = {} 52 | self._visitor.visit(node) 53 | del self._visitor.func_def_asts 54 | return ret 55 | 56 | 57 | class FunctionReplacer(CGenerator): 58 | r"""An AST visitor inherited from ``pycparser.CGenerator`` that replaces ``FuncDef`` nodes with supplied function code. 59 | 60 | Use the :meth:`visit` method to generate code with functions replaced.
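For example, if ``func_defs`` maps ``"foo"`` to some replacement code, the generated output wraps that code between ``typedef int __func__foo__begin;`` and ``typedef int __func__foo__end;`` markers, so the raw text can later be recovered (see :meth:`extract_func_name`) even if it cannot be parsed.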
61 | """ 62 | 63 | BOUNDARY_PREFIX = "typedef int __func__" 64 | BEGIN_SUFFIX = "__begin;" 65 | END_SUFFIX = "__end;" 66 | 67 | def __init__(self, func_defs: Dict[str, str]): 68 | super().__init__() 69 | self._func_defs = func_defs 70 | 71 | def visit_FuncDef(self, node: c_ast.FuncDef): 72 | func_name = node.decl.name 73 | if func_name in self._func_defs: 74 | # Add dummy typedefs around the function code, so we can still extract raw code even if we can't parse it. 75 | func_begin = self.BOUNDARY_PREFIX + func_name + self.BEGIN_SUFFIX 76 | func_end = self.BOUNDARY_PREFIX + func_name + self.END_SUFFIX 77 | return "\n".join(["", func_begin, self._func_defs[func_name], func_end, ""]) 78 | return super().visit_FuncDef(node) 79 | 80 | def extract_func_name(self, line: str) -> Tuple[Optional[str], bool]: 81 | r"""Extracts the function name from a function boundary marker. 82 | 83 | :param line: The line of code containing the boundary marker. 84 | :return: A tuple containing the function name and a ``bool`` indicating whether the marker indicates the 85 | beginning of a function. If the line is not a boundary marker, or the function name is not recognized, 86 | ``None`` is returned instead of the extracted name. 87 | """ 88 | func_name = None 89 | is_begin = False 90 | if line.startswith(self.BOUNDARY_PREFIX): 91 | if line.endswith(self.BEGIN_SUFFIX): 92 | func_name = line[len(self.BOUNDARY_PREFIX):-len(self.BEGIN_SUFFIX)] 93 | is_begin = True 94 | if line.endswith(self.END_SUFFIX): 95 | func_name = line[len(self.BOUNDARY_PREFIX):-len(self.END_SUFFIX)] 96 | if func_name is not None and func_name in self._func_defs: 97 | return func_name, is_begin 98 | return None, False 99 | 100 | 101 | class PreprocessError(Exception): 102 | pass 103 | 104 | 105 | LINE_CONTROL_REGEX = re.compile(r'^#[^\n]*$', flags=re.MULTILINE) 106 | 107 | 108 | def _preprocess(input_path: str, output_path: str) -> str: 109 | compile_ret = run_command( 110 | ["gcc", "-E", "-nostdlib", "-I" + FAKE_LIBC_PATH, "-o", output_path, input_path], ignore_errors=True) 111 | 112 | if compile_ret.return_code != 0: 113 | if compile_ret.captured_output is not None: 114 | raise PreprocessError(compile_ret.captured_output.decode("utf-8")) 115 | raise PreprocessError 116 | 117 | with open(output_path, "r") as f: 118 | preprocessed_code = f.read() 119 | # Remove line control macros so we can programmatically locate errors. 120 | preprocessed_code = LINE_CONTROL_REGEX.sub("", preprocessed_code) 121 | return preprocessed_code 122 | 123 | 124 | def preprocess(code: str) -> str: 125 | r"""Run preprocessor on code snippet by invoking GCC with ``-E`` flag. 126 | 127 | :raises PreprocessError: When GCC returns non-zero code. 128 | 129 | :return: The preprocessed code. 130 | """ 131 | with tempfile.TemporaryDirectory() as temp_dir: 132 | input_path = os.path.join(temp_dir, "test.c") 133 | output_path = os.path.join(temp_dir, "test.prep.c") 134 | with open(input_path, "w") as f: 135 | f.write(code) 136 | return _preprocess(input_path, output_path) 137 | 138 | 139 | def preprocess_file(path: str) -> str: 140 | r"""Run preprocessor on given file by invoking GCC with ``-E`` flag. 141 | 142 | :raises PreprocessError: When GCC returns non-zero code. 143 | 144 | :return: The preprocessed code. 
145 | """ 146 | with tempfile.TemporaryDirectory() as temp_dir: 147 | output_path = os.path.join(temp_dir, "test.prep.c") 148 | return _preprocess(path, output_path) 149 | 150 | 151 | PARSE_ERROR_REGEX = re.compile(r'.*?:(?P\d+):(?P\d+): (?P.+)') 152 | 153 | 154 | def parse_decompiled_code(code: str, lexer: LexerWrapper, parser: CParser, 155 | max_type_fix_tries: int = 10) -> Tuple[ASTNode, str]: 156 | r"""Parse preprocessed decompiled code and heuristically fix errors caused by undefined types. 157 | 158 | If a parse error is encountered, we attempt to fix the code by parsing the error message and checking whether if 159 | could be an undefined type error. If it is, we prepend a dummy ``typedef`` and retry parsing, until either the code 160 | parses or we run out of tries. 161 | 162 | :raises ValueError: When we've run out of tries for fixing types, or the issue cannot be resolved by adding a 163 | ``typedef`` (i.e., getting the same error after adding ``typedef``). 164 | :raises pycparser.c_parser.ParseError: When we cannot identify the error. 165 | 166 | :param code: The preprocessed code to parse 167 | :param lexer: The lexer to use while parsing. 168 | :param parser: The parser to use while parsing. 169 | :param max_type_fix_tries: Maximum retries to fix type errors. 170 | :return: A tuple containing the parsed AST and the modified code. 171 | """ 172 | added_types: Set[str] = set() 173 | code_lines = code.split("\n") 174 | for _ in range(max_type_fix_tries): 175 | try: 176 | decompiled_ast = parser.parse(code) 177 | break 178 | except pycparser.c_parser.ParseError as e: 179 | error_match = PARSE_ERROR_REGEX.match(str(e)) 180 | if error_match is None or not error_match.group("msg").startswith("before: "): 181 | raise 182 | before_token = remove_prefix(error_match.group("msg"), "before: ") 183 | error_line = code_lines[int(error_match.group("line")) - 1] 184 | error_pos = int(error_match.group("col")) - 1 185 | tokens = list(lexer.lex_tokens(error_line)) 186 | try: 187 | error_token_idx = next( 188 | idx for idx, token in enumerate(tokens) 189 | if token.lexpos == error_pos and token.value == before_token) 190 | # There are multiple possible cases here: 191 | # 1. The type is the first ID-type token before the reported token (`type token`). It might not 192 | # be the one immediately in front (for example, `(type) token`, `type *token`). 193 | # 2. The type is the token itself. This is rare and only happens in a situation like: 194 | # `int func(const token var)` or `int func(int a, token b)` 195 | # Replacing `const` with any combination of type qualifiers also works. 196 | if (error_token_idx > 0 and 197 | tokens[error_token_idx - 1].type in ["CONST", "VOLATILE", "RESTRICT", 198 | "__CONST", "__RESTRICT", "__EXTENSION__", 199 | "COMMA"]): 200 | type_token = tokens[error_token_idx] 201 | else: 202 | type_token = next( 203 | tokens[idx] for idx in range(error_token_idx - 1, -1, -1) 204 | if tokens[idx].type == "ID") 205 | except StopIteration: 206 | # If we don't catch this, it would terminate the for-loop in `main()`. Stupid design. 
207 | raise e from None 208 | 209 | if type_token.value in added_types: 210 | raise ValueError(f"Type {type_token.value} already added (types so far: {list(added_types)})") 211 | added_types.add(type_token.value) 212 | typedef_line = f"typedef int {type_token.value};" 213 | code = typedef_line + "\n" + code 214 | code_lines.insert(0, typedef_line) 215 | else: 216 | raise ValueError(f"Type fixes exceeded limit ({max_type_fix_tries})") 217 | return decompiled_ast, code 218 | -------------------------------------------------------------------------------- /ghcc/parse/serialize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for serialization and deserialization of ``pycparser`` ASTs. 3 | Adapted from ``pycparser`` example ``c_json.py``. 4 | """ 5 | 6 | import functools 7 | import re 8 | from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union 9 | 10 | from pycparser.c_ast import Node as ASTNode 11 | from pycparser.plyparser import Coord 12 | 13 | from .lexer import Token 14 | 15 | __all__ = [ 16 | "ast_to_dict", 17 | "dict_to_ast", 18 | "get_ast_class", 19 | "visit_dict", 20 | "JSONNode", 21 | "NODE_TYPE_ATTR", 22 | "CHILDREN_ATTR", 23 | "TOKEN_POS_ATTR", 24 | ] 25 | 26 | T = TypeVar('T') 27 | K = TypeVar('K') 28 | MaybeList = Union[T, List[T]] 29 | JSONNode = Dict[str, Any] 30 | 31 | RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]') 32 | RE_INTERNAL_ATTR = re.compile('__.*__') 33 | 34 | AVAILABLE_NODES: Dict[str, Type[ASTNode]] = {klass.__name__: klass for klass in ASTNode.__subclasses__()} 35 | NODE_TYPE_ATTR = "_t" 36 | CHILDREN_ATTR = "_c" 37 | TOKEN_POS_ATTR = "_p" 38 | 39 | 40 | @functools.lru_cache() 41 | def child_attrs_of(klass: Type[ASTNode]): 42 | r"""Given a Node class, get a set of child attrs. 43 | Memoized to avoid highly repetitive string manipulation 44 | """ 45 | non_child_attrs = set(klass.attr_names) 46 | all_attrs = set([i for i in klass.__slots__ if not RE_INTERNAL_ATTR.match(i)]) 47 | all_attrs -= {"coord"} 48 | return all_attrs - non_child_attrs 49 | 50 | 51 | def find_first(arr: List[T], cond_fn: Callable[[T], bool]) -> int: 52 | r"""Use binary search to find the index of the first element in ``arr`` such that ``cond_fn`` returns ``True``.""" 53 | l, r = 0, len(arr) 54 | while l < r: 55 | mid = (l + r) >> 1 56 | if not cond_fn(arr[mid]): 57 | l = mid + 1 58 | else: 59 | r = mid 60 | return l 61 | 62 | 63 | def ast_to_dict(root: ASTNode, tokens: Optional[List[Token]] = None) -> JSONNode: 64 | r"""Recursively convert an AST into dictionary representation. 65 | 66 | :param root: The AST to convert. 67 | :param tokens: A list of lexed token coordinates. If specified, will replace node position (``coord``) with the 68 | index in the lexed token list. 69 | """ 70 | if tokens is not None: 71 | tokens_ = tokens # so that the `find_token` function type-checks 72 | line_range: Dict[int, Tuple[int, int]] = {} 73 | 74 | def find_token(line: int, column: int) -> Optional[int]: 75 | if line not in line_range: 76 | l = find_first(tokens_, lambda tok: line <= tok.line) 77 | r = find_first(tokens_, lambda tok: line < tok.line) 78 | line_range[line] = l, r 79 | else: 80 | l, r = line_range[line] 81 | ret = find_first(tokens_[l:r], lambda tok: column < tok.column) + l - 1 82 | if ret < 0: 83 | # In rare cases `ret` where `l == 0` and the first code token has `column > 1`, the coordinates of the root 84 | # node might still have `column == 1`, which results in `ret == -1`. 
85 | return None 86 | return ret 87 | 88 | def traverse(node: ASTNode, depth: int = 0) -> JSONNode: 89 | klass = node.__class__ 90 | 91 | result = {} 92 | 93 | # Node type 94 | result[NODE_TYPE_ATTR] = klass.__name__ 95 | 96 | # Local node attributes 97 | for attr in klass.attr_names: 98 | result[attr] = getattr(node, attr) 99 | 100 | # Token position 101 | if tokens is not None: 102 | if node.coord is not None and node.coord.line > 0: # some nodes have invalid coordinate (0, 1) 103 | coord: Coord = node.coord 104 | pos = find_token(coord.line, coord.column) 105 | result[TOKEN_POS_ATTR] = pos 106 | else: 107 | result[TOKEN_POS_ATTR] = None 108 | 109 | # node_name = (" " * (2 * depth) + klass.__name__).ljust(35) 110 | # if node.coord is not None: 111 | # coord: Coord = node.coord 112 | # pos = result['coord'] 113 | # print(node_name, coord.line, coord.column, pos, (tokens[pos] if pos else None), sep='\t') 114 | # else: 115 | # print(node_name) 116 | 117 | # Children nodes 118 | children: Dict[str, Optional[MaybeList[JSONNode]]] = {} 119 | for child_name, child in node.children(): 120 | child_dict = traverse(child, depth + 1) 121 | # Child strings are either simple (e.g. 'value') or arrays (e.g. 'block_items[1]') 122 | match = RE_CHILD_ARRAY.match(child_name) 123 | if match: 124 | array_name, array_index = match.groups() 125 | array_index = int(array_index) 126 | # arrays come in order, so we verify and append. 127 | array: List[JSONNode] = children.setdefault(array_name, []) # type: ignore 128 | if array_index != len(array): 129 | raise ValueError(f"Internal ast error. Array {array_name} out of order. " 130 | f"Expected index {len(array)}, got {array_index}") 131 | array.append(child_dict) 132 | else: 133 | children[child_name] = child_dict 134 | # Missing children are filled with `None` values in the dictionary. 135 | for child_attr in child_attrs_of(klass): 136 | if child_attr not in children: 137 | children[child_attr] = None 138 | result[CHILDREN_ATTR] = children 139 | 140 | return result 141 | 142 | ast_json = traverse(root) 143 | return ast_json 144 | 145 | 146 | def visit_dict(visit_fn: Callable[[JSONNode, List[T]], T], node_dict: JSONNode) -> T: 147 | # visit_fn: (node, [children_result]) -> result 148 | children_result: List[T] = [] 149 | for name, child in node_dict[CHILDREN_ATTR].items(): 150 | if isinstance(child, list): 151 | children_result.extend(visit_dict(visit_fn, item) for item in child) 152 | elif child is not None: 153 | children_result.append(visit_dict(visit_fn, child)) 154 | return visit_fn(node_dict, children_result) 155 | 156 | 157 | def get_ast_class(name: str) -> Type[ASTNode]: 158 | return AVAILABLE_NODES[name] 159 | 160 | 161 | def dict_to_ast(node_dict: JSONNode) -> ASTNode: 162 | r"""Recursively build an AST from dictionary representation. Coordinate information is discarded. 163 | """ 164 | class_name = node_dict[NODE_TYPE_ATTR] 165 | klass = get_ast_class(class_name) 166 | 167 | # Create a new dict containing the key-value pairs which we can pass to node constructors. 
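# `coord` is always reset to ``None``: any token positions recorded under `TOKEN_POS_ATTR` during serialization # are deliberately not restored here.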
168 | kwargs: Dict[str, Any] = {'coord': None} 169 | children: Dict[str, MaybeList[JSONNode]] = node_dict[CHILDREN_ATTR] 170 | for name, child in children.items(): 171 | if isinstance(child, list): 172 | kwargs[name] = [dict_to_ast(item) for item in child] 173 | else: 174 | kwargs[name] = dict_to_ast(child) if child is not None else None 175 | 176 | for key, value in node_dict.items(): 177 | if key in [NODE_TYPE_ATTR, CHILDREN_ATTR, TOKEN_POS_ATTR]: 178 | continue 179 | kwargs[key] = value # must be primitive attributes 180 | 181 | return klass(**kwargs) 182 | -------------------------------------------------------------------------------- /ghcc/repo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import subprocess 4 | import time 5 | from enum import Enum, auto 6 | from typing import NamedTuple, Optional 7 | 8 | from flutes.run import run_command 9 | 10 | __all__ = [ 11 | "CloneErrorType", 12 | "CloneResult", 13 | "clean", 14 | "clone" 15 | ] 16 | 17 | 18 | class CloneErrorType(Enum): 19 | FolderExists = auto() 20 | Timeout = auto() 21 | PrivateOrNonexistent = auto() 22 | Unknown = auto() 23 | SubmodulesFailed = auto() 24 | 25 | 26 | class CloneResult(NamedTuple): 27 | repo_owner: str 28 | repo_name: str 29 | success: bool = False 30 | error_type: Optional[CloneErrorType] = None 31 | time: Optional[float] = None 32 | captured_output: Optional[bytes] = None 33 | 34 | 35 | def clean(repo_folder: str) -> None: 36 | r"""Clean all unversioned files in a Git repository. 37 | 38 | :param repo_folder: Path to the Git repository. 39 | """ 40 | # Reset modified files. 41 | run_command(["git", "reset", "--hard"], cwd=repo_folder, ignore_errors=True) 42 | # Use `-f` twice to really clean everything. 43 | run_command(["git", "clean", "-xffd"], cwd=repo_folder, ignore_errors=True) 44 | # Do the same thing for submodules, if submodules exist. 45 | if os.path.exists(os.path.join(repo_folder, ".gitmodules")): 46 | run_command(["git", "submodule", "foreach", "--recursive", "git", "reset", "--hard"], 47 | cwd=repo_folder, ignore_errors=True) 48 | run_command(["git", "submodule", "foreach", "--recursive", "git", "clean", "-xffd"], 49 | cwd=repo_folder, ignore_errors=True) 50 | 51 | 52 | def clone(repo_owner: str, repo_name: str, clone_folder: str, folder_name: Optional[str] = None, *, 53 | default_branch: Optional[str] = None, timeout: Optional[float] = None, 54 | recursive: bool = False, skip_if_exists: bool = True) -> CloneResult: 55 | r"""Clone a repository on GitHub, for instance, ``torvalds/linux``. 56 | 57 | :param repo_owner: Name of the repository owner, e.g., ``torvalds``. 58 | :param repo_name: Name of the repository, e.g., ``linux``. 59 | :param clone_folder: Path to the folder where the repository will be stored. 60 | :param folder_name: Name of the folder of the cloned repository. If ``None``, ``repo_owner/repo_name`` is used. 61 | :param default_branch: Name of the default branch of the repository. Cloning behavior differs slightly depending on 62 | whether the argument is ``None``. If ``None``, then the following happens: 63 | 64 | 1. Attempts a shallow clone on only the ``master`` branch. 65 | 2. If error occurs, attempts a shallow clone for all branches. 66 | 3. If error still occurs, raise the error. 67 | 68 | If not ``None``, then the following happens: 69 | 70 | 1. Attempts a shallow clone on only the default branch. 71 | 2. If error occurs, raise the error. 
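For example, ``clone("torvalds", "linux", clone_folder="repos/")`` clones into ``repos/torvalds/linux``.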
72 | :param timeout: Maximum time allowed for cloning, in seconds. Defaults to ``None`` (unlimited time). 73 | :param recursive: If ``True``, passes the ``--recursive`` flag to Git, which recursively clones submodules. 74 | :param skip_if_exists: Whether to skip cloning if the destination folder already exists. If ``False``, the folder 75 | will be deleted. 76 | 77 | :return: An instance of :class:`CloneResult` indicating the result. Fields ``repo_owner``, ``repo_name``, and 78 | ``success`` are not ``None``. 79 | 80 | - If cloning succeeded, the field ``time`` is also not ``None``. 81 | - If cloning failed, the fields ``error_type`` and ``captured_output`` are also not ``None``. 82 | """ 83 | start_time = time.time() 84 | url = f"https://github.com/{repo_owner}/{repo_name}.git" 85 | if folder_name is None: 86 | folder_name = f"{repo_owner}/{repo_name}" 87 | clone_folder = os.path.join(clone_folder, folder_name) 88 | if os.path.exists(clone_folder): 89 | if not skip_if_exists: 90 | shutil.rmtree(clone_folder) 91 | else: 92 | return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.FolderExists) 93 | 94 | # Certain repos might have turned private or been deleted, and Git prompts for a username/password when that happens. 95 | # Setting the environment variable `GIT_TERMINAL_PROMPT` to 0 disables this behavior and lets Git fail promptly. 96 | # Luckily, this variable was introduced in Git 2.3; otherwise, we would have to poll the waiting channel of the 97 | # current process and check whether it's waiting for I/O. 98 | # See: https://askubuntu.com/questions/19442/what-is-the-waiting-channel-of-a-process 99 | env = {"GIT_TERMINAL_PROMPT": "0"} 100 | 101 | def try_clone(): 102 | # If a true Git error was thrown, re-raise it and let the outer code deal with it. 103 | try: 104 | try_branch = default_branch or "master" 105 | # Try cloning only the 'master' branch, but it's possible there's no branch named 'master'. 106 | run_command( 107 | ["git", "clone", "--depth=1", f"--branch={try_branch}", "--single-branch", url, clone_folder], 108 | env=env, timeout=timeout) 109 | return 110 | except subprocess.CalledProcessError as err: 111 | expected_msg = b"fatal: Remote branch master not found in upstream origin" 112 | if default_branch is not None or not (err.output is not None and expected_msg in err.output): 113 | # If `default_branch` is specified, always re-raise the exception. 114 | raise err 115 | # 'master' branch doesn't exist; do a shallow clone of all branches. 116 | run_command(["git", "clone", "--depth=1", url, clone_folder], env=env, timeout=timeout) 117 | 118 | try: 119 | try_clone() 120 | end_time = time.time() 121 | elapsed_time = end_time - start_time 122 | except subprocess.CalledProcessError as e: 123 | no_ssh_expected_msg = b"fatal: could not read Username for 'https://github.com': terminal prompts disabled" 124 | ssh_expected_msg = b"remote: Repository not found."
125 | if e.output is not None and (no_ssh_expected_msg in e.output or ssh_expected_msg in e.output): 126 | return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.PrivateOrNonexistent) 127 | else: 128 | return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.Unknown, captured_output=e.output) 129 | except subprocess.TimeoutExpired as e: 130 | return CloneResult(repo_owner, repo_name, error_type=CloneErrorType.Timeout, captured_output=e.output) 131 | 132 | if recursive: 133 | submodule_timeout = (timeout - elapsed_time) if timeout is not None else None 134 | try: 135 | # If this fails, still treat it as a success, but include a special error type. 136 | run_command(["git", "submodule", "update", "--init", "--recursive"], 137 | env=env, cwd=clone_folder, timeout=submodule_timeout) 138 | except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: 139 | return CloneResult(repo_owner, repo_name, success=True, time=elapsed_time, 140 | error_type=CloneErrorType.SubmodulesFailed, captured_output=e.output) 141 | end_time = time.time() 142 | elapsed_time = end_time - start_time 143 | 144 | return CloneResult(repo_owner, repo_name, success=True, time=elapsed_time) 145 | -------------------------------------------------------------------------------- /ghcc/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .docker import * 2 | -------------------------------------------------------------------------------- /ghcc/utils/docker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from datetime import datetime 4 | from pathlib import Path 5 | from typing import Dict, List, Optional, Tuple, Union 6 | 7 | from flutes.log import log 8 | from flutes.run import CommandResult, error_wrapper, run_command 9 | 10 | __all__ = [ 11 | "run_docker_command", 12 | "verify_docker_image", 13 | ] 14 | 15 | 16 | def run_docker_command(command: Union[str, List[str]], cwd: Optional[str] = None, 17 | user: Optional[Union[int, Tuple[int, int]]] = None, 18 | directory_mapping: Optional[Dict[str, str]] = None, 19 | timeout: Optional[float] = None, **kwargs) -> CommandResult: 20 | r"""Run a command inside a container based on the ``gcc-custom`` Docker image. 21 | 22 | :param command: The command to run. Should be either a `str` or a list of `str`. Note: they're treated the same way, 23 | because a shell is always spawned in the entry point. 24 | :param cwd: The working directory of the command to run. If ``None``, uses the default (probably the user's home). 25 | :param user: The user ID to use inside the Docker container. Additionally, group ID can be specified by passing 26 | a tuple of two `int`\ s for this argument. If not specified, the current user and group IDs are used. As a 27 | special case, pass in ``0`` to run as root. 28 | :param directory_mapping: Mapping of host directories to container paths. Mapping is performed via "bind mount". 29 | :param timeout: Maximum running time for the command. If running time exceeds the specified limit, 30 | ``subprocess.TimeoutExpired`` is thrown. 31 | :param kwargs: Additional keyword arguments to pass to :func:`flutes.run.run_command`. 32 | """ 33 | # Normalize `command` into a single-quoted string; the image entry point always spawns a shell to run it. 34 | if isinstance(command, list): 35 | command = ' '.join(command) 36 | command = f"'{command}'" 37 | 38 | # Construct the `docker run` command.
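# The assembled command looks roughly like the following (values here are illustrative): # docker run --rm -v /host/repo:/usr/src/repo -e LOCAL_USER_ID=1000 -e LOCAL_GROUP_ID=1000 \ # gcc-custom timeout 90s 'make -j1'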
39 | docker_command = ["docker", "run", "--rm"] 40 | for host, container in (directory_mapping or {}).items(): 41 | docker_command.extend(["-v", f"{os.path.abspath(host)}:{container}"]) 42 | if cwd is not None: 43 | docker_command.extend(["-w", cwd]) 44 | 45 | # Assign user and group IDs based on `user` argument. 46 | if user != 0: 47 | user_id: Union[str, int] = "`id -u $USER`" 48 | group_id: Union[str, int] = "`id -g $USER`" 49 | if user is not None: 50 | if isinstance(user, tuple): 51 | user_id, group_id = user 52 | else: 53 | user_id = user 54 | docker_command.extend(["-e", f"LOCAL_USER_ID={user_id}"]) 55 | docker_command.extend(["-e", f"LOCAL_GROUP_ID={group_id}"]) 56 | 57 | docker_command.append("gcc-custom") 58 | if timeout is not None: 59 | # Timeout is implemented by calling `timeout` inside Docker container. 60 | docker_command.extend(["timeout", f"{timeout}s"]) 61 | docker_command.append(command) 62 | ret = run_command(' '.join(docker_command), shell=True, **kwargs) 63 | 64 | # Check whether exceeded timeout limit by inspecting return code. 65 | if ret.return_code == 124: 66 | assert timeout is not None 67 | raise error_wrapper(subprocess.TimeoutExpired(ret.command, timeout, output=ret.captured_output)) 68 | return ret 69 | 70 | 71 | def verify_docker_image(verbose: bool = False, print_checked_paths: bool = False) -> bool: 72 | r"""Checks whether the Docker image is up-to-date. This is done by verifying the modification dates for all library 73 | files are earlier than the Docker image build date. 74 | 75 | :param verbose: If ``True``, prints out error message telling the user to rebuild Docker image. 76 | :param print_checked_paths: If ``True``, prints out paths of all checked files. 77 | """ 78 | output = run_command( 79 | ["docker", "image", "ls", "gcc-custom", "--format", "{{.CreatedAt}}"], return_output=True).captured_output 80 | assert output is not None 81 | image_creation_time_string = output.decode("utf-8").strip() 82 | image_creation_timestamp = datetime.strptime(image_creation_time_string, "%Y-%m-%d %H:%M:%S %z %Z").timestamp() 83 | 84 | repo_root: Path = Path(__file__).parent.parent.parent 85 | paths_to_check = ["ghcc", "scripts", ".dockerignore", "Dockerfile", "requirements.txt"] 86 | paths_to_ignore = ["ghcc/parse", "ghcc/database.py", "scripts/fake_libc_include"] 87 | prefixes_to_ignore = [str(repo_root / path) for path in paths_to_ignore] 88 | max_timestamp = 0.0 89 | for repo_path in paths_to_check: 90 | path = str(repo_root / repo_path) 91 | if os.path.isfile(path) and not any(path.startswith(prefix) for prefix in prefixes_to_ignore): 92 | if print_checked_paths: 93 | print(path) 94 | max_timestamp = max(max_timestamp, os.path.getmtime(path)) 95 | else: 96 | for subdir, dirs, files in os.walk(path): 97 | if subdir.endswith("__pycache__"): 98 | continue 99 | for f in files: 100 | file_path = os.path.join(subdir, f) 101 | if not any(file_path.startswith(prefix) for prefix in prefixes_to_ignore): 102 | if print_checked_paths: 103 | print(file_path) 104 | max_timestamp = max(max_timestamp, os.path.getmtime(file_path)) 105 | up_to_date = max_timestamp <= image_creation_timestamp 106 | 107 | if not up_to_date and verbose: 108 | image_path = os.path.relpath(os.path.join(__file__, "..", "..", ".."), os.getcwd()) 109 | log("ERROR: Your Docker image is out-of-date. 
Please rebuild the image by: " 110 | f"`docker build -t gcc-custom {image_path}`", "error", force_console=True) 111 | return up_to_date 112 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_unused_ignores = True 3 | warn_unused_configs = True 4 | warn_redundant_casts = True 5 | no_implicit_optional = True 6 | follow_imports = silent 7 | ignore_missing_imports = True 8 | mypy_path = ./, ./stubs/ 9 | allow_redefinition = True 10 | -------------------------------------------------------------------------------- /purge_folder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import os 4 | import subprocess 5 | 6 | import flutes 7 | 8 | import ghcc 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("folder", type=str) # the folder to clean up 12 | parser.add_argument("-y", action="store_true", default=False) # yes 13 | args = parser.parse_args() 14 | 15 | try: 16 | parent = os.path.abspath(os.path.join(args.folder, "..")) 17 | folder = os.path.split(os.path.abspath(args.folder))[1] 18 | yes = args.y 19 | if not yes: 20 | confirm = input(f"This will delete {parent} / {folder}. Confirm? [y/N] ") 21 | yes = confirm.lower() in ["y", "yes"] 22 | if yes: 23 | ghcc.utils.run_docker_command(["rm", "-rf", f"/usr/src/{folder}"], 24 | user=0, directory_mapping={parent: "/usr/src"}) 25 | except subprocess.CalledProcessError as e: 26 | flutes.log(f"Command failed with retcode {e.returncode}", "error") 27 | output = e.output.decode("utf-8") 28 | if len(output) > 200: 29 | output = output[:200] + "... (omitted)" 30 | flutes.log("Captured output: " + output) 31 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argtyped 2 | flutes >= 0.2.0 3 | mypy_extensions 4 | pycparser >= 2.20 5 | pymongo 6 | termcolor 7 | tqdm 8 | -------------------------------------------------------------------------------- /run_decompiler.py: -------------------------------------------------------------------------------- 1 | # Runs the decompiler to collect variable names from binaries containing 2 | # debugging information, then strips the binaries and injects the collected 3 | # names into that decompilation output. 4 | # This generates an aligned, parallel corpus for training translation models. 
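# The per-binary pipeline (see `decompile` below) is roughly: # 1. Run IDA with `collect.py` on the original binary (with debug info) to map variable addresses to names. # 2. Strip a copy of the binary with `strip --strip-debug`. # 3. Run IDA with `dump_trees.py` on the stripped copy, injecting the collected names, and emit a JSONL file # of per-function ASTs and pseudocode.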
5 | 6 | import base64 7 | import contextlib 8 | import datetime 9 | import errno 10 | import functools 11 | import os 12 | import pickle 13 | import subprocess 14 | import tempfile 15 | from enum import Enum, auto 16 | from pathlib import Path 17 | from typing import Dict, Iterator, NamedTuple, Optional, Tuple, Callable 18 | 19 | import argtyped 20 | import flutes 21 | import tqdm 22 | from mypy_extensions import TypedDict 23 | 24 | import ghcc 25 | 26 | EnvDict = Dict[str, str] 27 | 28 | 29 | class Arguments(argtyped.Arguments): 30 | binaries_dir: str = "binaries/" # directory containing binaries 31 | output_dir: str = "decompile_output/" # output directory 32 | log_file: str = "decompile-log.txt" 33 | ida: str = "/data2/jlacomis/ida/idat64" # location of the `idat64` binary 34 | binary_mapping_cache_file: Optional[str] = "binary_mapping.pkl" 35 | timeout: int = 30 # decompilation timeout 36 | n_procs: int = 0 # number of processes 37 | 38 | 39 | SCRIPTS_DIR = Path(__file__).parent / "scripts" / "decompiler_scripts" 40 | COLLECT = str((SCRIPTS_DIR / 'collect.py').absolute()) 41 | DUMP_TREES = str((SCRIPTS_DIR / 'dump_trees.py').absolute()) 42 | 43 | 44 | def make_directory(dir_path: str) -> None: 45 | r"""Make a directory, with clean error messages.""" 46 | try: 47 | os.makedirs(dir_path) 48 | except OSError as e: 49 | if not os.path.isdir(dir_path): 50 | raise NotADirectoryError(f"'{dir_path}' is not a directory") 51 | if e.errno != errno.EEXIST: 52 | raise 53 | 54 | 55 | def write_pseudo_registry(): 56 | encoded_registry_data = b""" 57 | aURhNwFIaWRkZW4gTWVzc2FnZXMAIHYgcyAgcyAgcyAgIHMgVG8gcmVuZXcgaXQgIHBsZWFzZSB2 58 | aXNpdCBvdXIgd2ViIHNpdGUgICAgcyAgVGhhbmsgeW91IGZvciB1c2luZyBJREEgAAQAAAAEAQAA 59 | AFRoZSB0ZWNobmljYWwgc3VwcG9ydCBwZXJpb2Qgb2YgdGhlIEhleCBSYXlzIGRlY29tcGlsZXIg 60 | aGFzIGV4cGlyZWQgIFlvdSBoYXZlIDMgbW9udGggZ3JhY2UgcGVyaW9kIHQABAAAAAQAAAAAAAFI 61 | aXN0b3J5NjQAMAAhAAAAAS9kYXRhMy96ZWNvbmcvRElSRS9kYXRhc2V0LWdlbi9scwBBdXRvQ2hl 62 | Y2tVcGRhdGVzAAQAAAAEAAAAAEF1dG9SZXF1ZXN0VXBkYXRlcwAEAAAABAAAAABJbmZvcm1lZEFi 63 | b3V0VXBkYXRlczIABAAAAAQBAAAATGljZW5zZSBDTVVfIFNvZnR3YXJlIEVuZ2luZWVyaW5nIElu 64 | c3RpdHV0ZQAEAAAABAEAAABTZWFyY2hCaW4AAAAAAAFTZWFyY2hUZXh0AAAAAAABU3VwcG9ydEV4 65 | cGlyZWREaXNwbGF5ZWRfNy4xLjAuMTgwMjI3AAQAAAAEuncKXnVpQ29uZmlnNjQAaAAAAAP///// 66 | /////wAAAAAAAAAAAAAAAAAAAAACAAAAAQAAAAAAAAD/////AAABAP//////////AAAAAAAAAAAA 67 | AAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAATQABBQAAAAAAAAAAAAAAAGy/rUI= 68 | """ 69 | path = os.path.expanduser("~/.idapro/ida.reg") 70 | with open(path, "wb") as f: 71 | f.write(base64.decodebytes(encoded_registry_data)) 72 | 73 | 74 | def run_decompiler(file_name: str, script: str, env: Optional[EnvDict] = None, 75 | timeout: Optional[int] = None): 76 | r"""Run a decompiler script. 77 | 78 | :param file_name: The binary to be decompiled. 79 | :param env: An `os.environ` mapping, useful for passing arguments. 80 | :param script: The script file to run. 81 | :param timeout: Timeout in seconds (default no timeout). 82 | """ 83 | idacall = [args.ida, '-B', f'-S{script}', file_name] 84 | try: 85 | flutes.run_command(idacall, env=env, timeout=timeout) 86 | except subprocess.CalledProcessError as e: 87 | if b"Traceback (most recent call last):" in e.output: 88 | # Exception raised by Python script called by IDA, throw it up. 89 | raise e 90 | flutes.run_command(['rm', '-f', f'{file_name}.i64']) 91 | if b"Corrupted pseudo-registry file" in e.output: 92 | write_pseudo_registry() 93 | # Run again without try-catch; if it fails, it should crash. 
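# (`write_pseudo_registry` above has just restored a known-good registry state, so the retry starts clean.)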
94 | flutes.run_command(idacall, env=env, timeout=timeout) 95 | 96 | 97 | class BinaryInfo(TypedDict): 98 | repo_owner: str 99 | repo_name: str 100 | path: str 101 | path_in_repo: str 102 | 103 | 104 | class DecompilationStatus(Enum): 105 | Success = auto() 106 | TimedOut = auto() 107 | NoVariables = auto() 108 | UnknownError = auto() 109 | 110 | 111 | class DecompilationResult(NamedTuple): 112 | info: BinaryInfo 113 | hash: str 114 | status: DecompilationStatus 115 | time: Optional[datetime.timedelta] = None 116 | 117 | 118 | def exception_handler(e, binary_info: BinaryInfo): 119 | binary_path = binary_info["path"] 120 | flutes.log_exception(e, f"Exception occurred when processing {binary_path}") 121 | 122 | 123 | @flutes.exception_wrapper(exception_handler) 124 | def decompile(binary_info: BinaryInfo, output_dir: str, binary_dir: str, 125 | timeout: Optional[int] = None) -> DecompilationResult: 126 | binary_path = binary_info["path"] 127 | original_path = binary_info["path_in_repo"] 128 | binary_hash = os.path.split(binary_path)[1] 129 | 130 | def create_result(status: DecompilationStatus, time: Optional[datetime.timedelta] = None) -> DecompilationResult: 131 | return DecompilationResult(binary_info, binary_hash, status, time) 132 | 133 | output_path = os.path.join(output_dir, f"{binary_hash}.jsonl") 134 | if os.path.exists(output_path): 135 | # Binary already decompiled, but for some reason it wasn't written to the DB. 136 | return create_result(DecompilationStatus.Success) 137 | 138 | start = datetime.datetime.now() 139 | env: EnvDict = os.environ.copy() 140 | env['IDALOG'] = '/dev/stdout' 141 | env['PREFIX'] = binary_hash 142 | file_path = os.path.join(binary_dir, binary_path) 143 | 144 | # Create a temporary directory, since the decompiler makes a lot of additional 145 | # files that we can't clean up from here. 146 | with tempfile.TemporaryDirectory() as tempdir: 147 | # Put the output JSONL file here as well to prevent partially-generated files. 148 | env['OUTPUT_DIR'] = os.path.abspath(tempdir) 149 | with tempfile.NamedTemporaryFile(dir=tempdir) as collected_vars: 150 | # First collect variables. 151 | env['COLLECTED_VARS'] = collected_vars.name 152 | with tempfile.NamedTemporaryFile(dir=tempdir) as orig: 153 | flutes.run_command(['cp', file_path, orig.name]) 154 | # Timeout after 30 seconds for first run. 155 | try: 156 | run_decompiler(orig.name, COLLECT, env=env, timeout=timeout) 157 | except subprocess.TimeoutExpired: 158 | flutes.log(f"[TIMED OUT] {original_path} ({binary_path})", "warning") 159 | return create_result(DecompilationStatus.TimedOut) 160 | try: 161 | assert pickle.load(collected_vars) # non-empty 162 | except: 163 | flutes.log(f"[NO VARS] {original_path} ({binary_path})", "warning") 164 | return create_result(DecompilationStatus.NoVariables) 165 | # Make a new stripped copy and pass it the collected vars. 166 | with tempfile.NamedTemporaryFile(dir=tempdir) as stripped: 167 | flutes.run_command(['cp', file_path, stripped.name]) 168 | flutes.run_command(['strip', '--strip-debug', stripped.name]) 169 | # Dump the trees. 170 | # No timeout here, we know it'll run in a reasonable amount of 171 | # time and don't want mismatched files. 
172 | run_decompiler(stripped.name, DUMP_TREES, env=env) 173 | jsonl_path = os.path.join(tempdir, f"{binary_hash}.jsonl") 174 | flutes.run_command(['cp', jsonl_path, output_path]) 175 | end = datetime.datetime.now() 176 | duration = end - start 177 | flutes.log(f"[OK {duration.total_seconds():5.2f}s] {original_path} ({binary_path})", "success") 178 | return create_result(DecompilationStatus.Success, duration) 179 | 180 | 181 | def iter_binaries(db: ghcc.BinaryDB, binaries: Dict[str, BinaryInfo]) -> Iterator[BinaryInfo]: 182 | binary_entries = {entry["sha"]: entry for entry in db.collection.find()} # getting stuff in batch is much faster 183 | skipped_count = 0 184 | migrated_count = 0 185 | for sha, info in binaries.items(): 186 | entry = binary_entries.get(sha, None) 187 | if entry is not None: 188 | if "repo_owner" in entry: 189 | skipped_count += 1 190 | else: 191 | db.collection.update_one({"_id": entry["_id"]}, {"$set": { 192 | "repo_owner": info["repo_owner"], 193 | "repo_name": info["repo_name"], 194 | }}) 195 | migrated_count += 1 196 | continue 197 | if migrated_count > 0: 198 | flutes.log(f"Migrated {migrated_count} binary entries", force_console=True) 199 | migrated_count = 0 200 | if skipped_count > 0: 201 | flutes.log(f"Skipped {skipped_count} binaries that have been processed", force_console=True) 202 | skipped_count = 0 203 | yield info 204 | 205 | 206 | def get_binary_mapping(cache_path: Optional[str] = None) -> Dict[str, BinaryInfo]: 207 | @flutes.cache(cache_path, name="binary mapping cache") 208 | def _compute_binary_mapping() -> Dict[str, BinaryInfo]: 209 | binaries: Dict[str, BinaryInfo] = {} # sha -> binary_info 210 | with contextlib.closing(ghcc.RepoDB()) as repo_db: 211 | all_repos = repo_db.collection.find() 212 | for repo in tqdm.tqdm(all_repos, total=all_repos.count(), ncols=120, desc="Deduplicating binaries"): 213 | prefix = f"{repo['repo_owner']}/{repo['repo_name']}" 214 | for makefile in repo['makefiles']: 215 | # Absolute Docker paths were used when compiling; remove them. 216 | directory = f"{prefix}/" + flutes.remove_prefix(makefile['directory'], "/usr/src/repo/") 217 | for path, sha in zip(makefile['binaries'], makefile['sha256']): 218 | binaries[sha] = BinaryInfo({ 219 | "repo_owner": repo['repo_owner'], 220 | "repo_name": repo['repo_name'], 221 | "path": f"{prefix}/{sha}", 222 | "path_in_repo": f"{directory}/{path}", 223 | }) 224 | return binaries 225 | 226 | return _compute_binary_mapping 227 | 228 | 229 | def main() -> None: 230 | if args.n_procs == 0: 231 | # Only do this on the single-threaded case. 
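# (The hook drops into an interactive IPython shell on uncaught exceptions, which would not work inside # non-interactive worker processes.)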
232 | flutes.register_ipython_excepthook() 233 | flutes.log(f"Running with {args.n_procs} worker processes", "warning") 234 | 235 | # Check for/create output directories 236 | make_directory(args.output_dir) 237 | 238 | # Use RAM-backed memory for tmp if available 239 | if os.path.exists('/dev/shm'): 240 | tempfile.tempdir = '/dev/shm' 241 | 242 | flutes.set_log_file(args.log_file) 243 | write_pseudo_registry() 244 | 245 | # Obtain a list of all binaries 246 | binaries = get_binary_mapping(args.binary_mapping_cache_file) 247 | 248 | flutes.log(f"{len(binaries)} binaries to process.") 249 | file_count = 0 250 | db = ghcc.BinaryDB() 251 | 252 | with flutes.safe_pool(args.n_procs, closing=[db]) as pool: 253 | decompile_fn: Callable[[BinaryInfo], DecompilationResult] = functools.partial( 254 | decompile, output_dir=args.output_dir, binary_dir=args.binaries_dir, timeout=args.timeout) 255 | for result in pool.imap_unordered(decompile_fn, iter_binaries(db, binaries)): 256 | file_count += 1 257 | if result is not None: 258 | db.add_binary(result.info["repo_owner"], result.info["repo_name"], 259 | result.hash, result.status is DecompilationStatus.Success) 260 | if file_count % 100 == 0: 261 | flutes.log(f"Processed {file_count} binaries", force_console=True) 262 | 263 | 264 | if __name__ == '__main__': 265 | args = Arguments() 266 | main() 267 | -------------------------------------------------------------------------------- /scripts/decompiler_scripts/collect.py: -------------------------------------------------------------------------------- 1 | # Usage: IDALOG=/dev/stdout ./idat64 -B -S/path/to/collect.py /path/to/binary 2 | 3 | from collections import defaultdict 4 | from util import UNDEF_ADDR, CFuncGraph, GraphBuilder, hexrays_vars, get_expr_name 5 | import idaapi 6 | import ida_hexrays 7 | import ida_kernwin 8 | import ida_pro 9 | import ida_gdl 10 | import pickle 11 | import os 12 | 13 | varmap = dict() # frozenset of addrs -> varname 14 | 15 | # Collect a map of a set of addresses to a variable name. 16 | # For each variable, this collects the addresses corresponding to its uses. 17 | class CollectGraph(CFuncGraph): 18 | def collect_vars(self): 19 | rev_dict = defaultdict(set) 20 | for n in xrange(len(self.items)): 21 | item = self.items[n] 22 | if item.op is ida_hexrays.cot_var: 23 | name = get_expr_name(item.cexpr) 24 | if not hexrays_vars.match(name): 25 | if item.ea != UNDEF_ADDR: 26 | rev_dict[name].add(item.ea) 27 | else: 28 | ea = self.get_pred_ea(n) 29 | if ea != UNDEF_ADDR: 30 | rev_dict[name].add(ea) 31 | # ::NONE:: is a sentinel value used to indicate that two different 32 | # variables map to the same set of addresses. This happens in small 33 | # functions that use all of their arguments to call another function. 34 | for name, addrs in rev_dict.iteritems(): 35 | addrs = frozenset(addrs) 36 | if (addrs in varmap): 37 | varmap[addrs] = '::NONE::' 38 | else: 39 | varmap[addrs] = name 40 | 41 | def func(ea): 42 | f = idaapi.get_func(ea) 43 | if f is None: 44 | print('Please position the cursor within a function') 45 | return True 46 | cfunc = None 47 | try: 48 | cfunc = idaapi.decompile(f) 49 | except ida_hexrays.DecompilationFailure: 50 | pass 51 | 52 | if cfunc is None: 53 | print('Failed to decompile %x!' 
% ea) 54 | return True 55 | 56 | # Build decompilation graph 57 | cg = CollectGraph(None) 58 | gb = GraphBuilder(cg) 59 | gb.apply_to(cfunc.body, None) 60 | cg.collect_vars() 61 | 62 | class custom_action_handler(ida_kernwin.action_handler_t): 63 | def __init__(self): 64 | ida_kernwin.action_handler_t.__init__(self) 65 | 66 | class collect_vars(custom_action_handler): 67 | def activate(self, ctx): 68 | print('Collecting vars.') 69 | for ea in Functions(): 70 | func(ea) 71 | print('Vars collected.') 72 | return 1 73 | 74 | class dump_info(custom_action_handler): 75 | def activate(self, ctx): 76 | with open(os.environ['COLLECTED_VARS'], 'w') as vars_fh: 77 | pickle.dump(varmap, vars_fh) 78 | vars_fh.flush() 79 | return 1 80 | 81 | idaapi.autoWait() 82 | if not idaapi.init_hexrays_plugin(): 83 | idaapi.load_plugin('hexrays') 84 | idaapi.load_plugin('hexx64') 85 | if not idaapi.init_hexrays_plugin(): 86 | print('Unable to load Hex-rays') 87 | else: 88 | print('Hex-rays version %s has been detected' % idaapi.get_hexrays_version()) 89 | 90 | def main(): 91 | cv = collect_vars() 92 | cv.activate(None) 93 | dv = dump_info() 94 | dv.activate(None) 95 | 96 | main() 97 | ida_pro.qexit(0) 98 | -------------------------------------------------------------------------------- /scripts/decompiler_scripts/dump_trees.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from util import UNDEF_ADDR, CFuncGraph, GraphBuilder, hexrays_vars, get_expr_name 3 | import idaapi 4 | import ida_hexrays 5 | import ida_kernwin 6 | import ida_pro 7 | import json 8 | import jsonlines 9 | import pickle 10 | import os 11 | import re 12 | 13 | varmap = dict() 14 | # Dictionary mapping variable ids to (orig, renamed) pairs 15 | varnames = dict() 16 | var_id = 0 17 | sentinel_vars = re.compile('@@VAR_[0-9]+') 18 | 19 | class RenamedGraphBuilder(GraphBuilder): 20 | def __init__(self, cg, func, addresses): 21 | self.func = func 22 | self.addresses = addresses 23 | super(RenamedGraphBuilder, self).__init__(cg) 24 | 25 | def visit_expr(self, e): 26 | global var_id 27 | if e.op is ida_hexrays.cot_var: 28 | # Save original name of variable 29 | original_name = get_expr_name(e) 30 | if not sentinel_vars.match(original_name): 31 | # Get new name of variable 32 | addresses = frozenset(self.addresses[original_name]) 33 | if addresses in varmap and varmap[addresses] != '::NONE::': 34 | new_name = varmap[addresses] 35 | else: 36 | new_name = original_name 37 | # Save names 38 | varnames[var_id] = (original_name, new_name) 39 | # Rename variables to @@VAR_[id]@@[orig name]@@[new name] 40 | self.func.get_lvars()[e.v.idx].name = '@@VAR_' + str(var_id) + '@@' + original_name + '@@' + new_name 41 | var_id += 1 42 | return self.process(e) 43 | 44 | class AddressCollector: 45 | def __init__(self, cg): 46 | self.cg = cg 47 | self.addresses = defaultdict(set) 48 | 49 | def collect(self): 50 | for item in self.cg.items: 51 | if item.op is ida_hexrays.cot_var: 52 | name = get_expr_name(item) 53 | if item.ea != UNDEF_ADDR: 54 | self.addresses[name].add(item.ea) 55 | else: 56 | item_id = self.cg.reverse[item] 57 | ea = self.cg.get_pred_ea(item_id) 58 | if ea != UNDEF_ADDR: 59 | self.addresses[name].add(ea) 60 | 61 | # Process a single function given its EA 62 | def func(ea): 63 | f = idaapi.get_func(ea) 64 | function_name = GetFunctionName(ea) 65 | if f is None: 66 | print('Please position the cursor within a function') 67 | return None  # bail out instead of crashing on `idaapi.decompile(None)` below 68 | cfunc = None 69 | try: 70 | cfunc =
-------------------------------------------------------------------------------- /scripts/decompiler_scripts/dump_trees.py: --------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from util import UNDEF_ADDR, CFuncGraph, GraphBuilder, hexrays_vars, get_expr_name
3 | import idaapi
4 | import ida_hexrays
5 | import ida_kernwin
6 | import ida_pro
7 | import json
8 | import jsonlines
9 | import pickle
10 | import os
11 | import re
12 | 
13 | varmap = dict()
14 | # Dictionary mapping variable ids to (orig, renamed) pairs
15 | varnames = dict()
16 | var_id = 0
17 | sentinel_vars = re.compile('@@VAR_[0-9]+')
18 | 
19 | class RenamedGraphBuilder(GraphBuilder):
20 | def __init__(self, cg, func, addresses):
21 | self.func = func
22 | self.addresses = addresses
23 | super(RenamedGraphBuilder, self).__init__(cg)
24 | 
25 | def visit_expr(self, e):
26 | global var_id
27 | if e.op is ida_hexrays.cot_var:
28 | # Save original name of variable
29 | original_name = get_expr_name(e)
30 | if not sentinel_vars.match(original_name):
31 | # Get new name of variable
32 | addresses = frozenset(self.addresses[original_name])
33 | if addresses in varmap and varmap[addresses] != '::NONE::':
34 | new_name = varmap[addresses]
35 | else:
36 | new_name = original_name
37 | # Save names
38 | varnames[var_id] = (original_name, new_name)
39 | # Rename variables to @@VAR_[id]@@[orig name]@@[new name]
40 | self.func.get_lvars()[e.v.idx].name = '@@VAR_' + str(var_id) + '@@' + original_name + '@@' + new_name
41 | var_id += 1
42 | return self.process(e)
43 | 
44 | class AddressCollector:
45 | def __init__(self, cg):
46 | self.cg = cg
47 | self.addresses = defaultdict(set)
48 | 
49 | def collect(self):
50 | for item in self.cg.items:
51 | if item.op is ida_hexrays.cot_var:
52 | name = get_expr_name(item)
53 | if item.ea != UNDEF_ADDR:
54 | self.addresses[name].add(item.ea)
55 | else:
56 | item_id = self.cg.reverse[item]
57 | ea = self.cg.get_pred_ea(item_id)
58 | if ea != UNDEF_ADDR:
59 | self.addresses[name].add(ea)
60 | 
61 | # Process a single function given its EA
62 | def func(ea):
63 | f = idaapi.get_func(ea)
64 | function_name = GetFunctionName(ea)
65 | if f is None:
66 | raise ValueError('Please position the cursor within a function')
67 | 
68 | cfunc = None
69 | try:
70 | cfunc = idaapi.decompile(f)
71 | except ida_hexrays.DecompilationFailure as e:
72 | print('Failed to decompile %x: %s!' % (ea, function_name))
73 | raise e
74 | 
75 | renamed_file = renamed_prefix + '_' + function_name + '.c'
76 | 
77 | # Rename decompilation graph
78 | cg = CFuncGraph(None)
79 | gb = GraphBuilder(cg)
80 | gb.apply_to(cfunc.body, None)
81 | ac = AddressCollector(cg)
82 | ac.collect()
83 | rg = RenamedGraphBuilder(cg, cfunc, ac.addresses)
84 | rg.apply_to(cfunc.body, None)
85 | 
86 | # Create tree from collected names
87 | cfunc.build_c_tree()
88 | new_graph = CFuncGraph(None)
89 | new_builder = GraphBuilder(new_graph)
90 | new_builder.apply_to(cfunc.body, None)
91 | function_info = dict()
92 | function_info["function"] = function_name
93 | function_info["ast"] = new_graph.json_tree(0)
94 | raw_code = ""
95 | for line in cfunc.get_pseudocode():
96 | raw_code += idaapi.tag_remove(line.line) + '\n'
97 | function_info["raw_code"] = raw_code
98 | return function_info
99 | 
100 | class custom_action_handler(ida_kernwin.action_handler_t):
101 | def __init__(self):
102 | ida_kernwin.action_handler_t.__init__(self)
103 | 
104 | class collect_vars(custom_action_handler):
105 | def activate(self, ctx):
106 | print('Collecting vars.')
107 | jsonl_file_name = os.path.join(os.environ['OUTPUT_DIR'],
108 | os.environ['PREFIX']) + '.jsonl'
109 | with open(jsonl_file_name, 'w+') as jsonl_file:
110 | with jsonlines.Writer(jsonl_file) as writer:
111 | for ea in Functions():
112 | try:
113 | info = func(ea)
114 | writer.write(info)
115 | except (ida_hexrays.DecompilationFailure, ValueError):
116 | continue
117 | print('Vars collected.')
118 | return 1
119 | 
120 | def main():
121 | global renamed_prefix
122 | global varmap
123 | global varnames
124 | renamed_prefix = os.path.join(os.environ['OUTPUT_DIR'], 'functions',
125 | os.environ['PREFIX'])
126 | # Load collected variables
127 | with open(os.environ['COLLECTED_VARS']) as vars_fh:
128 | varmap = pickle.load(vars_fh)
129 | 
130 | # Collect decompilation info
131 | cv = collect_vars()
132 | cv.activate(None)
133 | 
134 | idaapi.autoWait()
135 | if not idaapi.init_hexrays_plugin():
136 | idaapi.load_plugin('hexrays')
137 | idaapi.load_plugin('hexx64')
138 | if not idaapi.init_hexrays_plugin():
139 | print('Unable to load Hex-rays')
140 | else:
141 | print('Hex-rays version %s has been detected' % idaapi.get_hexrays_version())
142 | main()
143 | ida_pro.qexit(0)
144 | 
-------------------------------------------------------------------------------- /scripts/decompiler_scripts/util.py: --------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | import ida_hexrays
3 | import ida_lines
4 | import ida_pro
5 | import json
6 | import re
7 | 
8 | UNDEF_ADDR = 0xFFFFFFFFFFFFFFFF
9 | 
10 | hexrays_vars = re.compile("^(v|a)[0-9]+$")
11 | 
12 | def get_expr_name(expr):
13 | name = expr.print1(None)
14 | name = ida_lines.tag_remove(name)
15 | name = ida_pro.str2user(name)
16 | return name
17 | 
18 | class CFuncGraph:
19 | def __init__(self, highlight):
20 | self.items = [] # list of citem_t
21 | self.reverse = dict() # citem_t -> node #
22 | self.succs = [] # list of lists of next nodes
23 | self.preds = [] # list of lists of previous nodes
24 | self.highlight = highlight
25 | 
26 | def nsucc(self, n):
27 | return len(self.succs[n]) if self.size() else 0
28 | 
29 | def npred(self, n):
30 | return len(self.preds[n]) if self.size() else 0
31 | 
32 | def succ(self, n, i):
33 | return self.succs[n][i]
34 | 
35 | def 
pred(self, n, i): 36 | return self.preds[n][i] 37 | 38 | def size(self): 39 | return len(self.preds) 40 | 41 | def add_node(self): 42 | n = self.size() 43 | 44 | def resize(array, new_size): 45 | if new_size > len(array): 46 | while len(array) < new_size: 47 | array.append([]) 48 | else: 49 | array = array[:new_size] 50 | return array 51 | 52 | self.preds = resize(self.preds, n+1) 53 | self.succs = resize(self.succs, n+1) 54 | return n 55 | 56 | def add_edge(self, x, y): 57 | self.preds[y].append(x) 58 | self.succs[x].append(y) 59 | 60 | def get_pred_ea(self, n): 61 | if self.npred(n) == 1: 62 | pred = self.pred(n, 0) 63 | pred_item = self.items[pred] 64 | if pred_item.ea == UNDEF_ADDR: 65 | return self.get_pred_ea(pred) 66 | return pred_item.ea 67 | return UNDEF_ADDR 68 | 69 | def get_node_label(self, n): 70 | item = self.items[n] 71 | op = item.op 72 | insn = item.cinsn 73 | expr = item.cexpr 74 | parts = [ida_hexrays.get_ctype_name(op)] 75 | if op == ida_hexrays.cot_ptr: 76 | parts.append(".%d" % expr.ptrsize) 77 | elif op == ida_hexrays.cot_memptr: 78 | parts.append(".%d (m=%d)" % (expr.ptrsize, expr.m)) 79 | elif op == ida_hexrays.cot_memref: 80 | parts.append(" (m=%d)" % (expr.m,)) 81 | elif op in [ 82 | ida_hexrays.cot_obj, 83 | ida_hexrays.cot_var]: 84 | name = get_expr_name(expr) 85 | parts.append(".%d %s" % (expr.refwidth, name)) 86 | elif op in [ 87 | ida_hexrays.cot_num, 88 | ida_hexrays.cot_helper, 89 | ida_hexrays.cot_str]: 90 | name = get_expr_name(expr) 91 | parts.append(" %s" % (name,)) 92 | elif op == ida_hexrays.cit_goto: 93 | parts.append(" LABEL_%d" % insn.cgoto.label_num) 94 | elif op == ida_hexrays.cit_asm: 95 | parts.append("") 96 | # parts.append(" %a.%d" % ()) 97 | parts.append(", ") 98 | parts.append("ea: %08X" % item.ea) 99 | if item.is_expr() and not expr is None and not expr.type.empty(): 100 | parts.append(", ") 101 | tstr = expr.type._print() 102 | parts.append(tstr if tstr else "?") 103 | return "".join(parts) 104 | 105 | 106 | # Puts the tree in a format suitable for JSON 107 | def json_tree(self, n): 108 | # Each node has a unique ID 109 | node_info = { "node_id" : n } 110 | item = self.items[n] 111 | # This is the type of ctree node 112 | node_info["node_type"] = ida_hexrays.get_ctype_name(item.op) 113 | # This is the type of the data (in C-land) 114 | if item.is_expr() and not item.cexpr.type.empty(): 115 | node_info["type"] = item.cexpr.type._print() 116 | node_info["address"] = "%08X" % item.ea 117 | if item.ea == UNDEF_ADDR: 118 | node_info["parent_address"] = "%08X" % self.get_pred_ea(n) 119 | # Specific info for different node types 120 | if item.op == ida_hexrays.cot_ptr: 121 | node_info["pointer_size"] = item.cexpr.ptrsize 122 | elif item.op == ida_hexrays.cot_memptr: 123 | node_info.update({ 124 | "pointer_size": item.cexpr.ptrsize, 125 | "m": item.cexpr.m 126 | }) 127 | elif item.op == ida_hexrays.cot_memref: 128 | node_info["m"] = item.cexpr.m 129 | elif item.op == ida_hexrays.cot_obj: 130 | node_info.update({ 131 | "name": get_expr_name(item.cexpr), 132 | "ref_width": item.cexpr.refwidth 133 | }) 134 | elif item.op == ida_hexrays.cot_var: 135 | _, var_id, old_name, new_name = get_expr_name(item.cexpr).split("@@") 136 | node_info.update({ 137 | "var_id": var_id, 138 | "old_name": old_name, 139 | "new_name": new_name, 140 | "ref_width": item.cexpr.refwidth 141 | }) 142 | elif item.op in [ida_hexrays.cot_num, 143 | ida_hexrays.cot_str, 144 | ida_hexrays.cot_helper]: 145 | node_info["name"] = get_expr_name(item.cexpr) 146 | # Get info for 
children of this node 147 | successors = [] 148 | x_successor = None 149 | y_successor = None 150 | z_successor = None 151 | for i in xrange(self.nsucc(n)): 152 | successors.append(self.succ(n, i)) 153 | successor_trees = [] 154 | if item.is_expr(): 155 | if item.x: 156 | for s in successors: 157 | if item.x == self.items[s]: 158 | successors.remove(s) 159 | x_successor = self.json_tree(s) 160 | break 161 | if item.y: 162 | for s in successors: 163 | if item.y == self.items[s]: 164 | successors.remove(s) 165 | y_successor = self.json_tree(s) 166 | break 167 | if item.z: 168 | for s in successors: 169 | if item.z == self.items[s]: 170 | successors.remove(s) 171 | z_successor = self.json_tree(s) 172 | break 173 | if successors: 174 | for succ in successors: 175 | successor_trees.append(self.json_tree(succ)) 176 | if successor_trees != []: 177 | node_info["children"] = successor_trees 178 | if x_successor: 179 | node_info["x"] = x_successor 180 | if y_successor: 181 | node_info["y"] = y_successor 182 | if z_successor: 183 | node_info["z"] = z_successor 184 | return node_info 185 | 186 | def print_tree(self): 187 | tree = json.dumps(self.json_tree(0)) 188 | print(tree) 189 | 190 | def dump(self): 191 | print("%d items:" % len(self.items)) 192 | for idx, item in enumerate(self.items): 193 | print("\t%d: %s" % (idx, ida_hexrays.get_ctype_name(item.op))) 194 | # print("\t%d: %s" % (idx, self.get_node_label(idx))) 195 | 196 | print("succs:") 197 | for parent, s in enumerate(self.succs): 198 | print("\t%d: %s" % (parent, s)) 199 | 200 | print("preds:") 201 | for child, p in enumerate(self.preds): 202 | print("\t%d: %s" % (child, p)) 203 | 204 | 205 | class GraphBuilder(ida_hexrays.ctree_parentee_t): 206 | def __init__(self, cg): 207 | ida_hexrays.ctree_parentee_t.__init__(self) 208 | self.cg = cg 209 | 210 | 211 | def add_node(self, i): 212 | n = self.cg.add_node() 213 | if n <= len(self.cg.items): 214 | self.cg.items.append(i) 215 | self.cg.items[n] = i 216 | self.cg.reverse[i] = n 217 | return n 218 | 219 | def process(self, i): 220 | n = self.add_node(i) 221 | if n < 0: 222 | return n 223 | if len(self.parents) > 1: 224 | lp = self.parents.back().obj_id 225 | for k, v in self.cg.reverse.items(): 226 | if k.obj_id == lp: 227 | p = v 228 | break 229 | self.cg.add_edge(p, n) 230 | return 0 231 | 232 | def visit_insn(self, i): 233 | return self.process(i) 234 | 235 | def visit_expr(self, e): 236 | return self.process(e) 237 | -------------------------------------------------------------------------------- /scripts/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function __setup_bashrc { 4 | { 5 | echo "source $CUSTOM_PATH/.commacd.sh" 6 | echo "export COMMACD_SEQSTART=1" 7 | echo "alias ll='ls -alFG'" 8 | } >> "$HOME/.bashrc" 9 | } 10 | 11 | if [[ -z "${LOCAL_USER_ID}" ]]; then 12 | # LOCAL_USER_ID not set, proceed as root. 13 | __setup_bashrc 14 | exec bash -c "$@" 15 | else 16 | # Add local user with specified UID and GID. 17 | USER_ID=${LOCAL_USER_ID} 18 | GROUP_ID=${LOCAL_GROUP_ID} 19 | 20 | echo "Starting with UID: $USER_ID, GID: $GROUP_ID" 21 | GROUP_NAME=$(getent group "$GROUP_ID" | cut -d: -f1) 22 | if [[ -z "${GROUP_NAME}" ]]; then 23 | # The group doesn't exist; create a new one. 
24 | groupadd -g "$GROUP_ID" host
25 | GROUP_NAME="host"
26 | fi
27 | useradd --shell /bin/bash -u "$USER_ID" -g "$GROUP_NAME" -o -c "" -m user
28 | export HOME=/home/user
29 | chown -R user /usr/src/
30 | __setup_bashrc
31 | 
32 | # Set a limit on the number of processes one can run (ulimit), before running user scripts.
33 | #
34 | # Note: Using $@ instead of $* when embedded in another word would result in unexpected behavior:
35 | # the first field is merged into the surrounding string, and the rest are left as separate fields.
36 | # If the command is `chmod +x ./configure`, it would become:
37 | # > "ulimit -u 256; chmod" "+x" "./configure"
38 | exec /usr/local/bin/gosu user bash -c "ulimit -u 256; $*"
39 | fi
40 | 
-------------------------------------------------------------------------------- /scripts/fake_libc_include/X11/Intrinsic.h: --------------------------------------------------------------------------------
1 | #include "_fake_defines.h"
2 | #include "_fake_typedefs.h"
3 | #include "_X11_fake_defines.h"
4 | #include "_X11_fake_typedefs.h"
5 | 
-------------------------------------------------------------------------------- /scripts/fake_libc_include/X11/Xlib.h: --------------------------------------------------------------------------------
1 | #include "_fake_defines.h"
2 | #include "_fake_typedefs.h"
3 | #include "_X11_fake_defines.h"
4 | #include "_X11_fake_typedefs.h"
5 | 
-------------------------------------------------------------------------------- /scripts/fake_libc_include/X11/_X11_fake_defines.h: --------------------------------------------------------------------------------
1 | #ifndef _X11_FAKE_DEFINES_H
2 | #define _X11_FAKE_DEFINES_H
3 | 
4 | #define Atom CARD32
5 | #define Bool int
6 | #define KeySym CARD32
7 | #define Pixmap CARD32
8 | #define Time CARD32
9 | #define _XFUNCPROTOBEGIN
10 | #define _XFUNCPROTOEND
11 | #define _Xconst const
12 | 
13 | #define _X_RESTRICT_KYWD
14 | #define Cardinal unsigned int
15 | #define Boolean int
16 | #endif
17 | 
-------------------------------------------------------------------------------- /scripts/fake_libc_include/X11/_X11_fake_typedefs.h: --------------------------------------------------------------------------------
1 | #ifndef _X11_FAKE_TYPEDEFS_H
2 | #define _X11_FAKE_TYPEDEFS_H
3 | 
4 | typedef char* XPointer;
5 | typedef unsigned char KeyCode;
6 | typedef unsigned int CARD32;
7 | typedef unsigned long VisualID;
8 | typedef unsigned long XIMResetState;
9 | typedef unsigned long XID;
10 | typedef XID Window;
11 | typedef XID Colormap;
12 | typedef XID Cursor;
13 | typedef XID Drawable;
14 | typedef void* XtPointer;
15 | typedef XtPointer XtRequestId;
16 | typedef struct Display Display;
17 | typedef struct Screen Screen;
18 | typedef struct Status Status;
19 | typedef struct Visual Visual;
20 | typedef struct Widget *Widget;
21 | typedef struct XColor XColor;
22 | typedef struct XClassHint XClassHint;
23 | typedef struct XEvent XEvent;
24 | typedef struct XFontStruct XFontStruct;
25 | typedef struct XGCValues XGCValues;
26 | typedef struct XKeyEvent XKeyEvent;
27 | typedef struct XKeyPressedEvent XKeyPressedEvent;
28 | typedef struct XPoint XPoint;
29 | typedef struct XRectangle XRectangle;
30 | typedef struct XSelectionRequestEvent XSelectionRequestEvent;
31 | typedef struct XWindowChanges XWindowChanges;
32 | typedef struct _XGC _XGC;
33 | typedef struct _XGC *GC;
34 | typedef struct _XIC *XIC;
35 | typedef struct _XIM *XIM;
36 | typedef struct _XImage XImage;
37 | 
38 | typedef int __end_of_fake_libc__;
39 | 
40 | #endif
41 | 
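Everything under scripts/fake_libc_include follows the same idea, in the style of pycparser's fake libc headers: each header collapses to the two umbrella files _fake_defines.h and _fake_typedefs.h, which define just enough names, with every type reduced to int or a trivial struct, for a pure C99 parser to get through real-world code without the actual system headers. Below is a minimal sketch of how such a directory is typically wired into pycparser; the file name and include path are illustrative, not ghcc's actual invocation (its parsing pipeline lives in ghcc/parse).

# Illustrative only: parse a C source file against the fake headers.
from pycparser import parse_file

ast = parse_file(
    'example.c',                  # hypothetical input file
    use_cpp=True,                 # run a real preprocessor first
    cpp_path='gcc',
    cpp_args=['-E', '-Iscripts/fake_libc_include'],
)
ast.show()                        # print the resulting AST

Because the preprocessor resolves every #include to these stubs, the parser only ever sees simple typedefs and empty macros, which is why the stubs deliberately lie about sizes and signedness: the goal is a parseable AST, not ABI fidelity.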
-------------------------------------------------------------------------------- /scripts/fake_libc_include/_ansi.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/_fake_defines.h: -------------------------------------------------------------------------------- 1 | #ifndef _FAKE_DEFINES_H 2 | #define _FAKE_DEFINES_H 3 | 4 | /* GCC extensions */ 5 | #include "_fake_gcc_ext.h" 6 | 7 | /* Original fake_libc */ 8 | 9 | #define NULL 0 10 | #define BUFSIZ 1024 11 | #define FOPEN_MAX 20 12 | #define FILENAME_MAX 1024 13 | 14 | #ifndef SEEK_SET 15 | #define SEEK_SET 0 /* set file offset to offset */ 16 | #endif 17 | #ifndef SEEK_CUR 18 | #define SEEK_CUR 1 /* set file offset to current plus offset */ 19 | #endif 20 | #ifndef SEEK_END 21 | #define SEEK_END 2 /* set file offset to EOF plus offset */ 22 | #endif 23 | 24 | #define __LITTLE_ENDIAN 1234 25 | #define LITTLE_ENDIAN __LITTLE_ENDIAN 26 | #define __BIG_ENDIAN 4321 27 | #define BIG_ENDIAN __BIG_ENDIAN 28 | #define __BYTE_ORDER __LITTLE_ENDIAN 29 | #define BYTE_ORDER __BYTE_ORDER 30 | 31 | #define EXIT_FAILURE 1 32 | #define EXIT_SUCCESS 0 33 | 34 | #define UCHAR_MAX 255 35 | #define USHRT_MAX 65535 36 | #define UINT_MAX 4294967295U 37 | #define RAND_MAX 32767 38 | #define INT_MAX 32767 39 | 40 | /* C99 inttypes.h defines */ 41 | #define PRId8 "d" 42 | #define PRIi8 "i" 43 | #define PRIo8 "o" 44 | #define PRIu8 "u" 45 | #define PRIx8 "x" 46 | #define PRIX8 "X" 47 | #define PRId16 "d" 48 | #define PRIi16 "i" 49 | #define PRIo16 "o" 50 | #define PRIu16 "u" 51 | #define PRIx16 "x" 52 | #define PRIX16 "X" 53 | #define PRId32 "d" 54 | #define PRIi32 "i" 55 | #define PRIo32 "o" 56 | #define PRIu32 "u" 57 | #define PRIx32 "x" 58 | #define PRIX32 "X" 59 | #define PRId64 "d" 60 | #define PRIi64 "i" 61 | #define PRIo64 "o" 62 | #define PRIu64 "u" 63 | #define PRIx64 "x" 64 | #define PRIX64 "X" 65 | #define PRIdLEAST8 "d" 66 | #define PRIiLEAST8 "i" 67 | #define PRIoLEAST8 "o" 68 | #define PRIuLEAST8 "u" 69 | #define PRIxLEAST8 "x" 70 | #define PRIXLEAST8 "X" 71 | #define PRIdLEAST16 "d" 72 | #define PRIiLEAST16 "i" 73 | #define PRIoLEAST16 "o" 74 | #define PRIuLEAST16 "u" 75 | #define PRIxLEAST16 "x" 76 | #define PRIXLEAST16 "X" 77 | #define PRIdLEAST32 "d" 78 | #define PRIiLEAST32 "i" 79 | #define PRIoLEAST32 "o" 80 | #define PRIuLEAST32 "u" 81 | #define PRIxLEAST32 "x" 82 | #define PRIXLEAST32 "X" 83 | #define PRIdLEAST64 "d" 84 | #define PRIiLEAST64 "i" 85 | #define PRIoLEAST64 "o" 86 | #define PRIuLEAST64 "u" 87 | #define PRIxLEAST64 "x" 88 | #define PRIXLEAST64 "X" 89 | #define PRIdFAST8 "d" 90 | #define PRIiFAST8 "i" 91 | #define PRIoFAST8 "o" 92 | #define PRIuFAST8 "u" 93 | #define PRIxFAST8 "x" 94 | #define PRIXFAST8 "X" 95 | #define PRIdFAST16 "d" 96 | #define PRIiFAST16 "i" 97 | #define PRIoFAST16 "o" 98 | #define PRIuFAST16 "u" 99 | #define PRIxFAST16 "x" 100 | #define PRIXFAST16 "X" 101 | #define PRIdFAST32 "d" 102 | #define PRIiFAST32 "i" 103 | #define PRIoFAST32 "o" 104 | #define PRIuFAST32 "u" 105 | #define PRIxFAST32 "x" 106 | #define PRIXFAST32 "X" 107 | #define PRIdFAST64 "d" 108 | #define PRIiFAST64 "i" 109 | #define PRIoFAST64 "o" 110 | #define PRIuFAST64 "u" 111 | #define PRIxFAST64 "x" 112 | #define PRIXFAST64 "X" 113 | #define PRIdPTR "d" 114 | #define PRIiPTR "i" 115 | #define PRIoPTR "o" 116 | #define PRIuPTR "u" 117 | 
#define PRIxPTR "x" 118 | #define PRIXPTR "X" 119 | #define PRIdMAX "d" 120 | #define PRIiMAX "i" 121 | #define PRIoMAX "o" 122 | #define PRIuMAX "u" 123 | #define PRIxMAX "x" 124 | #define PRIXMAX "X" 125 | #define SCNd8 "d" 126 | #define SCNi8 "i" 127 | #define SCNo8 "o" 128 | #define SCNu8 "u" 129 | #define SCNx8 "x" 130 | #define SCNd16 "d" 131 | #define SCNi16 "i" 132 | #define SCNo16 "o" 133 | #define SCNu16 "u" 134 | #define SCNx16 "x" 135 | #define SCNd32 "d" 136 | #define SCNi32 "i" 137 | #define SCNo32 "o" 138 | #define SCNu32 "u" 139 | #define SCNx32 "x" 140 | #define SCNd64 "d" 141 | #define SCNi64 "i" 142 | #define SCNo64 "o" 143 | #define SCNu64 "u" 144 | #define SCNx64 "x" 145 | #define SCNdLEAST8 "d" 146 | #define SCNiLEAST8 "i" 147 | #define SCNoLEAST8 "o" 148 | #define SCNuLEAST8 "u" 149 | #define SCNxLEAST8 "x" 150 | #define SCNdLEAST16 "d" 151 | #define SCNiLEAST16 "i" 152 | #define SCNoLEAST16 "o" 153 | #define SCNuLEAST16 "u" 154 | #define SCNxLEAST16 "x" 155 | #define SCNdLEAST32 "d" 156 | #define SCNiLEAST32 "i" 157 | #define SCNoLEAST32 "o" 158 | #define SCNuLEAST32 "u" 159 | #define SCNxLEAST32 "x" 160 | #define SCNdLEAST64 "d" 161 | #define SCNiLEAST64 "i" 162 | #define SCNoLEAST64 "o" 163 | #define SCNuLEAST64 "u" 164 | #define SCNxLEAST64 "x" 165 | #define SCNdFAST8 "d" 166 | #define SCNiFAST8 "i" 167 | #define SCNoFAST8 "o" 168 | #define SCNuFAST8 "u" 169 | #define SCNxFAST8 "x" 170 | #define SCNdFAST16 "d" 171 | #define SCNiFAST16 "i" 172 | #define SCNoFAST16 "o" 173 | #define SCNuFAST16 "u" 174 | #define SCNxFAST16 "x" 175 | #define SCNdFAST32 "d" 176 | #define SCNiFAST32 "i" 177 | #define SCNoFAST32 "o" 178 | #define SCNuFAST32 "u" 179 | #define SCNxFAST32 "x" 180 | #define SCNdFAST64 "d" 181 | #define SCNiFAST64 "i" 182 | #define SCNoFAST64 "o" 183 | #define SCNuFAST64 "u" 184 | #define SCNxFAST64 "x" 185 | #define SCNdPTR "d" 186 | #define SCNiPTR "i" 187 | #define SCNoPTR "o" 188 | #define SCNuPTR "u" 189 | #define SCNxPTR "x" 190 | #define SCNdMAX "d" 191 | #define SCNiMAX "i" 192 | #define SCNoMAX "o" 193 | #define SCNuMAX "u" 194 | #define SCNxMAX "x" 195 | 196 | /* C99 stdbool.h defines */ 197 | #define __bool_true_false_are_defined 1 198 | #define false 0 199 | #define true 1 200 | 201 | /* va_arg macros and type*/ 202 | #define va_start(_ap, _type) __builtin_va_start((_ap)) 203 | #define va_arg(_ap, _type) __builtin_va_arg((_ap)) 204 | #define va_end(_list) 205 | 206 | #endif 207 | 208 | /* Vectors */ 209 | #define __m128 int 210 | #define __m128_u int 211 | #define __m128d int 212 | #define __m128d_u int 213 | #define __m128i int 214 | #define __m128i_u int 215 | #define __m256 int 216 | #define __m256_u int 217 | #define __m256d int 218 | #define __m256d_u int 219 | #define __m256i int 220 | #define __m256i_u int 221 | #define __m512 int 222 | #define __m512_u int 223 | #define __m512d int 224 | #define __m512d_u int 225 | #define __m512i int 226 | #define __m512i_u int 227 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/_fake_gcc_ext.h: -------------------------------------------------------------------------------- 1 | #ifndef _FAKE_GCC_EXT_H 2 | #define _FAKE_GCC_EXT_H 3 | 4 | /* Header file to remove (some) GCC extensions */ 5 | 6 | /* 7 | Get rid of the `__asm__(...)` & `__asm__ volatile(...)` syntax. This does not break volatile type qualifiers. 8 | However, this leaves `__asm__ volatile(...)` to be a single `__asm__`. 
9 | Thus, we typedef `__asm__` before the define, so it becomes a single type, which is a valid statement. 10 | */ 11 | typedef int __asm__; 12 | typedef int __asm; 13 | #define __asm__(...) 14 | #define __asm(...) 15 | #define volatile(...) 16 | 17 | #define __attribute__(...) 18 | #define __attribute(...) 19 | 20 | #define __const__ const 21 | #define __const const 22 | #define __inline__ inline 23 | #define __inline inline 24 | #define __restrict__ restrict 25 | #define __restrict restrict 26 | #define __volatile__ volatile 27 | #define __volatile volatile 28 | #define __extension__ 29 | 30 | #endif // _FAKE_GCC_EXT_H 31 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/_fake_typedefs.h: -------------------------------------------------------------------------------- 1 | #ifndef _FAKE_TYPEDEFS_H 2 | #define _FAKE_TYPEDEFS_H 3 | 4 | /* Automatically generated from glibc 2.26 */ 5 | 6 | typedef int __builtin_va_list; 7 | typedef unsigned char __u_char; 8 | typedef unsigned short int __u_short; 9 | typedef unsigned int __u_int; 10 | typedef unsigned long int __u_long; 11 | typedef signed char __int8_t; 12 | typedef unsigned char __uint8_t; 13 | typedef signed short int __int16_t; 14 | typedef unsigned short int __uint16_t; 15 | typedef signed int __int32_t; 16 | typedef unsigned int __uint32_t; 17 | typedef signed long int __int64_t; 18 | typedef unsigned long int __uint64_t; 19 | typedef long int __quad_t; 20 | typedef unsigned long int __u_quad_t; 21 | typedef unsigned long int __dev_t; 22 | typedef unsigned int __uid_t; 23 | typedef unsigned int __gid_t; 24 | typedef unsigned long int __ino_t; 25 | typedef unsigned long int __ino64_t; 26 | typedef unsigned int __mode_t; 27 | typedef unsigned long int __nlink_t; 28 | typedef long int __off_t; 29 | typedef long int __off64_t; 30 | typedef int __pid_t; 31 | typedef struct 32 | { 33 | int __val[2]; 34 | } __fsid_t; 35 | typedef long int __clock_t; 36 | typedef unsigned long int __rlim_t; 37 | typedef unsigned long int __rlim64_t; 38 | typedef unsigned int __id_t; 39 | typedef long int __time_t; 40 | typedef unsigned int __useconds_t; 41 | typedef long int __suseconds_t; 42 | typedef int __daddr_t; 43 | typedef int __key_t; 44 | typedef int __clockid_t; 45 | typedef void *__timer_t; 46 | typedef long int __blksize_t; 47 | typedef long int __blkcnt_t; 48 | typedef long int __blkcnt64_t; 49 | typedef unsigned long int __fsblkcnt_t; 50 | typedef unsigned long int __fsblkcnt64_t; 51 | typedef unsigned long int __fsfilcnt_t; 52 | typedef unsigned long int __fsfilcnt64_t; 53 | typedef long int __fsword_t; 54 | typedef long int __ssize_t; 55 | typedef long int __syscall_slong_t; 56 | typedef unsigned long int __syscall_ulong_t; 57 | typedef __off64_t __loff_t; 58 | typedef __quad_t *__qaddr_t; 59 | typedef char *__caddr_t; 60 | typedef long int __intptr_t; 61 | typedef unsigned int __socklen_t; 62 | typedef __u_char u_char; 63 | typedef __u_short u_short; 64 | typedef __u_int u_int; 65 | typedef __u_long u_long; 66 | typedef __quad_t quad_t; 67 | typedef __u_quad_t u_quad_t; 68 | typedef __fsid_t fsid_t; 69 | typedef __loff_t loff_t; 70 | typedef __ino_t ino_t; 71 | typedef __dev_t dev_t; 72 | typedef __gid_t gid_t; 73 | typedef __mode_t mode_t; 74 | typedef __nlink_t nlink_t; 75 | typedef __uid_t uid_t; 76 | typedef __off_t off_t; 77 | typedef __pid_t pid_t; 78 | typedef __id_t id_t; 79 | typedef __ssize_t ssize_t; 80 | typedef __daddr_t daddr_t; 81 | typedef __caddr_t caddr_t; 82 | 
typedef __key_t key_t; 83 | typedef __clock_t clock_t; 84 | typedef __time_t time_t; 85 | typedef __clockid_t clockid_t; 86 | typedef __timer_t timer_t; 87 | typedef long unsigned int size_t; 88 | typedef unsigned long int ulong; 89 | typedef unsigned short int ushort; 90 | typedef unsigned int uint; 91 | typedef int int8_t; 92 | typedef int int16_t; 93 | typedef int int32_t; 94 | typedef int int64_t; 95 | typedef unsigned int u_int8_t; 96 | typedef unsigned int u_int16_t; 97 | typedef unsigned int u_int32_t; 98 | typedef unsigned int u_int64_t; 99 | typedef int register_t; 100 | typedef int __sig_atomic_t; 101 | typedef struct 102 | { 103 | unsigned long int __val[1024 / (8 * (sizeof(unsigned long int)))]; 104 | } __sigset_t; 105 | typedef __sigset_t sigset_t; 106 | typedef __suseconds_t suseconds_t; 107 | typedef long int __fd_mask; 108 | typedef struct 109 | { 110 | __fd_mask __fds_bits[1024 / (8 * ((int) (sizeof(__fd_mask))))]; 111 | } fd_set; 112 | typedef __fd_mask fd_mask; 113 | typedef __blksize_t blksize_t; 114 | typedef __blkcnt_t blkcnt_t; 115 | typedef __fsblkcnt_t fsblkcnt_t; 116 | typedef __fsfilcnt_t fsfilcnt_t; 117 | typedef unsigned long int pthread_t; 118 | typedef union pthread_attr_t pthread_attr_t; 119 | typedef struct __pthread_internal_list 120 | { 121 | struct __pthread_internal_list *__prev; 122 | struct __pthread_internal_list *__next; 123 | } __pthread_list_t; 124 | typedef union 125 | { 126 | struct __pthread_mutex_s 127 | { 128 | int __lock; 129 | unsigned int __count; 130 | int __owner; 131 | unsigned int __nusers; 132 | int __kind; 133 | short __spins; 134 | short __elision; 135 | __pthread_list_t __list; 136 | } __data; 137 | char __size[40]; 138 | long int __align; 139 | } pthread_mutex_t; 140 | typedef union 141 | { 142 | char __size[4]; 143 | int __align; 144 | } pthread_mutexattr_t; 145 | typedef union 146 | { 147 | struct 148 | { 149 | int __lock; 150 | unsigned int __futex; 151 | unsigned long long int __total_seq; 152 | unsigned long long int __wakeup_seq; 153 | unsigned long long int __woken_seq; 154 | void *__mutex; 155 | unsigned int __nwaiters; 156 | unsigned int __broadcast_seq; 157 | } __data; 158 | char __size[48]; 159 | long long int __align; 160 | } pthread_cond_t; 161 | typedef union 162 | { 163 | char __size[4]; 164 | int __align; 165 | } pthread_condattr_t; 166 | typedef unsigned int pthread_key_t; 167 | typedef int pthread_once_t; 168 | typedef union 169 | { 170 | struct 171 | { 172 | int __lock; 173 | unsigned int __nr_readers; 174 | unsigned int __readers_wakeup; 175 | unsigned int __writer_wakeup; 176 | unsigned int __nr_readers_queued; 177 | unsigned int __nr_writers_queued; 178 | int __writer; 179 | int __shared; 180 | signed char __rwelision; 181 | unsigned char __pad1[7]; 182 | unsigned long int __pad2; 183 | unsigned int __flags; 184 | } __data; 185 | char __size[56]; 186 | long int __align; 187 | } pthread_rwlock_t; 188 | typedef union 189 | { 190 | char __size[8]; 191 | long int __align; 192 | } pthread_rwlockattr_t; 193 | typedef volatile int pthread_spinlock_t; 194 | typedef union 195 | { 196 | char __size[32]; 197 | long int __align; 198 | } pthread_barrier_t; 199 | typedef union 200 | { 201 | char __size[4]; 202 | int __align; 203 | } pthread_barrierattr_t; 204 | typedef union sigval 205 | { 206 | int sival_int; 207 | void *sival_ptr; 208 | } sigval_t; 209 | typedef struct sigevent 210 | { 211 | sigval_t sigev_value; 212 | int sigev_signo; 213 | int sigev_notify; 214 | union 215 | { 216 | int _pad[(64 / (sizeof(int))) - 4]; 
217 | __pid_t _tid; 218 | struct 219 | { 220 | void (*_function)(sigval_t); 221 | pthread_attr_t *_attribute; 222 | } _sigev_thread; 223 | } _sigev_un; 224 | } sigevent_t; 225 | typedef unsigned char uint8_t; 226 | typedef unsigned short int uint16_t; 227 | typedef unsigned int uint32_t; 228 | typedef unsigned long int uint64_t; 229 | typedef signed char int_least8_t; 230 | typedef short int int_least16_t; 231 | typedef int int_least32_t; 232 | typedef long int int_least64_t; 233 | typedef unsigned char uint_least8_t; 234 | typedef unsigned short int uint_least16_t; 235 | typedef unsigned int uint_least32_t; 236 | typedef unsigned long int uint_least64_t; 237 | typedef signed char int_fast8_t; 238 | typedef long int int_fast16_t; 239 | typedef long int int_fast32_t; 240 | typedef long int int_fast64_t; 241 | typedef unsigned char uint_fast8_t; 242 | typedef unsigned long int uint_fast16_t; 243 | typedef unsigned long int uint_fast32_t; 244 | typedef unsigned long int uint_fast64_t; 245 | typedef long int intptr_t; 246 | typedef unsigned long int uintptr_t; 247 | typedef long int intmax_t; 248 | typedef unsigned long int uintmax_t; 249 | typedef __socklen_t socklen_t; 250 | typedef unsigned short int sa_family_t; 251 | typedef uint32_t in_addr_t; 252 | typedef uint16_t in_port_t; 253 | typedef struct __locale_struct 254 | { 255 | struct __locale_data *__locales[13]; 256 | const unsigned short int *__ctype_b; 257 | const int *__ctype_tolower; 258 | const int *__ctype_toupper; 259 | const char *__names[13]; 260 | } *__locale_t; 261 | typedef __locale_t locale_t; 262 | typedef struct __dirstream DIR; 263 | typedef unsigned short int fexcept_t; 264 | typedef struct 265 | { 266 | unsigned short int __control_word; 267 | unsigned short int __glibc_reserved1; 268 | unsigned short int __status_word; 269 | unsigned short int __glibc_reserved2; 270 | unsigned short int __tags; 271 | unsigned short int __glibc_reserved3; 272 | unsigned int __eip; 273 | unsigned short int __cs_selector; 274 | unsigned int __opcode : 11; 275 | unsigned int __glibc_reserved4 : 5; 276 | unsigned int __data_offset; 277 | unsigned short int __data_selector; 278 | unsigned short int __glibc_reserved5; 279 | unsigned int __mxcsr; 280 | } fenv_t; 281 | typedef int (*__ftw_func_t)(const char *__filename, const struct stat *__status, int __flag); 282 | typedef struct 283 | { 284 | size_t gl_pathc; 285 | char **gl_pathv; 286 | size_t gl_offs; 287 | int gl_flags; 288 | void (*gl_closedir)(void *); 289 | void *(*gl_readdir)(void *); 290 | void *(*gl_opendir)(const char *); 291 | int (*gl_lstat)(const char *, void *); 292 | int (*gl_stat)(const char *, void *); 293 | } glob_t; 294 | typedef struct _IO_FILE FILE; 295 | typedef void *iconv_t; 296 | typedef int __gwchar_t; 297 | typedef struct 298 | { 299 | long int quot; 300 | long int rem; 301 | } imaxdiv_t; 302 | typedef void *nl_catd; 303 | typedef int nl_item; 304 | typedef float float_t; 305 | typedef double double_t; 306 | typedef enum 307 | { 308 | _IEEE_ = -1, 309 | _SVID_, 310 | _XOPEN_, 311 | _POSIX_, 312 | _ISOC_ 313 | } _LIB_VERSION_TYPE; 314 | typedef int mqd_t; 315 | typedef u_int32_t tcp_seq; 316 | typedef unsigned long int nfds_t; 317 | typedef unsigned long int __cpu_mask; 318 | typedef struct 319 | { 320 | __cpu_mask __bits[1024 / (8 * (sizeof(__cpu_mask)))]; 321 | } cpu_set_t; 322 | typedef long int __jmp_buf[8]; 323 | typedef struct 324 | { 325 | struct 326 | { 327 | __jmp_buf __cancel_jmp_buf; 328 | int __mask_was_saved; 329 | } __cancel_jmp_buf[1]; 330 | void 
*__pad[4]; 331 | } __pthread_unwind_buf_t; 332 | typedef long int s_reg_t; 333 | typedef unsigned long int active_reg_t; 334 | typedef unsigned long int reg_syntax_t; 335 | typedef enum 336 | { 337 | REG_ENOSYS = -1, 338 | REG_NOERROR = 0, 339 | REG_NOMATCH, 340 | REG_BADPAT, 341 | REG_ECOLLATE, 342 | REG_ECTYPE, 343 | REG_EESCAPE, 344 | REG_ESUBREG, 345 | REG_EBRACK, 346 | REG_EPAREN, 347 | REG_EBRACE, 348 | REG_BADBR, 349 | REG_ERANGE, 350 | REG_ESPACE, 351 | REG_BADRPT, 352 | REG_EEND, 353 | REG_ESIZE, 354 | REG_ERPAREN 355 | } reg_errcode_t; 356 | typedef struct re_pattern_buffer regex_t; 357 | typedef int regoff_t; 358 | typedef struct 359 | { 360 | regoff_t rm_so; 361 | regoff_t rm_eo; 362 | } regmatch_t; 363 | typedef int (*__compar_fn_t)(const void *, const void *); 364 | typedef enum 365 | { 366 | FIND, 367 | ENTER 368 | } ACTION; 369 | typedef struct entry 370 | { 371 | char *key; 372 | void *data; 373 | } ENTRY; 374 | typedef enum 375 | { 376 | preorder, 377 | postorder, 378 | endorder, 379 | leaf 380 | } VISIT; 381 | typedef void (*__action_fn_t)(const void *__nodep, VISIT __value, int __level); 382 | typedef union 383 | { 384 | char __size[32]; 385 | long int __align; 386 | } sem_t; 387 | typedef struct __jmp_buf_tag jmp_buf[1]; 388 | typedef struct __jmp_buf_tag sigjmp_buf[1]; 389 | typedef __sig_atomic_t sig_atomic_t; 390 | typedef __clock_t __sigchld_clock_t; 391 | typedef struct 392 | { 393 | int si_signo; 394 | int si_errno; 395 | int si_code; 396 | union 397 | { 398 | int _pad[(128 / (sizeof(int))) - 4]; 399 | struct 400 | { 401 | __pid_t si_pid; 402 | __uid_t si_uid; 403 | } _kill; 404 | struct 405 | { 406 | int si_tid; 407 | int si_overrun; 408 | sigval_t si_sigval; 409 | } _timer; 410 | struct 411 | { 412 | __pid_t si_pid; 413 | __uid_t si_uid; 414 | sigval_t si_sigval; 415 | } _rt; 416 | struct 417 | { 418 | __pid_t si_pid; 419 | __uid_t si_uid; 420 | int si_status; 421 | __sigchld_clock_t si_utime; 422 | __sigchld_clock_t si_stime; 423 | } _sigchld; 424 | struct 425 | { 426 | void *si_addr; 427 | short int si_addr_lsb; 428 | struct 429 | { 430 | void *_lower; 431 | void *_upper; 432 | } si_addr_bnd; 433 | } _sigfault; 434 | struct 435 | { 436 | long int si_band; 437 | int si_fd; 438 | } _sigpoll; 439 | struct 440 | { 441 | void *_call_addr; 442 | int _syscall; 443 | unsigned int _arch; 444 | } _sigsys; 445 | } _sifields; 446 | } siginfo_t; 447 | typedef void (*__sighandler_t)(int); 448 | typedef __sighandler_t sig_t; 449 | typedef struct sigaltstack 450 | { 451 | void *ss_sp; 452 | int ss_flags; 453 | size_t ss_size; 454 | } stack_t; 455 | typedef long long int greg_t; 456 | typedef greg_t gregset_t[23]; 457 | typedef struct _libc_fpstate *fpregset_t; 458 | typedef struct 459 | { 460 | gregset_t gregs; 461 | fpregset_t fpregs; 462 | unsigned long long __reserved1[8]; 463 | } mcontext_t; 464 | typedef struct ucontext 465 | { 466 | unsigned long int uc_flags; 467 | struct ucontext *uc_link; 468 | stack_t uc_stack; 469 | mcontext_t uc_mcontext; 470 | __sigset_t uc_sigmask; 471 | struct _libc_fpstate __fpregs_mem; 472 | } ucontext_t; 473 | typedef struct 474 | { 475 | short int __flags; 476 | pid_t __pgrp; 477 | sigset_t __sd; 478 | sigset_t __ss; 479 | struct sched_param __sp; 480 | int __policy; 481 | int __pad[16]; 482 | } posix_spawnattr_t; 483 | typedef struct 484 | { 485 | int __allocated; 486 | int __used; 487 | struct __spawn_action *__actions; 488 | int __pad[16]; 489 | } posix_spawn_file_actions_t; 490 | typedef __builtin_va_list __gnuc_va_list; 491 | typedef 
__gnuc_va_list va_list; 492 | typedef long int ptrdiff_t; 493 | typedef int wchar_t; 494 | typedef struct 495 | { 496 | long long __max_align_ll; 497 | long double __max_align_ld; 498 | } max_align_t; 499 | typedef struct _IO_FILE __FILE; 500 | typedef struct 501 | { 502 | int __count; 503 | union 504 | { 505 | unsigned int __wch; 506 | char __wchb[4]; 507 | } __value; 508 | } __mbstate_t; 509 | typedef struct 510 | { 511 | __off_t __pos; 512 | __mbstate_t __state; 513 | } _G_fpos_t; 514 | typedef struct 515 | { 516 | __off64_t __pos; 517 | __mbstate_t __state; 518 | } _G_fpos64_t; 519 | typedef void _IO_lock_t; 520 | typedef struct _IO_FILE _IO_FILE; 521 | typedef __ssize_t __io_read_fn(void *__cookie, char *__buf, size_t __nbytes); 522 | typedef __ssize_t __io_write_fn(void *__cookie, const char *__buf, size_t __n); 523 | typedef int __io_seek_fn(void *__cookie, __off64_t *__pos, int __w); 524 | typedef int __io_close_fn(void *__cookie); 525 | typedef _G_fpos_t fpos_t; 526 | typedef enum 527 | { 528 | P_ALL, 529 | P_PID, 530 | P_PGID 531 | } idtype_t; 532 | typedef union 533 | { 534 | union wait *__uptr; 535 | int *__iptr; 536 | } __WAIT_STATUS; 537 | typedef struct 538 | { 539 | int quot; 540 | int rem; 541 | } div_t; 542 | typedef struct 543 | { 544 | long int quot; 545 | long int rem; 546 | } ldiv_t; 547 | typedef struct 548 | { 549 | long long int quot; 550 | long long int rem; 551 | } lldiv_t; 552 | typedef int __t_scalar_t; 553 | typedef unsigned int __t_uscalar_t; 554 | typedef __t_scalar_t t_scalar_t; 555 | typedef __t_uscalar_t t_uscalar_t; 556 | typedef int __ipc_pid_t; 557 | typedef __syscall_ulong_t msgqnum_t; 558 | typedef __syscall_ulong_t msglen_t; 559 | typedef __rlim_t rlim_t; 560 | typedef int __rlimit_resource_t; 561 | typedef int __rusage_who_t; 562 | typedef int __priority_which_t; 563 | typedef __syscall_ulong_t shmatt_t; 564 | typedef struct timezone *__timezone_ptr_t; 565 | typedef int __itimer_which_t; 566 | typedef unsigned char cc_t; 567 | typedef unsigned int speed_t; 568 | typedef unsigned int tcflag_t; 569 | typedef __useconds_t useconds_t; 570 | typedef unsigned int wint_t; 571 | typedef __mbstate_t mbstate_t; 572 | typedef unsigned long int wctype_t; 573 | typedef const __int32_t *wctrans_t; 574 | typedef struct 575 | { 576 | size_t we_wordc; 577 | char **we_wordv; 578 | size_t we_offs; 579 | } wordexp_t; 580 | 581 | /* Left-overs from original fake_libc */ 582 | 583 | typedef int __int_least16_t; 584 | typedef int __uint_least16_t; 585 | typedef int __int_least32_t; 586 | typedef int __uint_least32_t; 587 | typedef int __s8; 588 | typedef int __u8; 589 | typedef int __s16; 590 | typedef int __u16; 591 | typedef int __s32; 592 | typedef int __u32; 593 | typedef int __s64; 594 | typedef int __u64; 595 | typedef int _LOCK_T; 596 | typedef int _LOCK_RECURSIVE_T; 597 | typedef int _flock_t; 598 | typedef int _iconv_t; 599 | typedef int __ULong; 600 | typedef int _types_fd_set; 601 | typedef int cookie_read_function_t; 602 | typedef int cookie_write_function_t; 603 | typedef int cookie_seek_function_t; 604 | typedef int cookie_close_function_t; 605 | typedef int cookie_io_functions_t; 606 | typedef int _sig_func_ptr; 607 | typedef int __tzrule_type; 608 | typedef int __tzinfo_type; 609 | typedef int z_stream; 610 | typedef _Bool bool; 611 | typedef void* MirEGLNativeWindowType; 612 | typedef void* MirEGLNativeDisplayType; 613 | typedef struct MirConnection MirConnection; 614 | typedef struct MirSurface MirSurface; 615 | typedef struct MirSurfaceSpec 
MirSurfaceSpec; 616 | typedef struct MirScreencast MirScreencast; 617 | typedef struct MirPromptSession MirPromptSession; 618 | typedef struct MirBufferStream MirBufferStream; 619 | typedef struct MirPersistentId MirPersistentId; 620 | typedef struct MirBlob MirBlob; 621 | typedef struct MirDisplayConfig MirDisplayConfig; 622 | typedef struct xcb_connection_t xcb_connection_t; 623 | typedef uint32_t xcb_window_t; 624 | typedef uint32_t xcb_visualid_t; 625 | 626 | typedef int __end_of_fake_libc__; 627 | 628 | #endif 629 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/_syslist.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/aio.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/alloca.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/ar.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/argz.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/arpa/inet.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/asm-generic/int-ll64.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/assert.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/complex.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/cpio.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/ctype.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- 
/scripts/fake_libc_include/dirent.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/dlfcn.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/emmintrin.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/endian.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/envz.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/errno.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/fastmath.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/fcntl.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/features.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/fenv.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/float.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/fmtmsg.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/fnmatch.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/ftw.h: 
-------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/getopt.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/glob.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/grp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/iconv.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/ieeefp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/immintrin.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/inttypes.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/iso646.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/langinfo.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/libgen.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/libintl.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/limits.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/linux/socket.h: -------------------------------------------------------------------------------- 1 | 
#include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/linux/version.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/locale.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/malloc.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/math.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/mir_toolkit/client_types.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/monetary.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/mqueue.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/ndbm.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/net/if.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/netdb.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/netinet/in.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/netinet/tcp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/newlib.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | 
-------------------------------------------------------------------------------- /scripts/fake_libc_include/nl_types.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/openssl/err.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/openssl/evp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/openssl/hmac.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/openssl/ssl.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/openssl/x509v3.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/paths.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/poll.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/process.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/pthread.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/pwd.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/reent.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/regdef.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | 
-------------------------------------------------------------------------------- /scripts/fake_libc_include/regex.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sched.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/search.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/semaphore.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/setjmp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/signal.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/smmintrin.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/spawn.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/stdarg.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/stdbool.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/stddef.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/stdint.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/stdio.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- 
/scripts/fake_libc_include/stdlib.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/string.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/strings.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/stropts.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/ioctl.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/ipc.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/mman.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/msg.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/poll.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/resource.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/select.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/sem.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/shm.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/socket.h: 
-------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/stat.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/statvfs.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/sysctl.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/time.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/times.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/types.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/uio.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/un.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/utsname.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/sys/wait.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/syslog.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/tar.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/termios.h: 
-------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/tgmath.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/time.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/trace.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/ulimit.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/unctrl.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/unistd.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/utime.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/utmp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/utmpx.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/wchar.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/wctype.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/wordexp.h: -------------------------------------------------------------------------------- 1 | #include "_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/xcb/xcb.h: -------------------------------------------------------------------------------- 1 | #include 
"_fake_defines.h" 2 | #include "_fake_typedefs.h" 3 | -------------------------------------------------------------------------------- /scripts/fake_libc_include/zlib.h: -------------------------------------------------------------------------------- 1 | #ifndef ZLIB_H 2 | #define ZLIB_H 3 | 4 | #include "_fake_defines.h" 5 | #include "_fake_typedefs.h" 6 | 7 | typedef int uInt; 8 | typedef int uLong; 9 | #if !defined(__MACTYPES__) 10 | typedef int Byte; 11 | #endif 12 | 13 | typedef int Bytef; 14 | typedef int charf; 15 | typedef int intf; 16 | typedef int uIntf; 17 | typedef int uLongf; 18 | 19 | typedef int voidpc; 20 | typedef int voidpf; 21 | typedef int voidp; 22 | 23 | #if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) 24 | typedef int Z_U4; 25 | #endif 26 | 27 | typedef int z_crc_t; 28 | typedef int z_size_t; 29 | 30 | typedef int alloc_func; 31 | typedef int free_func; 32 | 33 | typedef int __end_of_fake_libc__; 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /scripts/mock_path/batch_make.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import functools 3 | import multiprocessing as mp 4 | import os 5 | import pickle 6 | import queue 7 | import time 8 | from typing import Dict, List, Optional 9 | 10 | import argtyped 11 | import flutes 12 | from argtyped import Switch 13 | 14 | import ghcc 15 | 16 | 17 | class Arguments(argtyped.Arguments): 18 | compile_timeout: int = 900 # wait up to 15 minutes 19 | record_libraries: Switch = False 20 | gcc_override_flags: Optional[str] = None 21 | use_makefile_info_pkl: Switch = False 22 | single_process: Switch = False # useful for debugging 23 | verbose: Switch = False 24 | 25 | 26 | args = Arguments() 27 | 28 | TIMEOUT_TOLERANCE = 5 # allow worker process to run for maximum 5 seconds beyond timeout 29 | REPO_PATH = "/usr/src/repo" 30 | BINARY_PATH = "/usr/src/bin" 31 | 32 | 33 | def compile_makefiles(): 34 | if args.use_makefile_info_pkl: 35 | # Use information from previous compilations. 36 | # This is used when matching decompiled functions to original code. 37 | makefile_info: Dict[str, Dict[str, str]] = {} # make_dir -> (binary_path -> binary_sha256) 38 | with open(os.path.join(BINARY_PATH, "makefiles.pkl"), "rb") as f: 39 | makefile_info = pickle.load(f) 40 | # Convert this back to absolute path... 
41 | makefile_info = {os.path.abspath(os.path.join(REPO_PATH, path)): binaries 42 | for path, binaries in makefile_info.items()} 43 | 44 | def check_file_fn(directory: str, file: str) -> bool: 45 | return file in makefile_info[directory] 46 | 47 | def hash_fn(directory: str, path: str) -> str: 48 | return makefile_info[directory][path] 49 | 50 | compile_fn = functools.partial( 51 | ghcc.compile._make_skeleton, make_fn=ghcc.compile._unsafe_make, check_file_fn=check_file_fn) 52 | makefile_dirs = list(makefile_info.keys()) 53 | kwargs = {"compile_fn": compile_fn, "hash_fn": hash_fn} 54 | else: 55 | makefile_dirs = ghcc.find_makefiles(REPO_PATH) 56 | kwargs = {"compile_fn": ghcc.unsafe_make} 57 | 58 | for makefile in ghcc.compile_and_move( 59 | BINARY_PATH, REPO_PATH, makefile_dirs, 60 | compile_timeout=args.compile_timeout, record_libraries=args.record_libraries, 61 | gcc_override_flags=args.gcc_override_flags, **kwargs): 62 | makefile['directory'] = os.path.relpath(makefile['directory'], REPO_PATH) 63 | yield makefile 64 | 65 | 66 | def worker(q: mp.Queue): 67 | for makefile in compile_makefiles(): 68 | q.put(makefile) 69 | 70 | 71 | def read_queue(makefiles: List[ghcc.RepoDB.MakefileEntry], q: 'mp.Queue[ghcc.RepoDB.MakefileEntry]'): 72 | try: 73 | while True: 74 | makefiles.append(q.get_nowait()) 75 | except queue.Empty: 76 | pass # queue empty, wait until next round 77 | except (OSError, ValueError): 78 | pass # data in queue could be corrupt, e.g. if worker process is terminated while enqueueing 79 | 80 | 81 | def main(): 82 | if args.single_process: 83 | makefiles = list(compile_makefiles()) 84 | else: 85 | q = mp.Queue() 86 | process = mp.Process(target=worker, args=(q,)) 87 | process.start() 88 | start_time = time.time() 89 | 90 | makefiles: List[ghcc.RepoDB.MakefileEntry] = [] 91 | while process.is_alive(): 92 | time.sleep(2) # no rush 93 | cur_time = time.time() 94 | # Get stuff out of the queue before possible termination -- otherwise it might deadlock. 95 | # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing-programming, 96 | # the "Joining processes that use queues" section. 97 | read_queue(makefiles, q) 98 | # Note that it's still possible to have deadlocks if the child process pushed new elements into the queue 99 | # after we read and before we terminate. A better solution would be to send a message to the child and ask 100 | # it to quit, and only terminate when it doesn't respond. However, this current implementation is probably 101 | # good enough for most cases. 
102 | if cur_time - start_time > args.compile_timeout + TIMEOUT_TOLERANCE: 103 | process.terminate() 104 | print(f"Timeout ({args.compile_timeout}s), killed", flush=True) 105 | ghcc.clean(REPO_PATH) # clean up after the worker process 106 | break 107 | read_queue(makefiles, q) 108 | 109 | flutes.kill_proc_tree(os.getpid(), including_parent=False) # make sure all subprocesses are dead 110 | with open(os.path.join(BINARY_PATH, "log.pkl"), "wb") as f: 111 | pickle.dump(makefiles, f) 112 | flutes.run_command(["chmod", "-R", "g+w", BINARY_PATH]) 113 | flutes.run_command(["chmod", "-R", "g+w", REPO_PATH]) 114 | 115 | 116 | if __name__ == '__main__': 117 | main() 118 | -------------------------------------------------------------------------------- /scripts/mock_path/cc: -------------------------------------------------------------------------------- 1 | gcc -------------------------------------------------------------------------------- /scripts/mock_path/clang: -------------------------------------------------------------------------------- 1 | gcc -------------------------------------------------------------------------------- /scripts/mock_path/gcc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | r"""A fake gcc implementation which records input/output files, adds the -O0 flag, and then calls the real gcc. 3 | """ 4 | import argparse 5 | import os 6 | import subprocess 7 | import sys 8 | 9 | 10 | def filter_filenames(args): 11 | return [arg for arg in args if arg.endswith('.c') or arg.endswith('.h')] 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('-o') 17 | parser.add_argument('-c', action='store_true') 18 | # Override the optimization level with -O0. 19 | parser.add_argument('-O', nargs='?') 20 | # Record the libraries used. 21 | parser.add_argument('-l', action='append') 22 | # Ignore flags that select platform-specific code generation; we only target our own platform. 23 | parser.add_argument('-mabi') # ignored 24 | parser.add_argument('-march') # ignored 25 | parser.add_argument('-mtune') # ignored 26 | # Swallow flags that are unnecessary for us. 27 | parser.add_argument('-Wall', action='store_true') 28 | parser.add_argument('-Werror', action='store_true') 29 | parser.add_argument('-Wextra', action='store_true') 30 | # Link-time optimization would cause `-g` to be ignored on older versions of GCC. 31 | parser.add_argument('-flto', action='store_true') # ignored 32 | parser.add_argument('-mlittle-endian', action='store_true') # ignored 33 | parser.add_argument('-mapcs', action='store_true') # ignored 34 | parser.add_argument('-mno-sched-prolog', action='store_true') # ignored 35 | args, unknown_args = parser.parse_known_args(sys.argv[1:]) 36 | 37 | # The `-ggdb[level]` flag preserves macros at certain levels, which would cause problems with `pycparser`. 38 | unknown_args = [arg for arg in unknown_args if not arg.startswith("-ggdb")] 39 | 40 | # Remove the mock path from PATH to find the actual GCC. 41 | cur_path = os.path.abspath(os.path.split(__file__)[0]) 42 | all_paths = [os.path.abspath(path) for path in os.environ["PATH"].split(":")] 43 | env = {b"PATH": ':'.join(path for path in all_paths if path != cur_path).encode('utf-8')} 44 | 45 | override_flags = os.environ.get("MOCK_GCC_OVERRIDE_FLAGS", "").split() 46 | 47 | # Gather library names that the program links against. 48 | # Note that this is only called when an exception occurs or GCC fails, so successful compilations (where all required libraries are installed) leave no log entries.
49 | def write_libraries(): 50 | log_path = os.environ.get("MOCK_GCC_LIBRARY_LOG", "").strip() 51 | if len(log_path) > 0 and args.l: 52 | with open(log_path, "a") as f: 53 | f.write('\n'.join(args.l) + '\n') 54 | 55 | filenames = filter_filenames(unknown_args) 56 | out_file = None 57 | if args.o: 58 | out_file = args.o 59 | elif args.c: 60 | for f in filenames: 61 | if f.endswith('.c'): 62 | out_file = os.path.splitext(f)[0] + ".o" 63 | if out_file is None: 64 | out_file = 'a.out' 65 | 66 | known_args = [] 67 | if args.c: 68 | known_args.append("-c") 69 | 70 | try: 71 | gcc = "gcc" # "gcc-4.7" 72 | # When multiple -O options are specified, the last one takes precedence. 73 | gcc_args = [gcc] + known_args + unknown_args + ["-o", out_file, "-O0", "-g"] + override_flags 74 | # Add linker options after files that use them. 75 | gcc_args.extend([f"-l{lib}" for lib in (args.l or [])]) 76 | sys.stderr.write("Mock GCC: " + ' '.join(gcc_args) + "\n") 77 | # Redirecting to a pipe could prevent GCC from producing colored output. 78 | process = subprocess.Popen(gcc_args, stdout=sys.stdout, stderr=sys.stderr, env=env) 79 | process.wait() 80 | if process.returncode != 0: 81 | write_libraries() 82 | sys.stderr.write(f"Return code: {process.returncode}\n") 83 | exit(process.returncode) 84 | except Exception as e: 85 | write_libraries() 86 | sys.stderr.write(f"Mock GCC: Exception: {e}\n") 87 | exit(2) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /scripts/mock_path/install_libraries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import os 4 | import re 5 | import subprocess 6 | import tempfile 7 | 8 | import flutes 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("file", type=str) # path to libraries log file 12 | parser.add_argument("--skip-to", type=str, default=None) # name of library to skip to 13 | parser.add_argument("--skip-after", type=str, default=None) # name of library after which to resume 14 | args = parser.parse_args() 15 | 16 | 17 | def skip_until(elem, iterator): 18 | flag = False 19 | for x in iterator: 20 | if x == elem: 21 | flag = True 22 | if flag: 23 | yield x 24 | 25 | 26 | def skip_after(elem, iterator): 27 | flag = False 28 | for x in iterator: 29 | if flag: 30 | yield x 31 | if x == elem: 32 | flag = True 33 | 34 | 35 | def main(): 36 | with open(args.file) as f: 37 | libraries = f.read().split() 38 | 39 | # Create a dummy .c file for compilation / linking. 40 | tempdir = tempfile.TemporaryDirectory() 41 | src_path = os.path.join(tempdir.name, "main.c") 42 | with open(src_path, "w") as f: 43 | f.write(r""" 44 | #include <stdio.h> 45 | int main() { 46 | printf("Hello world!\n"); 47 | return 0; 48 | }""") 49 | 50 | def check_installed(_library: str) -> bool: 51 | try: 52 | _ret = flutes.run_command(["gcc", src_path, f"-l{_library}"], cwd=tempdir.name) 53 | return _ret.return_code == 0 54 | except subprocess.CalledProcessError: 55 | return False 56 | 57 | packages_to_install = [] 58 | flutes.run_command(["apt-get", "update"]) # refresh package index just in case 59 | 60 | if args.skip_to is not None: 61 | libraries = skip_until(args.skip_to, libraries) 62 | elif args.skip_after is not None: 63 | libraries = skip_after(args.skip_after, libraries) 64 | for lib in libraries: 65 | # Check if library is installed -- whether linking succeeds.
66 | if check_installed(lib): 67 | flutes.log(f"'{lib}' is installed", "info") 68 | continue 69 | 70 | # Find the correct package for the name: 71 | # 1. Enumerate different package names. 72 | # 2. Search for the package with `apt search`. 73 | # 3. Install the package. 74 | # 4. Retry compilation to see if it succeeds. 75 | libname = lib.replace("_", "[-_]").replace("+", r"\+") 76 | names = [f"lib{libname}-dev", 77 | f"lib{libname}(-?[0-9.]+)?-dev", 78 | f"lib{libname}(-?[0-9.]+)?", 79 | f"{libname}(-?[0-9.]+)?-dev", 80 | libname] 81 | for name in names: 82 | ret = flutes.run_command(["apt-cache", "--quiet", "search", name], timeout=10, return_output=True) 83 | packages = [line.split()[0] for line in ret.captured_output.decode('utf-8').split('\n') if line] 84 | if len(packages) > 0: 85 | # Find the best-matching package name. This is done in a simple way: search the package name using the 86 | # regex, and count the number of characters outside the match. The fewer such characters there are, the more 87 | # accurate the match. 88 | regex = re.compile(name) 89 | packages_rank = [] 90 | for p in packages: 91 | match = regex.search(p) 92 | if match is not None: 93 | packages_rank.append((len(p) - (match.end() - match.start()), p)) 94 | if len(packages_rank) == 0: 95 | continue 96 | package = min(packages_rank)[1] 97 | print(f"Trying {package} for {lib}", flush=True) 98 | ret = flutes.run_command(["apt-get", "install", "--dry-run", package], return_output=True) 99 | match = re.search(r"(\d+) newly installed", ret.captured_output.decode('utf-8')) 100 | if match is not None: 101 | install_count = int(match.group(1)) 102 | if install_count > 50: 103 | # Too many new packages to install; ignore. 104 | continue 105 | try: 106 | # Do not use a timeout, otherwise DPKG will break. 107 | ret = flutes.run_command(["apt-get", "install", "-y", "--no-install-recommends", package]) 108 | except subprocess.CalledProcessError as e: 109 | flutes.log(f"Exception occurred when installing '{package}' for '{lib}': {e}", "error") 110 | continue 111 | if ret.return_code != 0 or not check_installed(lib): 112 | # Although it's not the package we want, do not try to uninstall it, as that may cause other errors. 113 | continue 114 | 115 | packages_to_install.append(package) 116 | flutes.log(f"'{lib}' resolved to '{package}'", "success") 117 | break 118 | else: 119 | flutes.log(f"Failed to resolve '{lib}'", "error") 120 | 121 | flutes.log(f"Packages to install are:\n" + '\n'.join(packages_to_install), "info") 122 | 123 | 124 | if __name__ == '__main__': 125 | main() 126 | -------------------------------------------------------------------------------- /scripts/mock_path/pkg-config: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | r"""A fake pkg-config implementation which records the libraries being resolved. 3 | """ 4 | import os 5 | import subprocess 6 | import sys 7 | 8 | 9 | def main(): 10 | # Remove the mock path from PATH to find the actual pkg-config. 11 | cur_path = os.path.abspath(os.path.split(__file__)[0]) 12 | all_paths = [os.path.abspath(path) for path in os.environ["PATH"].split(":")] 13 | env = {b"PATH": ':'.join(path for path in all_paths if path != cur_path).encode('utf-8')} 14 | 15 | # Gather library names that the program links against. Note that this is only called when an exception occurs or 16 | # pkg-config fails, so libraries that are already installed are never logged.
17 | def write_libraries(): 18 | log_path = os.environ.get("MOCK_GCC_LIBRARY_LOG", "").strip() 19 | if len(log_path) > 0: 20 | libs = [arg for arg in sys.argv[1:] if arg[0] != "-"] 21 | if len(libs) > 0: 22 | with open(log_path, "a") as f: 23 | f.write('\n'.join(libs) + '\n') 24 | 25 | command = ["pkg-config"] + sys.argv[1:] 26 | try: 27 | # Redirecting to a pipe could prevent pkg-config from producing colored output. 28 | process = subprocess.Popen(command, stdout=sys.stdout, stderr=sys.stderr, env=env) 29 | process.wait() 30 | if process.returncode != 0: 31 | write_libraries() 32 | sys.stderr.write(f"Mock pkg-config return code: {process.returncode}, command: {' '.join(command)}\n") 33 | exit(process.returncode) 34 | except Exception as e: 35 | write_libraries() 36 | sys.stderr.write(f"Mock pkg-config command: {' '.join(command)}, exception: {e}\n") 37 | exit(2) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /scripts/mock_path/sudo: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | r"""A fake `sudo` implementation that does not prompt for a password and just executes the command. 3 | """ 4 | import subprocess 5 | import sys 6 | 7 | if __name__ == '__main__': 8 | exit(subprocess.call(sys.argv[1:])) 9 | -------------------------------------------------------------------------------- /tests/compile_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import tempfile 4 | import unittest 5 | from typing import List 6 | 7 | import flutes 8 | 9 | import ghcc 10 | 11 | 12 | class CompileTest(unittest.TestCase): 13 | def setUp(self) -> None: 14 | self.tempdir = tempfile.TemporaryDirectory() 15 | self.repo_owner = "pjreddie" 16 | self.repo_name = "uwimg" 17 | 18 | # Clone an existing repo. 19 | result = ghcc.clone(self.repo_owner, self.repo_name, clone_folder=self.tempdir.name, skip_if_exists=False) 20 | assert result.success is True, result.captured_output 21 | 22 | self.directory = os.path.join(self.tempdir.name, self.repo_owner, self.repo_name) 23 | self.target_elfs = [ 24 | "libuwimg.so", 25 | "obj/args.o", 26 | "obj/classifier.o", 27 | "obj/data.o", 28 | "obj/filter_image.o", 29 | "obj/flow_image.o", 30 | "obj/harris_image.o", 31 | "obj/image_opencv.o", 32 | "obj/list.o", 33 | "obj/load_image.o", 34 | "obj/main.o", 35 | "obj/matrix.o", 36 | "obj/panorama_image.o", 37 | "obj/process_image.o", 38 | "obj/resize_image.o", 39 | "obj/test.o", 40 | "uwimg", 41 | ] 42 | 43 | def tearDown(self) -> None: 44 | self.tempdir.cleanup() 45 | 46 | def _test_debug_info(self, elf_paths: List[str]) -> None: 47 | # Check if binaries contain debugging information (whether mock GCC works). 48 | for elf in elf_paths: 49 | # NOTE: This doesn't work under macOS. 50 | ret = flutes.run_command(f"objdump --syms {elf} | grep debug | wc -l", return_output=True, shell=True) 51 | assert int(ret.captured_output.decode('utf-8')) > 0 52 | 53 | def _test_compile(self, compile_func) -> None: 54 | # Find Makefiles. 55 | makefiles = ghcc.find_makefiles(self.directory) 56 | self.assertEqual([self.directory], makefiles) 57 | 58 | # Try compiling.
59 | result = compile_func(makefiles[0], timeout=15) 60 | assert result.success is True, result.captured_output 61 | assert set(self.target_elfs) == set(result.elf_files), result.captured_output 62 | 63 | elf_paths = [os.path.join(self.directory, elf) for elf in self.target_elfs] 64 | self._test_debug_info(elf_paths) 65 | 66 | def test_compile(self) -> None: 67 | # NOTE: This doesn't work under macOS. 68 | self._test_compile(ghcc.unsafe_make) 69 | 70 | def test_docker_compile(self) -> None: 71 | ghcc.utils.verify_docker_image() 72 | self._test_compile(ghcc.docker_make) 73 | 74 | def test_docker_batch_compile(self) -> None: 75 | ghcc.utils.verify_docker_image() 76 | binary_dir = os.path.join(self.tempdir.name, "_bin") 77 | os.makedirs(binary_dir) 78 | result = ghcc.docker_batch_compile(binary_dir, self.directory, 20, record_libraries=True, user_id=0) 79 | assert len(result) == 1 80 | assert set(self.target_elfs) == set(result[0]["binaries"]) 81 | 82 | elf_paths = [os.path.join(binary_dir, file) for file in result[0]["sha256"]] 83 | self._test_debug_info(elf_paths) 84 | 85 | def test_gcc_library_log(self) -> None: 86 | from ghcc.compile import MOCK_PATH 87 | library_log_path = os.path.join(self.tempdir.name, "libraries.txt") 88 | env = { 89 | "PATH": f"{MOCK_PATH}:{os.environ['PATH']}", 90 | "MOCK_GCC_LIBRARY_LOG": library_log_path, 91 | } 92 | libraries = ["pthread", "m", "opencv", "openmp", "library_with_random_name"] 93 | try: 94 | flutes.run_command( 95 | ["gcc", *[f"-l{lib}" for lib in libraries], "nonexistent_file.c"], env=env) 96 | except subprocess.CalledProcessError: 97 | pass # error must occur because file is nonexistent 98 | assert os.path.exists(library_log_path) 99 | with open(library_log_path) as f: 100 | recorded_libraries = f.read().split() 101 | assert set(libraries) == set(recorded_libraries) 102 | -------------------------------------------------------------------------------- /tests/parse_test.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | from pathlib import Path 4 | from typing import List, Tuple 5 | 6 | import pycparser 7 | from pycparser.c_ast import Node as ASTNode 8 | from pycparser.c_generator import CGenerator 9 | 10 | import ghcc 11 | import match_functions 12 | 13 | EXAMPLE_CODE: List[Tuple[str, List[int]]] = [ # [(code, [type_token_pos])] 14 | (r""" 15 | typedef int some_type; 16 | 17 | char * (*ret_func(void *(*a1)(long long))) (some_type a) 18 | { } 19 | 20 | some_type * (*complete_sym(long long arg)) (long long a1, int a2) { 21 | } 22 | """, [1, 4, 10, 17, 18, 23, 28, 34, 35, 40, 41, 44]), 23 | ] 24 | 25 | 26 | class ParsingTest(unittest.TestCase): 27 | def setUp(self) -> None: 28 | self.tempdir = tempfile.TemporaryDirectory() 29 | self.parser = pycparser.CParser(lexer=ghcc.parse.CachedCLexer) 30 | 31 | def tearDown(self) -> None: 32 | self.tempdir.cleanup() 33 | 34 | def _test_ast_equivalent(self, a: ASTNode, b: ASTNode) -> None: 35 | assert a.attr_names == b.attr_names 36 | for name in a.attr_names: 37 | assert getattr(a, name) == getattr(b, name) 38 | for (name_a, child_a), (name_b, child_b) in zip(a.children(), b.children()): 39 | assert name_a == name_b 40 | self._test_ast_equivalent(child_a, child_b) 41 | 42 | def test_pycparser(self) -> None: 43 | # Ensure we're using the right version of `pycparser` (and that we've cleaned generated tables from previous 44 | # versions) by parsing a string that takes exponential time in versions prior to 2.20. 
45 | string = r"\xED\xFF\xFF\xEB\x04\xe0\x2d\xe5\x00\x00\x00\x00\xe0\x83\x22\xe5\xf1\x02\x03\x0e\x00\x00\xa0\xe3" \ 46 | r"\x02\x30\xc1\xe7\x00\x00\x53\xe3" 47 | code = rf'char *s = "{string}";' 48 | ast = self.parser.parse(code) 49 | assert ast.ext[0].init.value == f'"{string}"' 50 | 51 | def test_serialization(self) -> None: 52 | # Clone the `pycparser` repo. 53 | result = ghcc.clone("eliben", "pycparser", clone_folder=self.tempdir.name) 54 | assert result.success 55 | 56 | def _test(code: str): 57 | ast = self.parser.parse(code) 58 | json_dict = ghcc.parse.ast_to_dict(ast) 59 | deserialized_ast = ghcc.parse.dict_to_ast(json_dict) 60 | self._test_ast_equivalent(ast, deserialized_ast) 61 | 62 | for file in (Path(self.tempdir.name) / "eliben" / "pycparser" / "examples" / "c_files").iterdir(): 63 | preprocessed_code = ghcc.parse.preprocess_file(str(file)) 64 | _test(preprocessed_code) 65 | 66 | for code, _ in EXAMPLE_CODE: 67 | preprocessed_code = ghcc.parse.preprocess(code) 68 | _test(preprocessed_code) 69 | 70 | def test_token_pos(self) -> None: 71 | for code, type_token_pos in EXAMPLE_CODE: 72 | preprocessed_code = ghcc.parse.preprocess(code) 73 | ast = self.parser.parse(preprocessed_code) 74 | token_coords = ghcc.parse.convert_to_tokens(preprocessed_code, self.parser.clex.cached_tokens) 75 | json_dict = ghcc.parse.ast_to_dict(ast, token_coords) 76 | found_type_token_pos = set() 77 | 78 | def visit_fn(node: ghcc.parse.JSONNode, _) -> None: 79 | if (node[ghcc.parse.NODE_TYPE_ATTR] == "IdentifierType" and 80 | node[ghcc.parse.TOKEN_POS_ATTR] is not None): 81 | for idx in range(len(node["names"])): 82 | found_type_token_pos.add(node[ghcc.parse.TOKEN_POS_ATTR] + idx) 83 | 84 | ghcc.parse.visit_dict(visit_fn, json_dict) 85 | assert found_type_token_pos == set(type_token_pos) 86 | 87 | 88 | class MatchFunctionsTest(unittest.TestCase): 89 | def setUp(self) -> None: 90 | self.parser = pycparser.CParser(lexer=ghcc.parse.CachedCLexer) 91 | self.generator = CGenerator() 92 | self.lexer = ghcc.parse.LexerWrapper() 93 | 94 | def test_serialize(self) -> None: 95 | for code, _ in EXAMPLE_CODE: 96 | preprocessed_code = ghcc.parse.preprocess(code) 97 | ast = self.parser.parse(preprocessed_code) 98 | token_coords = ghcc.parse.convert_to_tokens(preprocessed_code, self.parser.clex.cached_tokens) 99 | functions = ghcc.parse.FunctionExtractor().find_functions(ast) 100 | for func_ast in functions.values(): 101 | ast_dict, tokens = match_functions.serialize(func_ast, token_coords) 102 | original_code = self.lexer.lex(self.generator.visit(func_ast)) 103 | assert tokens == original_code 104 | -------------------------------------------------------------------------------- /tests/repo_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import unittest 4 | 5 | import ghcc 6 | 7 | 8 | class RepoCloneTest(unittest.TestCase): 9 | def setUp(self) -> None: 10 | self.tempdir = tempfile.TemporaryDirectory() 11 | 12 | def tearDown(self) -> None: 13 | self.tempdir.cleanup() 14 | 15 | def test_clone(self) -> None: 16 | # Clone an existing repo. 17 | result = ghcc.clone("huzecong", "memes", clone_folder=self.tempdir.name) 18 | self.assertTrue(result.success, msg=result.captured_output) 19 | self.assertTrue(os.path.exists(os.path.join(self.tempdir.name, "huzecong", "memes", "Get Memes.scpt")), 20 | msg=result.captured_output) 21 | 22 | # Non-existent repo. 
23 | result = ghcc.clone("huzecong", "non-existent-repo", clone_folder=self.tempdir.name) 24 | self.assertFalse(result.success, msg=result.captured_output) 25 | self.assertEqual(ghcc.CloneErrorType.PrivateOrNonexistent, result.error_type, msg=result.captured_output) 26 | 27 | # Timeout 28 | result = ghcc.clone("torvalds", "linux", clone_folder=self.tempdir.name, timeout=1) 29 | self.assertFalse(result.success, msg=result.captured_output) 30 | self.assertEqual(ghcc.CloneErrorType.Timeout, result.error_type, msg=result.captured_output) 31 | --------------------------------------------------------------------------------
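
Taken together, the pieces above form a pipeline: clone a repository, locate its Makefiles, build them with the mock gcc/pkg-config front-ends on PATH (so every binary is compiled with -O0 -g and missing libraries are logged), and collect the resulting binaries. The following is a minimal end-to-end sketch mirroring tests/compile_test.py; the repository name is a placeholder, and the third positional argument of docker_batch_compile is assumed to be the per-Makefile timeout in seconds:

    import os
    import tempfile

    import ghcc

    with tempfile.TemporaryDirectory() as tempdir:
        # Clone into <tempdir>/<owner>/<repo>.
        result = ghcc.clone("some-owner", "some-repo", clone_folder=tempdir)  # placeholder repo
        assert result.success, result.captured_output
        repo_dir = os.path.join(tempdir, "some-owner", "some-repo")

        # Compile all Makefiles inside the Docker image; binaries are moved
        # into binary_dir and identified by their SHA256 hashes.
        binary_dir = os.path.join(tempdir, "_bin")
        os.makedirs(binary_dir)
        entries = ghcc.docker_batch_compile(
            binary_dir, repo_dir, 600,  # assumed: timeout in seconds
            record_libraries=True, user_id=0)
        for entry in entries:
            print(entry["binaries"])  # ELF files produced by this Makefile

Outside Docker, the same mock toolchain can be activated by prepending scripts/mock_path to PATH and pointing MOCK_GCC_LIBRARY_LOG at a file, exactly as test_gcc_library_log does above.
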