├── wllvm ├── __init__.py ├── dragonegg_as │ └── as ├── sanity.py ├── popenwrapper.py ├── wllvmpp.py ├── wllvm.py ├── wfortran.py ├── wparser.py ├── extractor.py ├── logconfig.py ├── filetype.py ├── as.py ├── version.py ├── checker.py ├── compilers.py ├── extraction.py └── arglistfilter.py ├── .gitignore ├── MANIFEST.in ├── img ├── dragonfull.png ├── wllvm_logo.png └── dragon128x128.png ├── test ├── test_files │ ├── bar.c │ ├── baz.c │ ├── foo.c │ ├── hello.cc │ ├── main.c │ ├── test1.h │ ├── test1.cpp │ ├── test2.cpp │ └── Makefile ├── test_clang_driver.py ├── test_dragonegg_driver.py └── test_base_driver.py ├── vagrant ├── clang │ ├── login │ └── Vagrantfile └── dragonegg │ ├── login │ └── Vagrantfile ├── .travis ├── apache_clang.sh ├── store.sh ├── apache_dragonegg.sh └── musllvm.sh ├── LICENSE ├── .travis.yml ├── Makefile ├── doc ├── tutorial.md ├── tutorial-ubuntu-16.04.md └── tutorial-freeBSD.md ├── setup.py ├── README.rst ├── README.md └── .pylintrc /wllvm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /wllvm/dragonegg_as/as: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Forward every argument unchanged; "$@" keeps arguments that contain spaces or globs intact. 3 | wllvm-as "$@" 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea 3 | key.cfg 4 | wllvm.egg-info 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include wllvm/dragonegg_as/as 2 | 3 | recursive-exclude test * 4 | exclude doc 5 | -------------------------------------------------------------------------------- /img/dragonfull.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/travitch/whole-program-llvm/HEAD/img/dragonfull.png -------------------------------------------------------------------------------- /img/wllvm_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/travitch/whole-program-llvm/HEAD/img/wllvm_logo.png -------------------------------------------------------------------------------- /img/dragon128x128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/travitch/whole-program-llvm/HEAD/img/dragon128x128.png -------------------------------------------------------------------------------- /test/test_files/bar.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | 3 | 4 | void bar(void){ 5 | fprintf(stderr, "bar\n"); 6 | } 7 | -------------------------------------------------------------------------------- /test/test_files/baz.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | 3 | 4 | void baz(void){ 5 | fprintf(stderr, "baz\n"); 6 | } 7 | -------------------------------------------------------------------------------- /test/test_files/foo.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | 3 | 4 | void foo(void){ 5 | fprintf(stderr, "foo\n"); 6 | } 7 | -------------------------------------------------------------------------------- /test/test_files/hello.cc: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | 3 | 4 | int main(int argc, char** argv){ 5 | 6 | std::cout << "Hello World" << std::endl; 7 | 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /vagrant/clang/login: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | 4 | if [ -a key.cfg ];
then 5 | echo "logging in" 6 | else 7 | vagrant ssh-config > key.cfg 8 | fi 9 | 10 | ssh -Y -F key.cfg default 11 | -------------------------------------------------------------------------------- /vagrant/dragonegg/login: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | 4 | if [ -a key.cfg ]; then 5 | echo "logging in" 6 | else 7 | vagrant ssh-config > key.cfg 8 | fi 9 | 10 | ssh -Y -F key.cfg default 11 | -------------------------------------------------------------------------------- /test/test_files/main.c: -------------------------------------------------------------------------------- 1 | 2 | extern void foo(void); 3 | extern void bar(void); 4 | extern void baz(void); 5 | 6 | int main(int argc, char** argv){ 7 | foo(); 8 | bar(); 9 | baz(); 10 | return 0; 11 | } 12 | -------------------------------------------------------------------------------- /test/test_files/test1.h: -------------------------------------------------------------------------------- 1 | // test1.h 2 | #ifndef TEST1_H_ 3 | #define TEST1_H_ 4 | #include 5 | #include 6 | 7 | extern int glb_ext; 8 | extern pthread_mutex_t m; 9 | 10 | void lock(); 11 | void unlock(); 12 | 13 | #endif 14 | 15 | -------------------------------------------------------------------------------- /test/test_files/test1.cpp: -------------------------------------------------------------------------------- 1 | // test1.cpp 2 | #include "test1.h" 3 | pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; 4 | 5 | int glb_ext = 1; 6 | 7 | void lock() { 8 | pthread_mutex_lock(&m); 9 | } 10 | 11 | void unlock() { 12 | pthread_mutex_unlock(&m); 13 | } 14 | 15 | -------------------------------------------------------------------------------- /wllvm/sanity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This does some simple sanity checks on the configuration. 
3 | 4 | It attempts to print informative results of that check. 5 | Hopefully never dumping a python stack trace. 6 | 7 | """ 8 | 9 | import sys 10 | 11 | from .checker import Checker 12 | 13 | def main(): 14 | return Checker().check() 15 | 16 | 17 | if __name__ == '__main__': 18 | sys.exit(main()) 19 | -------------------------------------------------------------------------------- /.travis/apache_clang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Make sure we exit if there is a failure 3 | set -e 4 | 5 | 6 | export PATH=/usr/lib/llvm-3.5/bin:${PATH} 7 | export LLVM_COMPILER=clang 8 | export WLLVM_OUTPUT=WARNING 9 | 10 | wllvm-sanity-checker 11 | 12 | wget ${APACHE_URL}httpd-${APACHE_VER}.tar.gz 13 | 14 | tar xfz httpd-${APACHE_VER}.tar.gz 15 | mv httpd-${APACHE_VER} apache_clang 16 | 17 | cd apache_clang 18 | CC=wllvm ./configure 19 | make 20 | extract-bc httpd 21 | 22 | 23 | if [ -s "httpd.bc" ] 24 | then 25 | echo "httpd.bc built." 
26 | else 27 | exit 1 28 | fi 29 | -------------------------------------------------------------------------------- /wllvm/popenwrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import pprint 4 | import logging 5 | 6 | # This module provides a wrapper for subprocess.POpen 7 | # that can be used for debugging 8 | 9 | # Internal logger 10 | _logger = logging.getLogger(__name__) 11 | 12 | def Popen(*pargs, **kwargs): 13 | _logger.debug("WLLVM Executing:\n" + pprint.pformat(pargs[0]) + "\nin: " + os.getcwd()) 14 | try: 15 | return subprocess.Popen(*pargs, **kwargs) 16 | except OSError: 17 | _logger.error("WLLVM Failed to execute: %s", pprint.pformat(pargs[0])) 18 | raise 19 | -------------------------------------------------------------------------------- /.travis/store.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Make sure we exit if there is a failure 3 | set -e 4 | 5 | 6 | export PATH=/usr/lib/llvm-3.5/bin:${PATH} 7 | export LLVM_COMPILER=clang 8 | export WLLVM_OUTPUT=WARNING 9 | 10 | wllvm-sanity-checker 11 | 12 | #setup the store so we test that feature as well 13 | export WLLVM_BC_STORE=/tmp/bc 14 | mkdir /tmp/bc 15 | 16 | cd ./test/test_files 17 | make clean 18 | CC=wllvm make one 19 | mv main ../.. 20 | make clean 21 | cd ../.. 22 | extract-bc main 23 | 24 | if [ -s "main.bc" ] 25 | then 26 | echo "main.bc exists." 
27 | else 28 | exit 1 29 | fi 30 | -------------------------------------------------------------------------------- /test/test_files/test2.cpp: -------------------------------------------------------------------------------- 1 | // test2.cpp 2 | #include "test1.h" 3 | 4 | using namespace std; 5 | 6 | int glb_test; 7 | 8 | void *Thread1(void* x) { 9 | lock(); 10 | glb_test++; 11 | unlock(); 12 | return nullptr; 13 | } 14 | 15 | void *Thread2(void* x) { 16 | lock(); 17 | glb_test++; 18 | unlock(); 19 | return nullptr; 20 | } 21 | 22 | int main() { 23 | pthread_t t[2]; 24 | pthread_create(&t[0], nullptr, Thread1, nullptr); 25 | pthread_create(&t[1], nullptr, Thread1, nullptr); 26 | pthread_join(t[0], nullptr); 27 | pthread_join(t[1], nullptr); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /vagrant/clang/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure(2) do |config| 5 | 6 | config.vm.box = "ubuntu/trusty64" 7 | 8 | config.vm.provision "shell", inline: <<-SHELL 9 | sudo apt-get update 10 | sudo apt-get install -y python-pip 11 | sudo apt-get install -y llvm-3.5 clang-3.5 12 | sudo pip install wllvm 13 | echo 'export PATH=/usr/lib/llvm-3.5/bin:${PATH}' >> /home/vagrant/.bashrc 14 | echo 'export LLVM_COMPILER=clang' >> /home/vagrant/.bashrc 15 | echo 'export WLLVM_OUTPUT=WARNING' >> /home/vagrant/.bashrc 16 | SHELL 17 | 18 | end 19 | -------------------------------------------------------------------------------- /wllvm/wllvmpp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This is a wrapper around the real compiler. 3 | 4 | It first invokes a real compiler to generate an object file. Then it 5 | invokes a bitcode compiler to generate a parallel bitcode file. 
It 6 | records the location of the bitcode in an ELF section of the object 7 | file so that it can be found later after all of the objects are linked 8 | into a library or executable. 9 | """ 10 | 11 | import sys 12 | 13 | from .compilers import wcompile 14 | 15 | 16 | def main(): 17 | """ The entry point to wllvm++. 18 | """ 19 | return wcompile("wllvm++") 20 | 21 | 22 | if __name__ == '__main__': 23 | sys.exit(main()) 24 | -------------------------------------------------------------------------------- /wllvm/wllvm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This is a wrapper around the real compiler. 3 | 4 | It first invokes a real compiler to generate 5 | an object file. Then it invokes a bitcode 6 | compiler to generate a parallel bitcode file. 7 | It records the location of the bitcode in an 8 | ELF section of the object file so that it can be 9 | found later after all of the objects are 10 | linked into a library or executable. 11 | """ 12 | 13 | import sys 14 | 15 | from .compilers import wcompile 16 | 17 | 18 | def main(): 19 | """ The entry point to wllvm. 20 | """ 21 | return wcompile("wllvm") 22 | 23 | 24 | if __name__ == '__main__': 25 | sys.exit(main()) 26 | -------------------------------------------------------------------------------- /wllvm/wfortran.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This is a wrapper around the real compiler. 3 | 4 | It first invokes a real compiler to generate 5 | an object file. Then it invokes a bitcode 6 | compiler to generate a parallel bitcode file. 7 | It records the location of the bitcode in an 8 | ELF section of the object file so that it can be 9 | found later after all of the objects are 10 | linked into a library or executable. 11 | """ 12 | 13 | import sys 14 | 15 | from .compilers import wcompile 16 | 17 | 18 | def main(): 19 | """ The entry point to wfortran.
20 | """ 21 | return wcompile("wfortran") 22 | 23 | 24 | if __name__ == '__main__': 25 | sys.exit(main()) 26 | -------------------------------------------------------------------------------- /wllvm/wparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This runs the wllvm argument parser on its own command line. 3 | 4 | No compiler is invoked. The arguments (minus 5 | the program name) are handed to an 6 | ArgumentListFilter, whose dump() method is 7 | then called; main() always returns 0. 8 | 9 | setup.py installs this entry point as the 10 | wparse-args console script. 11 | """ 12 | 13 | import sys 14 | 15 | from .arglistfilter import ArgumentListFilter 16 | 17 | def main(): 18 | cmd = list(sys.argv) 19 | cmd = cmd[1:] 20 | args = ArgumentListFilter(cmd) 21 | args.dump() 22 | return 0 23 | 24 | 25 | if __name__ == '__main__': 26 | sys.exit(main()) 27 | -------------------------------------------------------------------------------- /test/test_clang_driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from test_base_driver import root_directory, BaseDriverTest 5 | 6 | __author__ = 'Benjamin Schubert, ben.c.schubert@gmail.com' 7 | 8 | 9 | 10 | class ClangDriverTest(BaseDriverTest): 11 | """ 12 | Clang driver tester 13 | """ 14 | @property 15 | def env(self): 16 | """ 17 | The different environment variables used by subprocess to compile with clang and wllvm 18 | :return: 19 | """ 20 | env = os.environ.copy() 21 | env["CC"] = "wllvm" 22 | env["CXX"] = "wllvm++" 23 | env["LLVM_COMPILER"] = "clang" 24 | env["PATH"] = "{}:{}".format(root_directory, os.environ["PATH"]) 25 | return env 26 | -------------------------------------------------------------------------------- /.travis/apache_dragonegg.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Make sure we exit if there is a failure 3 | set -e 4 | 5 | export dragonegg_disable_version_check=true 6 | 7 | export PATH=/usr/lib/llvm-3.3/bin:${PATH} 8 | export LLVM_COMPILER=dragonegg 9 | export LLVM_GCC_PREFIX=llvm- 10 | export LLVM_DRAGONEGG_PLUGIN=/usr/lib/gcc/x86_64-linux-gnu/4.7/plugin/dragonegg.so 11 | 12 | export WLLVM_OUTPUT=WARNING 13 | 14 | wllvm-sanity-checker 15 | 16 | wget ${APACHE_URL}httpd-${APACHE_VER}.tar.gz 17 | 18 | tar xfz httpd-${APACHE_VER}.tar.gz 19 | mv httpd-${APACHE_VER} apache_dragonegg 20 | cd apache_dragonegg 21 | CC=wllvm ./configure 22 | make 23 | extract-bc httpd 24 | 25 | 26 | if [ -s "httpd.bc" ] 27 | then 28 | echo "httpd.bc built." 29 | else 30 | exit 1 31 | fi 32 | -------------------------------------------------------------------------------- /vagrant/dragonegg/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure(2) do |config| 5 | 6 | config.vm.box = "ubuntu/trusty64" 7 | 8 | config.vm.provision "shell", inline: <<-SHELL 9 | sudo apt-get update 10 | sudo apt-get install -y python-pip 11 | sudo apt-get install -y llvm-3.3 llvm-gcc-4.7 12 | sudo pip install wllvm 13 | echo 'export PATH=/usr/lib/llvm-3.3/bin:${PATH}' >> /home/vagrant/.bashrc 14 | echo 'export LLVM_COMPILER=dragonegg' >> /home/vagrant/.bashrc 15 | echo 'export LLVM_GCC_PREFIX=llvm-' >> /home/vagrant/.bashrc 16 | echo 'export LLVM_DRAGONEGG_PLUGIN=/usr/lib/gcc/x86_64-linux-gnu/4.7/plugin/dragonegg.so' >> /home/vagrant/.bashrc 17 | SHELL 18 | 19 | end 20 | -------------------------------------------------------------------------------- /.travis/musllvm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Make sure we exit if there is a failure 3 | set -e 4 | 5 | 6 | export 
PATH=/usr/lib/llvm-3.5/bin:${PATH} 7 | export LLVM_COMPILER=clang 8 | export WLLVM_OUTPUT=WARNING 9 | 10 | wllvm-sanity-checker 11 | 12 | #setup the store so we test that feature as well 13 | export WLLVM_BC_STORE=/tmp/bc 14 | mkdir -p /tmp/bc 15 | 16 | git clone https://github.com/SRI-CSL/musllvm.git musllvm 17 | cd musllvm 18 | WLLVM_CONFIGURE_ONLY=1 CC=wllvm ./configure --target=LLVM --build=LLVM 19 | make 20 | extract-bc --bitcode ./lib/libc.a 21 | 22 | if [ -s "./lib/libc.a.bc" ] 23 | then 24 | echo "libc.a.bc exists." 25 | else 26 | exit 1 27 | fi 28 | 29 | #now lets makes sure the store has the bitcode too. 30 | mv ./lib/libc.a . 31 | make clean 32 | extract-bc --bitcode ./libc.a 33 | 34 | if [ -s "./libc.a.bc" ] 35 | then 36 | echo "libc.a.bc exists." 37 | else 38 | exit 1 39 | fi 40 | -------------------------------------------------------------------------------- /test/test_dragonegg_driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from test_base_driver import root_directory, BaseDriverTest 5 | 6 | __author__ = 'Benjamin Schubert, ben.c.schubert@gmail.com' 7 | 8 | 9 | class DragonEggDriverTest(BaseDriverTest): 10 | """ 11 | Dragonegg driver tester 12 | """ 13 | @property 14 | def env(self): 15 | """ 16 | The different environment variables used by subprocess to compile with dragonegg and wllvm 17 | :return: 18 | """ 19 | env = os.environ.copy() 20 | env["CC"] = "wllvm" 21 | env["CXX"] = "wllvm++" 22 | env["LLVM_COMPILER"] = "dragonegg" 23 | env["PATH"] = "{}:{}".format(root_directory, os.environ["PATH"]) 24 | # FIXME find dragonegg path generically 25 | env["LLVM_DRAGONEGG_PLUGIN"] = "/usr/lib/gcc/x86_64-linux-gnu/4.7/plugin/dragonegg.so" 26 | env["LLVM_GCC_PREFIX"] = "llvm-" 27 | return env 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 
1 | Copyright (c) 2013-2016 Ian A. Mason, Tristan Ravitch, Bruno Dutertre 2 | Copyright (c) 2011-2013 Tristan Ravitch 3 | Copyright (c) 2013 Ben Liblit, Daniel Liew 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | dist: trusty 5 | 6 | python: 7 | - "3.6" 8 | 9 | 10 | # command to install dependencies 11 | install: 12 | - sudo apt-get update 13 | # apache prerequisites 14 | - sudo apt-get install -y libapr1-dev libaprutil1-dev libpcre3-dev 15 | # for the clang build 16 | - sudo apt-get install -y llvm-3.5 clang-3.5 17 | # dragonegg prereqs. 
dragonegg and llvm-gcc use llvm 3.3 18 | - sudo apt-get install -y llvm-3.3 llvm-gcc-4.7 19 | # Install wllvm 20 | # Report the version of pip being used for debugging purposes. 21 | # It should report the site-packages directory and the version 22 | # of python it is working with. 23 | - pip --version 24 | - pip install -e . 25 | 26 | 27 | # command to run tests 28 | script: 29 | # # Run unittests 30 | # - python -m unittest discover test/ || exit 1 31 | - export WLLVM_HOME=`pwd` 32 | - ${WLLVM_HOME}/.travis/store.sh 33 | - export APACHE_URL=https://www-us.apache.org/dist/httpd/ 34 | - export APACHE_VER=2.4.46 35 | # build apache with clang 36 | - ${WLLVM_HOME}/.travis/apache_clang.sh 37 | # build apache with gcc and dragonegg 38 | - ${WLLVM_HOME}/.travis/apache_dragonegg.sh 39 | # build musllvm with clang 40 | - ${WLLVM_HOME}/.travis/musllvm.sh 41 | -------------------------------------------------------------------------------- /wllvm/extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This tool can be used two ways. 3 | 4 | The ELF or MACH-O section contains 5 | absolute paths to all of its constituent bitcode files. This utility 6 | reads the section and links together all of the named bitcode files. 7 | 8 | If the passed in file is a binary executable it will extract the 9 | file paths in the bitcode section from the provided ELF or MACH-O object 10 | and assemble them into an actual bitcode file. 11 | 12 | If the passed in file is a static library it will extract the 13 | constituent ELF or MACH-O objects and read their bitcode sections and 14 | create a LLVM Bitcode archive from the bitcode files. That said, there 15 | is a command line option (--bitcode -b) that allows one to extract the 16 | bitcode into a module rather than an archive. 
17 | 18 | The above language is deliberately vague, since ELF contains a 19 | .llvm_bc section, whereas the MACH-O contains a segment called __LLVM 20 | that contains a section called __llvm_bc. 21 | 22 | """ 23 | from __future__ import absolute_import 24 | 25 | import sys 26 | 27 | from .extraction import extraction 28 | 29 | def main(): 30 | """ The entry point to extract-bc. 31 | """ 32 | try: 33 | extraction() 34 | except Exception: 35 | pass 36 | return 0 37 | 38 | if __name__ == '__main__': 39 | sys.exit(main()) 40 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: 3 | @echo '' 4 | @echo 'Here are the targets:' 5 | @echo '' 6 | @echo 'To develop : "make develop"' 7 | @echo 'To install : "make install"' 8 | @echo 'To publish : "make publish"' 9 | 10 | @echo 'To check clang : "make check_clang"' 11 | @echo '' 12 | @echo 'e.g. on linux: LLVM_COMPILER_PATH=/usr/lib/llvm-3.5/bin/ make check_clang' 13 | @echo '' 14 | @echo 'To check dragonegg : "make check_dragonegg"' 15 | @echo '' 16 | @echo 'e.g. on linux: PATH=/usr/lib/llvm-3.3/bin:... make check_dragonegg' 17 | @echo '' 18 | @echo 'To pylint : "make lint"' 19 | @echo '' 20 | 21 | 22 | 23 | #local editable install for developing 24 | develop: 25 | pip3 install -e . 26 | 27 | 28 | dist: clean 29 | python3 setup.py sdist bdist_wheel 30 | 31 | # If you need to push this project again, 32 | # INCREASE the version number in wllvm/version.py, 33 | # otherwise the server will give you an error. 
34 | 35 | #publish: dist 36 | # python setup.py sdist upload 37 | 38 | publish: dist 39 | python3 -m twine upload dist/* 40 | 41 | # install the package from this source tree (pip needs a requirement to install) 42 | install: 43 | pip3 install . 44 | 45 | check_clang: 46 | cd test; python3 -m unittest -v test_base_driver test_clang_driver 47 | 48 | check_dragonegg: 49 | cd test; python3 -m unittest -v test_base_driver test_dragonegg_driver 50 | 51 | clean: 52 | rm -f wllvm/*.pyc wllvm/*~ 53 | 54 | 55 | PYLINT = $(shell which pylint) 56 | 57 | lint: 58 | ifeq ($(PYLINT),) 59 | $(error lint target requires pylint) 60 | endif 61 | # @ $(PYLINT) -E wllvm/*.py 62 | # for detecting more than just errors: 63 | @ $(PYLINT) --rcfile=.pylintrc wllvm/*.py 64 | -------------------------------------------------------------------------------- /test/test_files/Makefile: -------------------------------------------------------------------------------- 1 | #iam: please leave this Makefile; it is useful for debugging when things break. 2 | 3 | all: one archive 4 | 5 | zero: 6 | ${CXX} hello.cc -o hello 7 | 8 | zero_e: 9 | ${CXX} hello.cc -emit-llvm -c 10 | ${CXX} hello.bc -o hello 11 | 12 | 13 | e: 14 | ${CC} -emit-llvm foo.c -c 15 | ${CC} -emit-llvm bar.c -c 16 | ${CC} -emit-llvm baz.c -c 17 | ${CC} -emit-llvm main.c -c 18 | ${CC} foo.bc bar.bc baz.bc main.bc -o main 19 | 20 | 21 | one: 22 | ${CC} -pthread foo.c bar.c baz.c main.c -o main 23 | 24 | two: 25 | ${CC} foo.c bar.c baz.c main.c -c 26 | ${CC} foo.o bar.o baz.o main.o -o main 27 | 28 | two_e: 29 | ${CC} -emit-llvm foo.c bar.c baz.c main.c -c 30 | ${CC} foo.bc bar.bc baz.bc main.bc -o main 31 | 32 | mix: 33 | ${CC} foo.c bar.c -c 34 | ${CC} foo.o bar.o baz.c main.c -o main 35 | 36 | mix_e: 37 | ${CC} -emit-llvm foo.c bar.c -c 38 | ${CC} foo.bc bar.bc baz.c main.c -o main 39 | 40 | threads: 41 | ${CXX} -pthread test1.cpp test2.cpp -o main 42 | 43 | objects: 44 | ${CC} foo.c -c 45 | ${CC} bar.c -c 46 | ${CC} baz.c -c 47 | ${CC} main.c -c 48 | 49 | many: objects 50 | ${CC} foo.o bar.o baz.o main.o -o main 51
| 52 | archive: objects 53 | ar cr libfoo.a foo.o bar.o baz.o 54 | ranlib libfoo.a 55 | 56 | dylib: objects 57 | ${CC} -dynamiclib foo.o bar.o baz.o -o libfoo.dylib 58 | 59 | deadstrip: objects 60 | ${CC} -dynamiclib -Wl,-dead_strip foo.o bar.o baz.o -o libfoo.dylib 61 | 62 | link_with_archive:: archive 63 | $(CC) main.o libfoo.a -o main.arch 64 | 65 | clean: 66 | rm -f *.o main main.arch .*.o.bc .*.o *.bc .*.bc a.out *.s *.i hello *.a *.bca *.dylib *.manifest *.ll 67 | 68 | mystery: 69 | otool -X -s __WLLVM __llvm_bc main > main.otool 70 | xxd -r main.otool 71 | xxd -r main.otool main.xxd 72 | -------------------------------------------------------------------------------- /doc/tutorial.md: -------------------------------------------------------------------------------- 1 | # Compiling Apache on Ubuntu 2 | 3 | 4 | On a clean 14.04 machine I will build apache. 5 | 6 | ``` 7 | >pwd 8 | 9 | /vagrant 10 | 11 | >more /etc/lsb-release 12 | 13 | DISTRIB_ID=Ubuntu 14 | DISTRIB_RELEASE=14.04 15 | DISTRIB_CODENAME=trusty 16 | DISTRIB_DESCRIPTION="Ubuntu 14.04.2 LTS" 17 | ``` 18 | 19 | 20 | ## Step 1. 21 | 22 | 23 | Install `wllvm`. 24 | 25 | ``` 26 | >sudo apt-get update 27 | 28 | >sudo apt-get install python-pip 29 | 30 | >sudo pip install wllvm 31 | 32 | ``` 33 | 34 | ## Step 2. 35 | 36 | I am only going to build apache, not apr, so I first install the prerequisites. 37 | 38 | ``` 39 | >sudo apt-get install llvm-3.4 clang-3.4 libapr1-dev libaprutil1-dev 40 | 41 | ``` Note `wllvm` is agnostic with respect to llvm versions, when you 42 | use clang, so feel free to install a more recent version if you 43 | wish. However, if you are going to use dragonegg the llvm version is 44 | tightly coupled to the gcc and plugin versions you are using. 45 | 46 | 47 | ## Step 3. 48 | 49 | Configure the wllvm tool to use clang and be relatively quiet: 50 | 51 | ``` 52 | >export LLVM_COMPILER=clang 53 | 54 | >export WLLVM_OUTPUT=WARNING 55 | ``` 56 | 57 | 58 | ## Step 4. 
59 | 60 | Fetch apache, untar, configure, then build: 61 | 62 | ``` 63 | >wget https://archive.apache.org/dist/httpd/httpd-2.4.18.tar.gz 64 | 65 | >tar xfz httpd-2.4.18.tar.gz 66 | 67 | >cd httpd-2.4.18 68 | 69 | >CC=wllvm ./configure 70 | 71 | >make 72 | ``` 73 | 74 | ## Step 5. 75 | 76 | Extract the bitcode. 77 | 78 | ``` 79 | >extract-bc -l llvm-link-3.4 httpd 80 | 81 | >ls -la httpd.bc 82 | -rw-rw-r-- 1 vagrant vagrant 860608 Aug 4 16:55 httpd.bc 83 | ``` 84 | 85 | The extra command line argument to `extract-bc` is because `apt` 86 | installs `llvm-link` as `llvm-link-3.4` so we need to tell `extract-bc` 87 | to use that one. -------------------------------------------------------------------------------- /wllvm/logconfig.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module is intended to be imported by command line tools so they can 3 | configure the root logger so that other loggers used in other modules can 4 | inherit the configuration. 5 | """ 6 | import logging 7 | import os 8 | import sys 9 | 10 | # iam: 6/30/2017 decided to move to a gllvm style where we can set the level and the output file 11 | _loggingEnvLevel_old = 'WLLVM_OUTPUT' 12 | _loggingEnvLevel_new = 'WLLVM_OUTPUT_LEVEL' 13 | 14 | _loggingDestination = 'WLLVM_OUTPUT_FILE' 15 | 16 | _validLogLevels = ['ERROR', 'WARNING', 'INFO', 'DEBUG'] 17 | 18 | def logConfig(name): 19 | 20 | destination = os.getenv(_loggingDestination) 21 | 22 | if destination: 23 | logging.basicConfig(filename=destination, level=logging.WARNING, format='%(levelname)s:%(message)s') 24 | else: 25 | logging.basicConfig(level=logging.WARNING, format='%(levelname)s:%(message)s') 26 | 27 | retval = logging.getLogger(name) 28 | 29 | # ignore old setting 30 | level = os.getenv(_loggingEnvLevel_new) 31 | 32 | if level: 33 | level = level.upper() 34 | if not level in _validLogLevels: 35 | logging.error('"%s" is not a valid value for %s or %s. 
Valid values are %s', 36 | level, _loggingEnvLevel_old, _loggingEnvLevel_new, _validLogLevels) 37 | sys.exit(1) 38 | else: 39 | retval.setLevel(getattr(logging, level)) 40 | 41 | # Adjust the format if debugging 42 | if retval.getEffectiveLevel() == logging.DEBUG: 43 | formatter = logging.Formatter('%(levelname)s::%(module)s.%(funcName)s() at %(filename)s:%(lineno)d ::%(message)s') 44 | for h in logging.getLogger().handlers: 45 | h.setFormatter(formatter) 46 | 47 | return retval 48 | 49 | def loggingConfiguration(): 50 | destination = os.getenv(_loggingDestination) 51 | level = os.getenv(_loggingEnvLevel_new) 52 | return (destination, level) 53 | 54 | 55 | def informUser(msg): 56 | sys.stderr.write(msg) 57 | -------------------------------------------------------------------------------- /doc/tutorial-ubuntu-16.04.md: -------------------------------------------------------------------------------- 1 | # Compiling Apache on Ubuntu 2 | 3 | 4 | On a clean 16.04 server machine I will build apache. Desktop instructions should be no different. 5 | 6 | ``` 7 | >more /etc/lsb-release 8 | 9 | DISTRIB_ID=Ubuntu 10 | DISTRIB_RELEASE=16.04 11 | DISTRIB_CODENAME=xenial 12 | DISTRIB_DESCRIPTION="Ubuntu 16.04 LTS" 13 | ``` 14 | 15 | 16 | ## Step 1. 17 | 18 | Install wllvm 19 | 20 | ``` 21 | >sudo apt-get update 22 | 23 | >sudo apt-get install python-pip 24 | 25 | >sudo pip install wllvm 26 | ``` 27 | 28 | ## Step 2. 29 | 30 | I am only going to build apache, not apr, so I first install the prerequisites. 31 | 32 | ``` 33 | >sudo apt-get install llvm clang libapr1-dev libaprutil1-dev libpcre3-dev make 34 | 35 | ``` 36 | 37 | At this point, you could check your clang version with `which clang` and `ls -l /usr/bin/clang`. 38 | It should be at least clang-3.8. 39 | 40 | ## Step 3. 41 | 42 | Configure the wllvm tool to use clang and be relatively quiet: 43 | 44 | ``` 45 | >export LLVM_COMPILER=clang 46 | 47 | >export WLLVM_OUTPUT=WARNING 48 | ``` 49 | 50 | ## Step 4.
51 | 52 | Fetch apache, untar, configure, then build: 53 | 54 | ``` 55 | 56 | >wget https://archive.apache.org/dist/httpd/httpd-2.4.23.tar.gz 57 | 58 | >tar xfz httpd-2.4.23.tar.gz 59 | 60 | >cd httpd-2.4.23 61 | 62 | >CC=wllvm ./configure 63 | 64 | >make 65 | ``` 66 | 67 | ## Step 5. 68 | 69 | Extract the bitcode. 70 | 71 | ``` 72 | >extract-bc httpd 73 | 74 | >ls -la httpd.bc 75 | -rw-r--r-- 1 vagrant vagrant 1119584 Aug 4 20:02 httpd.bc 76 | ``` 77 | > Note that in httpd-2.4.41, the binary of `httpd` is under directory ".libs", 78 | > in that case, please cd into ".libs" then execute the command. 79 | 80 | ``` 81 | cd .libs 82 | ls -la httpd 83 | extract-bc httpd 84 | ``` 85 | ## Step 6. 86 | 87 | Turn the bitcode into a second executable binary. (optional -- just for fun and sanity checking) 88 | 89 | ``` 90 | llc -filetype=obj httpd.bc 91 | clang httpd.o -Wl,--export-dynamic -lpthread -lapr-1 -laprutil-1 -lpcre -o httpd_from_bc 92 | ``` 93 | See [here](http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html) for an explanation of the 94 | ``` 95 | -Wl,--export-dynamic 96 | ``` 97 | incantation. 98 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import os 3 | import glob 4 | 5 | from codecs import open 6 | from os import path 7 | 8 | here = path.abspath(path.dirname(__file__)) 9 | 10 | # Get the long description from the README file 11 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 12 | long_description = f.read() 13 | 14 | # use the in house version number so we stay in synch with ourselves. 
15 | from wllvm.version import wllvm_version 16 | 17 | setup( 18 | name='wllvm', 19 | version=wllvm_version, 20 | python_requires='>=3.6', 21 | description='Whole Program LLVM', 22 | long_description=long_description, 23 | url='https://github.com/SRI-CSL/whole-program-llvm', 24 | author='Ian A. Mason, Tristan Ravitch, Dan Liew, Bruno Dutertre, Benjamin Schubert, Berkeley Churchill, Marko Dimjasevic, Will Dietz, Fabian Mager, Ben Liblit, Andrew Santosa, Tomas Kalibera, Loic Gelle, Joshua Cranmer, Alexander Bakst, Miguel Arroyo.', 25 | author_email='iam@csl.sri.com', 26 | 27 | 28 | include_package_data=True, 29 | 30 | packages=find_packages(), 31 | 32 | entry_points = { 33 | 'console_scripts': [ 34 | 'wllvm-as = wllvm.as:main', 35 | 'wllvm = wllvm.wllvm:main', 36 | 'wllvm++ = wllvm.wllvmpp:main', 37 | 'wfortran = wllvm.wfortran:main', 38 | 'wllvm-sanity-checker = wllvm.sanity:main', 39 | 'extract-bc = wllvm.extractor:main', 40 | 'wparse-args = wllvm.wparser:main', 41 | ], 42 | }, 43 | 44 | license='MIT', 45 | 46 | classifiers=[ 47 | 'Development Status :: 4 - Beta', 48 | 'Natural Language :: English', 49 | 'Intended Audience :: Science/Research', 50 | 'Intended Audience :: Developers', 51 | 'Topic :: Software Development :: Compilers', 52 | 'License :: OSI Approved :: MIT License', 53 | 'Operating System :: OS Independent', 54 | 'Operating System :: MacOS', 55 | 'Operating System :: POSIX :: Linux', 56 | 'Operating System :: POSIX :: BSD', 57 | 'Programming Language :: Python', 58 | 'Programming Language :: Python :: 3', 59 | 'Programming Language :: Python :: 3.6', 60 | ], 61 | ) 62 | -------------------------------------------------------------------------------- /wllvm/filetype.py: -------------------------------------------------------------------------------- 1 | """ A static class that allows the type of a file to be checked. 
2 | """ 3 | import os 4 | 5 | from subprocess import PIPE 6 | 7 | from .popenwrapper import Popen 8 | 9 | class FileType: 10 | """ A hack to grok the type of input files. 11 | """ 12 | 13 | # These are just here to keep pylint happy. 14 | UNKNOWN = None 15 | ELF_EXECUTABLE = None 16 | ELF_OBJECT = None 17 | ELF_SHARED = None 18 | MACH_EXECUTABLE = None 19 | MACH_OBJECT = None 20 | MACH_SHARED = None 21 | ARCHIVE = None 22 | THIN_ARCHIVE = None 23 | 24 | 25 | # Provides int -> str map 26 | revMap = {} 27 | 28 | @classmethod 29 | def getFileType(cls, fileName): 30 | """ Returns the type of a file. 31 | 32 | This is a hacky way of determining 33 | the type of file we are looking at. 34 | Maybe we should use python-magic instead? 35 | """ 36 | retval = None 37 | fileP = Popen(['file', os.path.realpath(fileName)], stdout=PIPE) 38 | output = fileP.communicate()[0] 39 | foutput = output.decode() 40 | foutput = foutput.split(' ', 1)[1] # Strip file path 41 | 42 | if 'ELF' in foutput and 'executable' in foutput: 43 | retval = cls.ELF_EXECUTABLE 44 | elif 'Mach-O' in foutput and 'executable' in foutput: 45 | retval = cls.MACH_EXECUTABLE 46 | elif 'ELF' in foutput and 'shared' in foutput: 47 | retval = cls.ELF_SHARED 48 | elif 'Mach-O' in foutput and 'dynamically linked shared' in foutput: 49 | retval = cls.MACH_SHARED 50 | elif 'current ar archive' in foutput: 51 | retval = cls.ARCHIVE 52 | elif 'thin archive' in foutput: 53 | retval = cls.THIN_ARCHIVE 54 | elif 'ELF' in foutput and 'relocatable' in foutput: 55 | retval = cls.ELF_OBJECT 56 | elif 'Mach-O' in foutput and 'object' in foutput: 57 | retval = cls.MACH_OBJECT 58 | else: 59 | retval = cls.UNKNOWN 60 | 61 | return retval 62 | 63 | 64 | @classmethod 65 | def getFileTypeString(cls, fti): 66 | """ Returns the string name of the file type. 67 | 68 | """ 69 | if fti in cls.revMap: 70 | return cls.revMap[fti] 71 | return 'UNKNOWN' 72 | 73 | @classmethod 74 | def init(cls): 75 | """ Initializes the static fields. 
76 | """ 77 | for (index, name) in enumerate(('UNKNOWN', 78 | 'ELF_EXECUTABLE', 79 | 'ELF_OBJECT', 80 | 'ELF_SHARED', 81 | 'MACH_EXECUTABLE', 82 | 'MACH_OBJECT', 83 | 'MACH_SHARED', 84 | 'ARCHIVE', 85 | 'THIN_ARCHIVE')): 86 | setattr(cls, name, index) 87 | cls.revMap[index] = name 88 | 89 | # Initialise FileType static class 90 | FileType.init() 91 | -------------------------------------------------------------------------------- /wllvm/as.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """This is the (dragonegg) assembler phase. 3 | 4 | This variant is only invoked during the second compilation where we 5 | are building bitcode. The compiler has already been instructed to 6 | generate LLVM IR; the compiler then tries to assemble it into an 7 | object file. The standard assembler doesn't understand LLVM bitcode, 8 | so we interpose and use the llvm-as command to build a bitcode file. 9 | We leave the bitcode in place, but record its full absolute path in 10 | the corresponding object file (which was created in the first 11 | compilation phase by the real compiler). We'll link this together at 12 | a later stage. 13 | 14 | In the pip version the main below is a console script called wllvm-as. 15 | Since we cannot tell gcc what our assember is called, only which 16 | directory it should look for the assembler "as" in, we have to make a 17 | "hidden" directory that we can use to pass gcc. It needs to be hidden 18 | since we have no control over the user's PATH and certainly do not 19 | want our assembler to accidently override the user's assembler. 20 | 21 | This should explain: 22 | 23 | ./dragonegg_as/as 24 | 25 | in the pip egg, and in the repository. 
26 | 27 | """ 28 | 29 | from __future__ import absolute_import 30 | 31 | import sys 32 | 33 | import os 34 | 35 | #from subprocess import * 36 | 37 | from .compilers import llvmCompilerPathEnv 38 | 39 | from .popenwrapper import Popen 40 | 41 | from .arglistfilter import ArgumentListFilter 42 | 43 | from .logconfig import logConfig 44 | 45 | # Internal logger 46 | _logger = logConfig(__name__) 47 | 48 | 49 | class BCFilter(ArgumentListFilter): 50 | """ Argument filter for the assembler. 51 | """ 52 | def __init__(self, arglist): 53 | self.bcName = None 54 | self.outFileName = None 55 | localCallbacks = {'-o' : (1, BCFilter.outFileCallback)} 56 | super().__init__(arglist, exactMatches=localCallbacks) 57 | 58 | def outFileCallback(self, flag, name): 59 | """ Callback for the -o flag. 60 | """ 61 | _logger.debug('BCFilter.outFileCallback %s %s', flag, name) 62 | self.outFileName = name 63 | 64 | def main(): 65 | """ Entry point to the assembler 'as' in the dragonegg realm. 66 | """ 67 | argFilter = BCFilter(sys.argv[1:]) 68 | # Since this is just the assembler, there should only ever be one file 69 | try: 70 | [infile] = argFilter.inputFiles 71 | except ValueError: 72 | _logger.debug('Input file argument not detected, assuming stdin.') 73 | infile = "-" 74 | 75 | # set llvm-as 76 | llvmAssembler = 'llvm-as' 77 | if os.getenv(llvmCompilerPathEnv): 78 | llvmAssembler = os.path.join(os.getenv(llvmCompilerPathEnv), llvmAssembler) 79 | 80 | # Now compile this llvm assembly file into a bitcode file. 
The output 81 | # filename is the same as the object with a .bc appended 82 | if not argFilter.outFileName: 83 | _logger.error('Output file argument not found.') 84 | sys.exit(1) 85 | 86 | fakeAssembler = [llvmAssembler, infile, '-o', argFilter.outFileName] 87 | 88 | asmProc = Popen(fakeAssembler) 89 | realRet = asmProc.wait() 90 | 91 | if realRet != 0: 92 | _logger.error('llvm-as failed') 93 | sys.exit(realRet) 94 | 95 | sys.exit(realRet) 96 | 97 | 98 | if __name__ == '__main__': 99 | sys.exit(main()) 100 | -------------------------------------------------------------------------------- /wllvm/version.py: -------------------------------------------------------------------------------- 1 | # Feeping Creaturism: 2 | # 3 | # this is the all important version number used by pip. 4 | # 5 | # 6 | """ 7 | Version History: 8 | 9 | 1.0.0 - 8/2/2016 initial birth as a pip package. 10 | 11 | 1.0.1 - 8/2/2016 the rst gets a make over, and doc strings 12 | became more pervasive. 13 | 14 | 1.0.2 - 8/4/2016 dragonegg issues. trying to include a polite 'as' wrapper 15 | (i.e. not a console_script called as). 16 | 17 | 1.0.3 - 8/4/2016 travis build fixes. 18 | 19 | 1.0.4 - 8/4/2016 travis build fixes, and exception handling fixes. 20 | 21 | 1.0.5 - 8/4/2016 exit value was upsetting travis. 22 | 23 | 1.0.6 - 8/9/2016 exit codes preserved; important for configure scripts like musl libc. 24 | 25 | 1.0.7 - 8/9/2016 logical restructuring; smaller bites. 26 | 27 | 1.0.8 - 8/9/2016 test and doc subdirectories are no longer included. 28 | 29 | 1.0.9 - 8/25/2016 Python 3.0 import fixes (Will Dietz) 30 | 31 | 1.0.10 - 9/26/2016 Apple's otool just gets biggier and buggier. 32 | 33 | 1.0.11 - 9/27/2016 Improved Apple's otool fix. 34 | 35 | 1.0.12 - 10/27/2016 Common flag support. 36 | 37 | 1.0.13 - 11/05/2016 pylint spots a few mistakes. 38 | 39 | 1.0.14 - 11/10/2016 --coverage flag. 40 | 41 | 1.0.15 - 11/15/2016 pylintification complete. 
42 | 43 | 1.0.16 - 11/16/2016 ooops musl points out I screwed up the exit codes AGAIN. 44 | 45 | 1.0.17 - 11/23/2016 delcypher #16 over at travitch's place. 46 | 47 | 1.0.18 - 4/11/2017 tentative solution to the -emit-llvm "out of context" experience. 48 | 49 | 1.0.19 - 4/19/2017 fixed a '-o' issue in extract-bc and added the bitcode store feature. 50 | 51 | 1.1.0 - 4/21/2017 no new features on the horizon, no new bugs? 52 | 53 | 1.1.1 - 4/25/2017 bugs introduced by the new fetures have hopefully been eradicated. 54 | 55 | 1.1.2 - 4/26/2017 encoding issues with hashlib in the python 3 swarm. 56 | 57 | 1.1.3 - 5/20/2017 fortran support via flang (pull #60 over at travitch's place) 58 | 59 | 1.1.4 - 7/24/2017 improvements motivated by gllvm and logic. 60 | 61 | 1.1.5 - 3/14/2018 fixes suggested by Alexander Bakst 62 | 63 | 1.2.0 - 4/24/2018 fixes suggested by building the Linux kernel and trying to harmonize with gllvm. 64 | 4/28/2018 can handle thin archives, can sort bitcode input to llvm-{ar, link} and manifest via the -s switch. 65 | 5/1/2018 can handle archives correctly (deal with multiple files with the same name in the archive). 66 | 67 | 1.2.1 - 5/13/2018 -fsanitize= now recognized as a compile AND link flag (mothers day edition) 68 | 69 | 1.2.2 - 6/1/2018 lots of minor fixes from building big projects (and their dependencies) like tor 70 | 71 | 1.2.3 - 4/15/2019 The tax day version. Almost a years worth of tweaks from building large things like the Linux kernel. 72 | 73 | 1.2.4 - 4/15/2019 The tax day version, II. Testing the twine upload. 74 | 75 | 1.2.5 - 4/17/2019 Fixing the pip package, hopefully. 76 | 77 | 1.2.6 - 6/18/2019 Various compiler cmd line options parsing tweaks. 78 | 79 | 1.2.7 - 3/23/2020 Added the LLVM_BITCODE_GENERATION_FLAGS to allow LTO support. 80 | 81 | 1.2.8 - 3/23/2020 Added the LLVM_BITCODE_GENERATION_FLAGS to allow LTO support. 
(pip uploading issues) 82 | 83 | 1.2.9 - 2/20/2021 Various fixes: 84 | wllvm-sanity-checker prints correctly now we are python3 85 | Eliminated "....".format(...) in favor of f'...{thingy}....' How many times did python try to get this right? 86 | e.g. handle -Wl,--start-group ... -Wl,--end-group properly. 87 | e.g. -W and -w don't trip the compile only flag. 88 | 1.3.0 - 3/6/2021 otool seems to have changed its output format, so we need to tread more carefully. 89 | 90 | """ 91 | 92 | wllvm_version = '1.3.0' 93 | wllvm_date = 'March 6 2021' 94 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Introduction to WLLVM 2 | ===================== 3 | 4 | This project, WLLVM, provides tools for building whole-program (or 5 | whole-library) LLVM bitcode files from an unmodified C or C++ 6 | source package. It currently runs on `*nix` platforms such as Linux, 7 | FreeBSD, and Mac OS X. 8 | 9 | WLLVM provides python-based compiler wrappers that work in two 10 | steps. The wrappers first invoke the compiler as normal. Then, for 11 | each object file, they call a bitcode compiler to produce LLVM 12 | bitcode. The wrappers also store the location of the generated bitcode 13 | file in a dedicated section of the object file. When object files are 14 | linked together, the contents of the dedicated sections are 15 | concatenated (so we don't lose the locations of any of the constituent 16 | bitcode files). After the build completes, one can use a WLLVM 17 | utility to read the contents of the dedicated section and link all of 18 | the bitcode into a single whole-program bitcode file. This utility 19 | works for both executable and native libraries. 20 | 21 | This two-phase build process is necessary to be a drop-in replacement 22 | for ``gcc`` or ``g++`` in any build system.
Using the LTO framework in gcc 23 | and the gold linker plugin works in many cases, but fails in the 24 | presence of static libraries in builds. WLLVM's approach has the 25 | distinct advantage of generating working binaries, in case some part 26 | of a build process requires that. 27 | 28 | WLLVM works with either ``clang`` or the ``gcc dragonegg`` plugin. 29 | 30 | 31 | Usage 32 | ----- 33 | 34 | WLLVM includes four python executables: ``wllvm`` for compiling C code 35 | and ``wllvm++`` for compiling C++, an auxiliary tool ``extract-bc`` for 36 | extracting the bitcode from a build product (object file, executable, library 37 | or archive), and a sanity checker, ``wllvm-sanity-checker`` for detecting 38 | configuration oversights. 39 | 40 | Three environment variables must be set to use these wrappers: 41 | 42 | * ``LLVM_COMPILER`` should be set to either ``dragonegg`` or ``clang``. 43 | * ``LLVM_GCC_PREFIX`` should be set to the prefix for the version of gcc that should 44 | be used with dragonegg. This can be empty if there is no prefix. This variable is 45 | not used if ``$LLVM_COMPILER == clang``. 46 | * ``LLVM_DRAGONEGG_PLUGIN`` should be the full path to the dragonegg plugin. This 47 | variable is not used if ``$LLVM_COMPILER == clang``. 48 | 49 | Once the environment is set up, just use ``wllvm`` and ``wllvm++`` as your C 50 | and C++ compilers, respectively. 51 | 52 | 53 | In addition to the above environment variables the following can be optionally used: 54 | 55 | * ``LLVM_CC_NAME`` can be set if your clang compiler is not called ``clang`` but 56 | something like ``clang-3.7``. Similarly ``LLVM_CXX_NAME`` can be used to describe 57 | what the C++ compiler is called. Note that in these sorts of cases, the environment 58 | variable ``LLVM_COMPILER`` should still be set to ``clang`` not ``clang-3.7`` etc. 
59 | We also pay attention to the environment variables ``LLVM_LINK_NAME`` and ``LLVM_AR_NAME`` in an 60 | analogous way, since they too get adorned with suffixes in various Linux distributions. 61 | 62 | * ``LLVM_COMPILER_PATH`` can be set to the absolute path to the folder that 63 | contains the compiler and other LLVM tools such as ``llvm-link`` to be used. 64 | This prevents searching for the compiler in your PATH environment variable. 65 | This can be useful if you have different versions of clang on your system 66 | and you want to easily switch compilers without tinkering with your PATH 67 | variable. 68 | Example ``LLVM_COMPILER_PATH=/home/user/llvm_and_clang/Debug+Asserts/bin``. 69 | 70 | * ``WLLVM_CONFIGURE_ONLY`` can be set to anything. If it is set, ``wllvm`` 71 | and ``wllvm++`` behave like a normal C or C++ compiler. They do not 72 | produce bitcode. Setting ``WLLVM_CONFIGURE_ONLY`` may prevent 73 | configuration errors caused by the unexpected production of hidden 74 | bitcode files. 75 | 76 | 77 | Documentation 78 | ------------- 79 | 80 | More detailed documentation as well as some tutorials can be found 81 | here: 82 | 83 | https://github.com/SRI-CSL/whole-program-llvm 84 | -------------------------------------------------------------------------------- /doc/tutorial-freeBSD.md: -------------------------------------------------------------------------------- 1 | # Steps to build bitcode version of FreeBSD 10.0 world and kernel 2 | 3 | The following instructions have been tested with FreeBSD 10.0 amd64. 4 | 5 | ## Prerequisites 6 | 7 | ### 1. FreeBSD 8 | 9 | The ideal way to start is with a clean install of FreeBSD 10.0 with 10 | sources installed.
The simplest way to do this is to install from the 11 | Release 10.0 ISO image and and on the "Distribution Select" screen 12 | select just the following: 13 | 14 | [*] ports Ports tree 15 | [*] src System source code 16 | 17 | If you are on an existing system that has either an old version of the 18 | source tree or is missing source, you can follow the instructions in the 19 | FreeBSD Handbook Chapter 24 to get the relevant sources. 20 | 21 | ### 2. Necessary ports 22 | 23 | Upgrade the ports collection (as 'root'): 24 | 25 | su - 26 | portsnap fetch 27 | portsnap extract 28 | cd /usr/ports/ports-mgmt/portupgrade 29 | make -DBATCH install clean 30 | portupgrade -a --batch 31 | 32 | Install the following ports using the BSD port tree: 33 | 34 | bash git subversion python27 pip sudo wget 35 | 36 | (See the FreeBSD Handbook Chapter 5 for instructions.) 37 | The quick way to do this is: 38 | 39 | su - 40 | cd /usr/ports 41 | cd shells/bash && make -DBATCH install clean && \ 42 | cd ../../devel/git && make -DBATCH install clean && \ 43 | cd ../../devel/py-pip && make -DBATCH install clean && \ 44 | cd ../../devel/subversion && make -DBATCH install clean && \ 45 | cd ../../security/sudo && make -DBATCH install clean && \ 46 | cd ../../ftp/wget && make -DBATCH install clean 47 | 48 | (The package python27 is installed as a prerequisite of git.) 49 | 50 | Below we assume the shell being used is bash, that is: 51 | 52 | chsh -s /usr/local/bin/bash 53 | 54 | has been run. If you want to use another shell, replace bash-isms like 55 | 'export' with the appropriate equivalent. 56 | 57 | We suggest installing 'sudo' and setting up your account as a sudoer, to 58 | make installing programs easier. You can do this, or modify the commands 59 | that use 'sudo' below. 60 | 61 | ### 3. LLVM and Clang 3.3 62 | 63 | Install LLVM and Clang version 3.3. (These instructions adapted from 64 | http://llvm.org/docs/GettingStarted.html) Decide where you want to 65 | install LLVM. 
If you have 'root' access, you can use the default 66 | '/usr/local', though any location is fine. You may then wish to add this 67 | to your shell startup (in '~/.profile' for bash): 68 | 69 | export LLVM_HOME=/usr/local/llvm-3.3 70 | 71 | Get LLVM and Clang version 3.3: 72 | 73 | svn co http://llvm.org/svn/llvm-project/llvm/branches/release_33 llvm 74 | cd llvm/tools 75 | svn co http://llvm.org/svn/llvm-project/cfe/branches/release_33 clang 76 | cd ../projects 77 | svn co http://llvm.org/svn/llvm-project/compiler-rt/branches/release_33 compiler-rt 78 | cd ../.. 79 | 80 | Now finish the build and install: 81 | 82 | cd llvm 83 | mkdir build 84 | cd build 85 | ../configure --prefix=$LLVM_HOME --enable-assertions \ 86 | --enable-targets=host-only --enable-optimized 87 | gmake 88 | sudo gmake install 89 | 90 | Note that the FreeBSD 10.0 base includes Clang 3.3 (but does not include 91 | the complete LLVM framework, in particular llvm-link is not included). 92 | 93 | To make life easier, link llvm-link into /usr/bin so that extract-bc can find it: 94 | 95 | sudo ln -s $LLVM_HOME/bin/llvm-link /usr/bin/llvm-link 96 | 97 | 98 | ### 4. Install whole-program-llvm. 99 | 100 | sudo pip install wllvm 101 | 102 | 103 | This next one is a hack (make buildworld doesn't find python with /usr/bin/env without it) 104 | 105 | ln -s /usr/local/bin/python python 106 | 107 | ### 5. Insert the hooks into the build path. 108 | 109 | ``` 110 | diff /usr/src/Makefile.inc1 Makefile.inc1.original 111 | 180c180 112 | BPATH= ${HOME}/wllvm.bin:${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/usr/games:${WORLDTMP}/legacy/bin 113 | --- 114 | BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/usr/games:${WORLDTMP}/legacy/bin 115 | ``` 116 | 117 | ## Building the Puppy 118 | 119 | If the build location doesn't exist, create it. 120 | 121 | mkdir ${HOME}/build.world 122 | 123 | Configure the environment for the build.
124 | 125 | export MAKEOBJDIRPREFIX=${HOME}/build.world 126 | export LLVM_COMPILER=clang 127 | export LLVM_COMPILER_PATH=/usr/bin 128 | export WLLVM_OUTPUT=DEBUG 129 | 130 | Start the build. 131 | 132 | cd /usr/src 133 | make buildworld 134 | 135 | Once that succeeds build the kernel 136 | 137 | make buildkernel 138 | 139 | Extract the bitcode: 140 | 141 | 142 | cd ${MAKEOBJDIRPREFIX}/usr/src/sys/GENERIC 143 | 144 | ${HOME}/whole-program-llvm/extract-bc kernel 145 | 146 | nm kernel | wc 147 | 53140 159418 2421852 148 | 149 | ${LLVM_HOME}/bin/llvm-nm kernel.bc | wc 150 | 50664 101328 1910997 151 | 152 | We are working on seeing if we can get these numbers to match. 153 | But we suspect there is some assembler causing this difference. 154 | 155 | 156 | -------------------------------------------------------------------------------- /test/test_base_driver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = 'Benjamin Schubert, ben.c.schubert@gmail.com' 4 | 5 | from abc import abstractproperty 6 | import os 7 | import shutil 8 | import subprocess 9 | import unittest 10 | 11 | 12 | test_output_directory = "/tmp/test-wllvm" 13 | test_files_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test_files") 14 | root_directory = os.path.join(os.path.dirname(os.path.dirname(__file__))) 15 | 16 | 17 | class BaseDriverTest(unittest.TestCase): 18 | """ 19 | This is a BaseDriverTest class. Can be used to generically test that every driver works correctly with different 20 | code examples without any problem.
This class is meant to be overridden 21 | """ 22 | @classmethod 23 | def setUpClass(cls): 24 | """ 25 | This is a base class that should not be run in tests, skip it 26 | :return: 27 | """ 28 | if cls is BaseDriverTest: 29 | raise unittest.SkipTest("Skip BaseDriverTest, it's a base class") 30 | 31 | @abstractproperty 32 | def env(self): 33 | """ 34 | Defines all necessary environment variables to allow the driver to be tested, assembly seen in "Usage" in the README 35 | """ 36 | return None 37 | 38 | def setUp(self): 39 | """ 40 | Creates the test directory in /tmp 41 | :return: 42 | """ 43 | if not os.path.exists(test_output_directory): 44 | os.makedirs(test_output_directory) 45 | 46 | def tearDown(self): 47 | """ 48 | remove all temporary test files 49 | :return: 50 | """ 51 | shutil.rmtree(test_output_directory) 52 | 53 | def launch_proc(self, cmd): 54 | """ 55 | Launches cmd with environment and in test_output_directory 56 | :param cmd: command to launch 57 | :return: the subprocess instance 58 | """ 59 | return subprocess.Popen(cmd, shell=True, env=self.env, cwd=test_output_directory) 60 | 61 | def create_objects(self): 62 | """ 63 | Creates some objects used by tests 64 | :return: 65 | """ 66 | for f in ["foo.c", "bar.c", "baz.c", "main.c"]: 67 | self.assertEqual(self.launch_proc("${{CC}} {dir}/{f} -c".format(dir=test_files_directory, f=f)).wait(), 0) 68 | 69 | def create_archive(self): 70 | """ 71 | creates the libfoo.a archive 72 | :return: 73 | """ 74 | proc1 = self.launch_proc("ar cr libfoo.a foo.o bar.o baz.o") 75 | self.assertEqual(proc1.wait(), 0) 76 | 77 | def test_can_compile_simple_file(self): 78 | """ 79 | Checks that it is possible to compile a single simple file 80 | :return: 81 | """ 82 | proc = self.launch_proc("${{CXX}} -o hello {}/hello.cc".format(test_files_directory)) 83 | self.assertEqual(proc.wait(), 0) 84 | 85 | def test_can_compile_multiple_file_in_one_object(self): 86 | """ 87 | Checks that is is possible to compile multiple files into 
one executable 88 | :return: 89 | """ 90 | proc = self.launch_proc( 91 | "${{CC}} {dir}/foo.c {dir}/bar.c {dir}/baz.c {dir}/main.c -o main".format(dir=test_files_directory) 92 | ) 93 | self.assertEqual(proc.wait(), 0) 94 | 95 | def test_can_compile_and_link_multiple_object(self): 96 | """ 97 | Checks that is is possible to compile first then link the compiled objects together 98 | :return: 99 | """ 100 | proc1 = self.launch_proc( 101 | "${{CC}} {dir}/foo.c {dir}/bar.c {dir}/baz.c {dir}/main.c -c".format(dir=test_files_directory) 102 | ) 103 | self.assertEqual(proc1.wait(), 0) 104 | 105 | proc2 = self.launch_proc("${CC} foo.o bar.o baz.o main.o -o main") 106 | self.assertEqual(proc2.wait(), 0) 107 | 108 | def test_can_compile_and_link_object_and_source_object(self): 109 | """ 110 | Checks that is is possible to compile some objects first, then link them while compiling others 111 | :return: 112 | """ 113 | proc1 = self.launch_proc("${{CC}} {dir}/foo.c {dir}/bar.c -c".format(dir=test_files_directory)) 114 | self.assertEqual(proc1.wait(), 0) 115 | 116 | proc2 = self.launch_proc("${{CC}} foo.o bar.o {dir}/baz.c {dir}/main.c -o main".format(dir=test_files_directory)) 117 | self.assertEqual(proc2.wait(), 0) 118 | 119 | def test_can_link_multiple_objects_together(self): 120 | """ 121 | Checks that it is possible to link multiple objects together 122 | :return: 123 | """ 124 | self.create_objects() 125 | proc = self.launch_proc("${CC} foo.o bar.o baz.o main.o -o main") 126 | self.assertEqual(proc.wait(), 0) 127 | 128 | def test_can_create_archive_from_object_created(self): 129 | """ 130 | Checks that it is possible to create a valid archive from the created objects 131 | :return: 132 | """ 133 | self.create_objects() 134 | self.create_archive() 135 | 136 | proc2 = self.launch_proc("ranlib libfoo.a") 137 | self.assertEqual(proc2.wait(), 0) 138 | 139 | def test_can_create_dynamic_library_from_objects(self): 140 | """ 141 | Checks that is is possible to create a dynamic 
library from the objects 142 | :return: 143 | """ 144 | self.create_objects() 145 | proc = self.launch_proc("${CC} -dynamiclib foo.o bar.o baz.o main.o -o libfoo.dylib") 146 | self.assertEqual(proc.wait(), 0) 147 | 148 | def test_can_deadstrip_dynamic_library(self): 149 | """ 150 | Checks that is is possible to create a deadstripped dynamic library from the objects 151 | :return: 152 | """ 153 | self.create_objects() 154 | proc = self.launch_proc("${CC} -dynamiclib -Wl,-dead_strip foo.o bar.o baz.o main.o -o libfoo.dylib") 155 | self.assertEqual(proc.wait(), 0) 156 | 157 | def test_can_link_with_archive(self): 158 | """ 159 | Checks that is is possible to link with a created archive 160 | :return: 161 | """ 162 | self.create_objects() 163 | self.create_archive() 164 | 165 | proc = self.launch_proc("${CC} main.o libfoo.a -o main.arch") 166 | self.assertEqual(proc.wait(), 0) 167 | 168 | 169 | if __name__ == '__main__': 170 | unittest.main() 171 | -------------------------------------------------------------------------------- /wllvm/checker.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module support for the wllvm-sanity-checker tool. 3 | 4 | The wllvm-sanity-checker tool examines the users 5 | environment to see if it makes sense from the 6 | wllvm point of view. Useful first step in trying to 7 | debug a failure. 8 | """ 9 | from __future__ import print_function 10 | 11 | import sys 12 | import os 13 | import subprocess as sp 14 | import errno 15 | 16 | from .version import wllvm_version, wllvm_date 17 | from .logconfig import loggingConfiguration 18 | 19 | explain_LLVM_COMPILER = """ 20 | 21 | The environment variable 'LLVM_COMPILER' is a switch. It should either 22 | be set to 'clang' or 'dragonegg'. Anything else will cause an error. 23 | 24 | """ 25 | 26 | explain_LLVM_DRAGONEGG_PLUGIN = """ 27 | 28 | You need to set the environment variable LLVM_DRAGONEGG_PLUGIN to the 29 | full path to your dragonegg plugin. 
Thanks. 30 | 31 | """ 32 | 33 | explain_LLVM_CC_NAME = """ 34 | 35 | If your clang compiler is not called clang, but something else, then 36 | you will need to set the environment variable LLVM_CC_NAME to the 37 | appropriate string. For example if your clang is called clang-3.5 then 38 | LLVM_CC_NAME should be set to clang-3.5. 39 | 40 | """ 41 | 42 | explain_LLVM_CXX_NAME = """ 43 | 44 | If your clang++ compiler is not called clang++, but something else, 45 | then you will need to set the environment variable LLVM_CXX_NAME to 46 | the appropriate string. For example if your clang++ is called ++clang 47 | then LLVM_CC_NAME should be set to ++clang. 48 | 49 | """ 50 | 51 | explain_LLVM_COMPILER_PATH = """ 52 | 53 | Your compiler should either be in your PATH, or else located where the 54 | environment variable LLVM_COMPILER_PATH indicates. It can also be used 55 | to indicate the directory that contains the other LLVM tools such as 56 | llvm-link, and llvm-ar. 57 | 58 | """ 59 | 60 | explain_LLVM_LINK_NAME = """ 61 | 62 | If your llvm linker is not called llvm-link, but something else, then 63 | you will need to set the environment variable LLVM_LINK_NAME to the 64 | appropriate string. For example if your llvm-link is called llvm-link-3.5 then 65 | LLVM_LINK_NAME should be set to llvm-link-3.5. 66 | 67 | """ 68 | 69 | explain_LLVM_AR_NAME = """ 70 | 71 | If your llvm archiver is not called llvm-ar, but something else, 72 | then you will need to set the environment variable LLVM_AR_NAME to 73 | the appropriate string. For example if your llvm-ar is called llvm-ar-3.5 74 | then LLVM_AR_NAME should be set to llvm-ar-3.5. 75 | 76 | """ 77 | 78 | class Checker: 79 | def __init__(self): 80 | path = os.getenv('LLVM_COMPILER_PATH') 81 | 82 | if path and path[-1] != os.path.sep: 83 | path = path + os.path.sep 84 | 85 | self.path = path if path else '' 86 | 87 | def check(self): 88 | """Performs the environmental sanity check. 
89 | 90 | Performs the following checks in order: 91 | 0. Prints out the logging configuartion 92 | 1. Check that the OS is supported. 93 | 2. Checks that the compiler settings make sense. 94 | 3. Checks that the needed LLVM utilities exists. 95 | 4. Check that the store, if set, exists. 96 | """ 97 | 98 | self.checkSelf() 99 | 100 | self.checkLogging() 101 | 102 | if not self.checkOS(): 103 | print('I do not think we support your OS. Sorry.') 104 | return 1 105 | 106 | success = self.checkCompiler() 107 | 108 | if success: 109 | self.checkAuxiliaries() 110 | self.checkStore() 111 | 112 | return 0 if success else 1 113 | 114 | def checkSelf(self): 115 | print(f'wllvm version: {wllvm_version}') 116 | print(f'wllvm released: {wllvm_date}\n') 117 | 118 | 119 | def checkLogging(self): 120 | (destination, level) = loggingConfiguration() 121 | print(f'Logging output to {destination if destination else "standard error"}.') 122 | if not level: 123 | print('Logging level not set, defaulting to WARNING.') 124 | else: 125 | print(f'Logging level set to {level}.') 126 | 127 | 128 | def checkOS(self): 129 | """Returns True if we support the OS.""" 130 | return (sys.platform.startswith('freebsd') or 131 | sys.platform.startswith('linux') or 132 | sys.platform.startswith('darwin')) 133 | 134 | 135 | def checkSwitch(self): 136 | """Checks the correctness of the LLVM_COMPILER env var.""" 137 | compiler_type = os.getenv('LLVM_COMPILER') 138 | if compiler_type == 'clang': 139 | return (1, '\nWe are using clang.\n') 140 | if compiler_type == 'dragonegg': 141 | return (2, '\nWe are using dragonegg.\n') 142 | return (0, explain_LLVM_COMPILER) 143 | 144 | 145 | def checkClang(self): 146 | """Checks for clang and clang++.""" 147 | cc_name = os.getenv('LLVM_CC_NAME') 148 | cxx_name = os.getenv('LLVM_CXX_NAME') 149 | 150 | cc = f'{self.path}{cc_name if cc_name else "clang"}' 151 | cxx = f'{self.path}{cxx_name if cxx_name else "clang++"}' 152 | 153 | return self.checkCompilers(cc, cxx) 154 | 
155 | 156 | def checkDragonegg(self): 157 | """Checks for gcc, g++ and the dragonegg plugin.""" 158 | if not self.checkDragoneggPlugin(): 159 | return False 160 | 161 | pfx = '' 162 | if os.getenv('LLVM_GCC_PREFIX') is not None: 163 | pfx = os.getenv('LLVM_GCC_PREFIX') 164 | 165 | cc = f'{self.path}{pfx}gcc' 166 | cxx = f'{self.path}{pfx}g++' 167 | 168 | return self.checkCompilers(cc, cxx) 169 | 170 | 171 | def checkDragoneggPlugin(self): 172 | """Checks for the dragonegg plugin.""" 173 | plugin = os.getenv('LLVM_DRAGONEGG_PLUGIN') 174 | 175 | if not plugin: 176 | print(explain_LLVM_DRAGONEGG_PLUGIN) 177 | return False 178 | 179 | if os.path.isfile(plugin): 180 | try: 181 | open(plugin) 182 | except IOError as e: 183 | print(f'Unable to open {plugin}: {str(e)}') 184 | else: 185 | return True 186 | else: 187 | print(f'Could not find {plugin}') 188 | return False 189 | 190 | 191 | def checkCompiler(self): 192 | """Determines the chosen compiler, and checks it.""" 193 | (code, comment) = self.checkSwitch() 194 | 195 | if code == 0: 196 | print(comment) 197 | return False 198 | if code == 1: 199 | print(comment) 200 | return self.checkClang() 201 | if code == 2: 202 | print(comment) 203 | return self.checkDragonegg() 204 | print('Insane') 205 | return False 206 | 207 | 208 | 209 | def checkCompilers(self, cc, cxx): 210 | """Tests that the compilers actually exist.""" 211 | (ccOk, ccVersion) = self.checkExecutable(cc) 212 | (cxxOk, cxxVersion) = self.checkExecutable(cxx) 213 | 214 | if not ccOk: 215 | print(f'The C compiler {cc} was not found or not executable.\nBetter not try using wllvm!\n') 216 | else: 217 | print(f'The C compiler {cc} is:\n\n\t{extractLine(ccVersion, 0)}\n') 218 | 219 | if not cxxOk: 220 | print(f'The CXX compiler {cxx} was not found or not executable.\nBetter not try using wllvm++!\n') 221 | else: 222 | print(f'The C++ compiler {cxx} is:\n\n\t{extractLine(cxxVersion, 0)}\n') 223 | 224 | if not ccOk or not cxxOk: 225 | 
print(explain_LLVM_COMPILER_PATH) 226 | if not ccOk: 227 | print(explain_LLVM_CC_NAME) 228 | if not cxxOk: 229 | print(explain_LLVM_CXX_NAME) 230 | 231 | 232 | 233 | return ccOk or cxxOk 234 | 235 | 236 | def checkExecutable(self, exe, version_switch='-v'): 237 | """Checks that an executable exists, and is executable.""" 238 | cmd = [exe, version_switch] 239 | try: 240 | compiler = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) 241 | output = compiler.communicate() 242 | compilerOutput = f'{output[0].decode()}{output[1].decode()}' 243 | except OSError as e: 244 | if e.errno == errno.EPERM: 245 | return (False, f'{exe} not executable') 246 | if e.errno == errno.ENOENT: 247 | return (False, f'{exe} not found') 248 | return (False, f'{exe} not sure why, errno is {e.errno}') 249 | else: 250 | return (True, compilerOutput) 251 | 252 | 253 | 254 | def checkAuxiliaries(self): 255 | """Checks for the archiver and linker.""" 256 | link_name = os.getenv('LLVM_LINK_NAME') 257 | ar_name = os.getenv('LLVM_AR_NAME') 258 | 259 | if not link_name: 260 | link_name = 'llvm-link' 261 | 262 | if not ar_name: 263 | ar_name = 'llvm-ar' 264 | 265 | link = f'{self.path}{link_name}' if self.path else link_name 266 | ar = f'{self.path}{ar_name}' if self.path else ar_name 267 | 268 | (linkOk, linkVersion) = self.checkExecutable(link, '-version') 269 | 270 | (arOk, arVersion) = self.checkExecutable(ar, '-version') 271 | 272 | if not linkOk: 273 | print(f'The bitcode linker {link} was not found or not executable.\nBetter not try using extract-bc!\n') 274 | print(explain_LLVM_LINK_NAME) 275 | else: 276 | print(f'The bitcode linker {link} is:\n\n\t{extractLine(linkVersion, 1)}\n') 277 | 278 | if not arOk: 279 | print(f'The bitcode archiver {ar} was not found or not executable.\nBetter not try using extract-bc!\n') 280 | print(explain_LLVM_AR_NAME) 281 | else: 282 | print(f'The bitcode archiver {ar} is:\n\n\t{extractLine(arVersion, 1)}\n') 283 | 284 | 285 | def checkStore(self): 286 | """Checks 
that the bitcode store, if set, makes sense.""" 287 | store_dir = os.getenv('WLLVM_BC_STORE') 288 | if store_dir: 289 | if os.path.exists(store_dir) and os.path.isdir(store_dir) and os.path.isabs(store_dir): 290 | print(f'Using the bitcode store:\n\n\t{store_dir}\n\n') 291 | else: 292 | print(f'The bitcode store:\n\n\t{store_dir}\n\nis either not absolute, does not exist, or is not a directory.\n\n') 293 | else: 294 | print('Not using a bitcode store.\n\n') 295 | 296 | 297 | def extractLine(version, n): 298 | if not version: 299 | return version 300 | lines = version.split('\n') 301 | line = lines[n] if n < len(lines) else lines[-1] 302 | return line.strip() if line else line 303 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![WLLVM](img/dragon128x128.png?raw_true)Whole Program LLVM 2 | 3 | [![License: MIT](https://img.shields.io/badge/License-MIT-blueviolet.svg)](https://opensource.org/licenses/MIT) 4 | [![PyPI version](https://badge.fury.io/py/wllvm.svg)](https://badge.fury.io/py/wllvm) 5 | [![Build Status](https://travis-ci.org/SRI-CSL/whole-program-llvm.svg?branch=master)](https://travis-ci.org/SRI-CSL/whole-program-llvm) 6 | [![PyPI Statistics](https://img.shields.io/pypi/dm/wllvm.svg)](https://pypistats.org/packages/wllvm) 7 | 8 | 9 | Introduction 10 | ------------ 11 | 12 | This project, WLLVM, provides tools for building whole-program (or 13 | whole-library) LLVM bitcode files from an unmodified C or C++ 14 | source package. It currently runs on `*nix` platforms such as Linux, 15 | FreeBSD, and Mac OS X. 16 | 17 | WLLVM provides python-based compiler wrappers that work in two 18 | steps. The wrappers first invoke the compiler as normal. Then, for 19 | each object file, they call a bitcode compiler to produce LLVM 20 | bitcode. 
The wrappers also store the location of the generated bitcode 21 | file in a dedicated section of the object file. When object files are 22 | linked together, the contents of the dedicated sections are 23 | concatenated (so we don't lose the locations of any of the constituent 24 | bitcode files). After the build completes, one can use a WLLVM 25 | utility to read the contents of the dedicated section and link all of 26 | the bitcode into a single whole-program bitcode file. This utility 27 | works for both executables and native libraries. 28 | 29 | This two-phase build process is necessary to be a drop-in replacement 30 | for gcc or g++ in any build system. Using the LTO framework in gcc 31 | and the gold linker plugin works in many cases, but fails in the 32 | presence of static libraries in builds. WLLVM's approach has the 33 | distinct advantage of generating working binaries, in case some part 34 | of a build process requires that. 35 | 36 | WLLVM works with either clang or the gcc dragonegg plugin. If you are not interested in dragonegg support, 37 | and speed is an issue for you, you may want to try out [gllvm.](https://github.com/SRI-CSL/gllvm) 38 | 39 | Installation 40 | ------------ 41 | 42 | As of August 2016 WLLVM is now a pip package. You can just do: 43 | 44 | pip install wllvm 45 | 46 | or 47 | 48 | sudo pip install wllvm 49 | 50 | depending on your machine's permissions. 51 | 52 | 53 | Tutorial 54 | ======= 55 | If you want to develop or use the development version: 56 | 57 | ``` 58 | git clone https://github.com/travitch/whole-program-llvm 59 | cd whole-program-llvm 60 | ``` 61 | 62 | Now you need to install WLLVM. You can either install 63 | globally on your system in develop mode: 64 | 65 | ``` 66 | sudo pip install -e . 67 | ``` 68 | 69 | or install WLLVM into a virtual python environment 70 | in develop mode to avoid installing globally: 71 | 72 | ``` 73 | virtualenv venv 74 | source venv/bin/activate 75 | pip install -e .
76 | ``` 77 | 78 | 79 | 80 | Usage 81 | ----- 82 | 83 | WLLVM includes four python executables: `wllvm` for compiling C code 84 | and `wllvm++` for compiling C++, an auxiliary tool `extract-bc` for 85 | extracting the bitcode from a build product (object file, executable, library 86 | or archive), and a sanity checker, `wllvm-sanity-checker` for detecting 87 | configuration oversights. 88 | 89 | Three environment variables must be set to use these wrappers: 90 | 91 | * `LLVM_COMPILER` should be set to either `dragonegg` or `clang`. 92 | * `LLVM_GCC_PREFIX` should be set to the prefix for the version of gcc that should 93 | be used with dragonegg. This can be empty if there is no prefix. This variable is 94 | not used if `$LLVM_COMPILER == clang`. 95 | * `LLVM_DRAGONEGG_PLUGIN` should be the full path to the dragonegg plugin. This 96 | variable is not used if `$LLVM_COMPILER == clang`. 97 | 98 | Once the environment is set up, just use `wllvm` and `wllvm++` as your C 99 | and C++ compilers, respectively. 100 | 101 | 102 | In addition to the above environment variables the following can be optionally used: 103 | 104 | * `LLVM_CC_NAME` can be set if your clang compiler is not called `clang` but 105 | something like `clang-3.7`. Similarly `LLVM_CXX_NAME` can be used to describe 106 | what the C++ compiler is called. Note that in these sorts of cases, the environment 107 | variable `LLVM_COMPILER` should still be set to `clang` not `clang-3.7` etc. 108 | We also pay attention to the environment variables `LLVM_LINK_NAME` and `LLVM_AR_NAME` in an 109 | analogous way, since they too get adorned with suffixes in various Linux distributions. 110 | 111 | * `LLVM_COMPILER_PATH` can be set to the absolute path to the folder that 112 | contains the compiler and other LLVM tools such as `llvm-link` to be used. 113 | This prevents searching for the compiler in your PATH environment variable.
114 | This can be useful if you have different versions of clang on your system 115 | and you want to easily switch compilers without tinkering with your PATH 116 | variable. 117 | Example `LLVM_COMPILER_PATH=/home/user/llvm_and_clang/Debug+Asserts/bin`. 118 | 119 | * `WLLVM_CONFIGURE_ONLY` can be set to anything. If it is set, `wllvm` 120 | and `wllvm++` behave like a normal C or C++ compiler. They do not 121 | produce bitcode. Setting `WLLVM_CONFIGURE_ONLY` may prevent 122 | configuration errors caused by the unexpected production of hidden 123 | bitcode files. It is sometimes required when configuring a build. 124 | 125 | 126 | 127 | 128 | 129 | Building a bitcode module with clang 130 | ------------------------------------ 131 | 132 | export LLVM_COMPILER=clang 133 | 134 | tar xf pkg-config-0.26.tar.gz 135 | cd pkg-config-0.26 136 | CC=wllvm ./configure 137 | make 138 | 139 | This should produce the executable `pkg-config`. To extract the bitcode: 140 | 141 | extract-bc pkg-config 142 | 143 | which will produce the bitcode module `pkg-config.bc`. 144 | 145 | 146 | Tutorials 147 | --------- 148 | 149 | A gentler set of instructions on building apache in a vagrant Ubuntu 14.04 can be found 150 | [here,](doc/tutorial.md) and for Ubuntu 16.04 [here.](doc/tutorial-ubuntu-16.04.md) 151 | 152 | Building a bitcode module with dragonegg 153 | ---------------------------------------- 154 | 155 | export LLVM_COMPILER=dragonegg 156 | export LLVM_GCC_PREFIX=llvm- 157 | export LLVM_DRAGONEGG_PLUGIN=/unsup/llvm-2.9/lib/dragonegg.so 158 | 159 | tar xf pkg-config-0.26.tar.gz 160 | cd pkg-config-0.26 161 | CC=wllvm ./configure 162 | make 163 | 164 | Again, this should produce the executable `pkg-config`. To extract the bitcode: 165 | 166 | extract-bc pkg-config 167 | 168 | which will produce the bitcode module `pkg-config.bc`. 
169 | 170 | 171 | Building bitcode archive 172 | ------------------------ 173 | 174 | export LLVM_COMPILER=clang 175 | tar -xvf bullet-2.81-rev2613.tgz 176 | mkdir bullet-bin 177 | cd bullet-bin 178 | CC=wllvm CXX=wllvm++ cmake ../bullet-2.81-rev2613/ 179 | make 180 | 181 | # Produces src/LinearMath/libLinearMath.bca 182 | extract-bc src/LinearMath/libLinearMath.a 183 | 184 | Note that by default extracting bitcode from an archive produces 185 | an archive of bitcode. You can also extract the bitcode directly into a module. 186 | 187 | extract-bc -b src/LinearMath/libLinearMath.a 188 | 189 | produces `src/LinearMath/libLinearMath.a.bc`. 190 | 191 | 192 | 193 | Building an Operating System 194 | ---------------------------- 195 | 196 | To see how to build freeBSD 10.0 from scratch check out this 197 | [guide.](doc/tutorial-freeBSD.md) 198 | 199 | 200 | Configuring without building bitcode 201 | ------------------------------------ 202 | 203 | Sometimes it is necessary to disable the production of bitcode. 204 | Typically this is during configuration, where the production 205 | of unexpected files can confuse the configure script. For this 206 | we have a flag `WLLVM_CONFIGURE_ONLY` which can be used as 207 | follows: 208 | 209 | WLLVM_CONFIGURE_ONLY=1 CC=wllvm ./configure 210 | CC=wllvm make 211 | 212 | 213 | Building a bitcode archive then extracting the bitcode 214 | ------------------------------------------------------ 215 | 216 | export LLVM_COMPILER=clang 217 | tar xvfz jansson-2.7.tar.gz 218 | cd jansson-2.7 219 | CC=wllvm ./configure 220 | make 221 | mkdir bitcode 222 | cp src/.libs/libjansson.a bitcode 223 | cd bitcode 224 | extract-bc libjansson.a 225 | llvm-ar x libjansson.bca 226 | ls -la 227 | 228 | 229 | Preserving bitcode files in a store 230 | -------------------------------- 231 | 232 | Sometimes it can be useful to preserve the bitcode files produced in a 233 | build, either to prevent deletion or to retrieve it later. 
If the 234 | environment variable `WLLVM_BC_STORE` is set to the absolute path of 235 | an existing directory, 236 | then WLLVM will copy the produced bitcode file into that directory. 237 | The name of the copied bitcode file is the hash of the path to the 238 | original bitcode file. For convenience, when using both the manifest 239 | feature of `extract-bc` and the store, the manifest will contain both 240 | the original path, and the store path. 241 | 242 | Cross-Compilation 243 | ----------------- 244 | 245 | To support cross-compilation WLLVM supports the `-target` triple used by clang. 246 | More information can be found 247 | [here.](https://clang.llvm.org/docs/CrossCompilation.html#target-triple) 248 | 249 | Additionally, WLLVM leverages `objcopy` for some of its heavy lifting. When 250 | cross-compiling you must ensure to use the appropriate `objcopy` for the target 251 | architecture. The `BINUTILS_TARGET_PREFIX` environment variable can be used to 252 | set the objcopy of choice, for example, `arm-linux-gnueabihf`. 253 | 254 | LTO Support 255 | ----------- 256 | 257 | In some situations it is desirable to pass certain flags to clang in the step that 258 | produces the bitcode. This can be fulfilled by setting the 259 | `LLVM_BITCODE_GENERATION_FLAGS` environment variable to the desired 260 | flags, for example `"-flto -fwhole-program-vtables"`. 261 | 262 | Debugging 263 | --------- 264 | 265 | The WLLVM tools can show various levels of output to aid with debugging. 266 | To show this output set the `WLLVM_OUTPUT_LEVEL` environment 267 | variable to one of the following levels: 268 | 269 | * `ERROR` 270 | * `WARNING` 271 | * `INFO` 272 | * `DEBUG` 273 | 274 | For example: 275 | ``` 276 | export WLLVM_OUTPUT_LEVEL=DEBUG 277 | ``` 278 | Output will be directed to the standard error stream, unless you specify the 279 | path of a logfile via the `WLLVM_OUTPUT_FILE` environment variable. 
280 | 281 | For example: 282 | ``` 283 | export WLLVM_OUTPUT_FILE=/tmp/wllvm.log 284 | ``` 285 | 286 | 287 | Sanity Checking 288 | --------------- 289 | 290 | Too many environment variables? Try doing a sanity check: 291 | 292 | ``` 293 | wllvm-sanity-checker 294 | ``` 295 | it might point out what is wrong. 296 | 297 | 298 | License 299 | ------- 300 | 301 | WLLVM is released under the MIT license. See the file `LICENSE` for [details.](LICENSE) 302 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | # iam: generated by 2 | # 3 | # pylint --generate-rcfile > .pylintrc 4 | # 5 | # then customized to ignore my warts 6 | 7 | [MASTER] 8 | 9 | # Specify a configuration file. 10 | #rcfile= 11 | 12 | # Python code to execute, usually for sys.path manipulation such as 13 | # pygtk.require(). 14 | #init-hook= 15 | 16 | # Add files or directories to the blacklist. They should be base names, not 17 | # paths. 18 | ignore=CVS 19 | 20 | # Add files or directories matching the regex patterns to the blacklist. The 21 | # regex matches against base names, not paths. 22 | ignore-patterns= 23 | 24 | # Pickle collected data for later comparisons. 25 | persistent=yes 26 | 27 | # List of plugins (as comma separated values of python modules names) to load, 28 | # usually to register additional checkers. 29 | load-plugins= 30 | 31 | # Use multiple processes to speed up Pylint. 32 | jobs=1 33 | 34 | # Allow loading of arbitrary C extensions. Extensions are imported into the 35 | # active Python interpreter and may run arbitrary code. 36 | unsafe-load-any-extension=no 37 | 38 | # A comma-separated list of package or module names from where C extensions may 39 | # be loaded. Extensions are loading into the active Python interpreter and may 40 | # run arbitrary code 41 | extension-pkg-whitelist= 42 | 43 | # Allow optimization of some AST trees. 
This will activate a peephole AST 44 | # optimizer, which will apply various small optimizations. For instance, it can 45 | # be used to obtain the result of joining multiple strings with the addition 46 | # operator. Joining a lot of strings can lead to a maximum recursion error in 47 | # Pylint and this flag can prevent that. It has one side effect, the resulting 48 | # AST will be different than the one from reality. This option is deprecated 49 | # and it will be removed in Pylint 2.0. 50 | optimize-ast=no 51 | 52 | 53 | [MESSAGES CONTROL] 54 | 55 | # Only show warnings with the listed confidence levels. Leave empty to show 56 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 57 | confidence= 58 | 59 | # Enable the message, report, category or checker with the given id(s). You can 60 | # either give multiple identifier separated by comma (,) or put this option 61 | # multiple time (only on the command line, not in the configuration file where 62 | # it should appear only once). See also the "--disable" option for examples. 63 | #enable= 64 | 65 | # Disable the message, report, category or checker with the given id(s). You 66 | # can either give multiple identifiers separated by comma (,) or put this 67 | # option multiple times (only on the command line, not in the configuration 68 | # file where it should appear only once).You can also use "--disable=all" to 69 | # disable everything first and then reenable specific checks. For example, if 70 | # you want to run only the similarities checker, you can use "--disable=all 71 | # --enable=similarities". 
If you want to run only the classes checker, but have 72 | # no Warning level messages displayed, use"--disable=all --enable=classes 73 | # --disable=W" 74 | disable=import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating, R0201, C0111, W0102 75 | 76 | [REPORTS] 77 | 78 | # Set the output format. Available formats are text, parseable, colorized, msvs 79 | # (visual studio) and html. You can also give a reporter class, eg 80 | # mypackage.mymodule.MyReporterClass. 81 | output-format=text 82 | 83 | # Put messages in a separate file for each module / package specified on the 84 | # command line instead of printing them on stdout. Reports (if any) will be 85 | # written in a file name "pylint_global.[txt|html]". This option is deprecated 86 | # and it will be removed in Pylint 2.0. 87 | files-output=no 88 | 89 | # Tells whether to display a full report or only the messages 90 | reports=yes 91 | 92 | # Python expression which should return a note less than 10 (10 is the highest 93 | # note). You have access to the variables errors warning, statement which 94 | # respectively contain the number of errors / warnings messages and the total 95 | # number of statements analyzed. 
This is used by the global evaluation report 96 | # (RP0004). 97 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 98 | 99 | # Template used to display messages. This is a python new-style format string 100 | # used to format the message information. See doc for all details 101 | #msg-template= 102 | 103 | 104 | [BASIC] 105 | 106 | # Good variable names which should always be accepted, separated by a comma 107 | good-names=i,j,k,ex,Run,_ 108 | 109 | # Bad variable names which should always be refused, separated by a comma 110 | bad-names=foo,bar,baz,toto,tutu,tata 111 | 112 | # Colon-delimited sets of names that determine each other's naming style when 113 | # the name regexes allow several styles. 114 | name-group= 115 | 116 | # Include a hint for the correct naming format with invalid-name 117 | include-naming-hint=no 118 | 119 | # List of decorators that produce properties, such as abc.abstractproperty. Add 120 | # to this list to register other decorators that produce valid properties. 
121 | property-classes=abc.abstractproperty 122 | 123 | # Regular expression matching correct function names 124 | function-rgx=[a-zA-Z_]+[a-zA-Z0-9_]*$ 125 | 126 | # Naming hint for function names 127 | function-name-hint=[a-zA-Z_]+[a-zA-Z0-9_]*$ 128 | 129 | # Regular expression matching correct variable names 130 | variable-rgx=[a-zA-Z_]+[a-zA-Z0-9_]*$ 131 | 132 | # Naming hint for variable names 133 | variable-name-hint=[a-zA-Z_]+[a-zA-Z0-9_]*$ 134 | 135 | # Regular expression matching correct constant names 136 | const-rgx=(([a-zA-Z_]+[a-zA-Z0-9_]*)|(__.*__))$ 137 | 138 | # Naming hint for constant names 139 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 140 | 141 | # Regular expression matching correct attribute names 142 | attr-rgx=[a-zA-Z_]+[a-z0-9_]*$ 143 | 144 | # Naming hint for attribute names 145 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 146 | 147 | # Regular expression matching correct argument names 148 | argument-rgx=[a-zA-Z_]+[a-zA-Z0-9_]*$ 149 | 150 | # Naming hint for argument names 151 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 152 | 153 | # Regular expression matching correct class attribute names 154 | class-attribute-rgx=([A-Za-z_]+[A-Za-z0-9_]*|(__.*__))$ 155 | 156 | # Naming hint for class attribute names 157 | class-attribute-name-hint=([A-Za-z_]+[A-Za-z0-9_]*|(__.*__))$ 158 | 159 | # Regular expression matching correct inline iteration names 160 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 161 | 162 | # Naming hint for inline iteration names 163 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 164 | 165 | # Regular expression matching correct class names 166 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 167 | 168 | # Naming hint for class names 169 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 170 | 171 | # Regular expression matching correct module names 172 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 173 | 174 | # Naming hint for module names 175 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 176 | 177 | # Regular expression matching correct 
method names 178 | method-rgx=[a-zA-Z_]+[a-zA-Z0-9_]*$ 179 | 180 | # Naming hint for method names 181 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 182 | 183 | # Regular expression which should only match function or class names that do 184 | # not require a docstring. 185 | no-docstring-rgx=^_ 186 | 187 | # Minimum line length for functions/classes that require docstrings, shorter 188 | # ones are exempt. 189 | docstring-min-length=-1 190 | 191 | 192 | [ELIF] 193 | 194 | # Maximum number of nested blocks for function / method body 195 | max-nested-blocks=10 196 | 197 | 198 | [FORMAT] 199 | 200 | # Maximum number of characters on a single line. 201 | max-line-length=200 202 | 203 | # Regexp for a line that is allowed to be longer than the limit. 204 | ignore-long-lines=^\s*(# )??$ 205 | 206 | # Allow the body of an if to be on the same line as the test if there is no 207 | # else. 208 | single-line-if-stmt=no 209 | 210 | # List of optional constructs for which whitespace checking is disabled. `dict- 211 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 212 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 213 | # `empty-line` allows space-only lines. 214 | no-space-check=trailing-comma,dict-separator 215 | 216 | # Maximum number of lines in a module 217 | max-module-lines=1000 218 | 219 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 220 | # tab). 221 | indent-string=' ' 222 | 223 | # Number of spaces of indent required inside a hanging or continued line. 224 | indent-after-paren=4 225 | 226 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 
227 | expected-line-ending-format= 228 | 229 | 230 | [LOGGING] 231 | 232 | # Logging modules to check that the string format arguments are in logging 233 | # function parameter format 234 | logging-modules=logging 235 | 236 | 237 | [MISCELLANEOUS] 238 | 239 | # List of note tags to take in consideration, separated by a comma. 240 | notes=FIXME,XXX,TODO 241 | 242 | 243 | [SIMILARITIES] 244 | 245 | # Minimum lines number of a similarity. 246 | min-similarity-lines=4 247 | 248 | # Ignore comments when computing similarities. 249 | ignore-comments=yes 250 | 251 | # Ignore docstrings when computing similarities. 252 | ignore-docstrings=yes 253 | 254 | # Ignore imports when computing similarities. 255 | ignore-imports=no 256 | 257 | 258 | [SPELLING] 259 | 260 | # Spelling dictionary name. Available dictionaries: none. To make it working 261 | # install python-enchant package. 262 | spelling-dict= 263 | 264 | # List of comma separated words that should not be checked. 265 | spelling-ignore-words= 266 | 267 | # A path to a file that contains private dictionary; one word per line. 268 | spelling-private-dict-file= 269 | 270 | # Tells whether to store unknown words to indicated private dictionary in 271 | # --spelling-private-dict-file option instead of raising a message. 272 | spelling-store-unknown-words=no 273 | 274 | 275 | [TYPECHECK] 276 | 277 | # Tells whether missing members accessed in mixin class should be ignored. A 278 | # mixin class is detected if its name ends with "mixin" (case insensitive). 279 | ignore-mixin-members=yes 280 | 281 | # List of module names for which member attributes should not be checked 282 | # (useful for modules/projects where namespaces are manipulated during runtime 283 | # and thus existing member attributes cannot be deduced by static analysis. It 284 | # supports qualified module names, as well as Unix pattern matching. 
285 | ignored-modules= 286 | 287 | # List of class names for which member attributes should not be checked (useful 288 | # for classes with dynamically set attributes). This supports the use of 289 | # qualified names. 290 | ignored-classes=optparse.Values,thread._local,_thread._local 291 | 292 | # List of members which are set dynamically and missed by pylint inference 293 | # system, and so shouldn't trigger E1101 when accessed. Python regular 294 | # expressions are accepted. 295 | generated-members= 296 | 297 | # List of decorators that produce context managers, such as 298 | # contextlib.contextmanager. Add to this list to register other decorators that 299 | # produce valid context managers. 300 | contextmanager-decorators=contextlib.contextmanager 301 | 302 | 303 | [VARIABLES] 304 | 305 | # Tells whether we should check for unused import in __init__ files. 306 | init-import=no 307 | 308 | # A regular expression matching the name of dummy variables (i.e. expectedly 309 | # not used). 310 | dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy 311 | 312 | # List of additional names supposed to be defined in builtins. Remember that 313 | # you should avoid to define new builtins when possible. 314 | additional-builtins= 315 | 316 | # List of strings which can identify a callback function by name. A callback 317 | # name must start or end with one of those strings. 318 | callbacks=cb_,_cb 319 | 320 | # List of qualified module names which can have objects that can redefine 321 | # builtins. 322 | redefining-builtins-modules=six.moves,future.builtins 323 | 324 | 325 | [CLASSES] 326 | 327 | # List of method names used to declare (i.e. assign) instance attributes. 328 | defining-attr-methods=__init__,__new__,setUp 329 | 330 | # List of valid names for the first argument in a class method. 331 | valid-classmethod-first-arg=cls 332 | 333 | # List of valid names for the first argument in a metaclass class method. 
334 | valid-metaclass-classmethod-first-arg=mcs 335 | 336 | # List of member names, which should be excluded from the protected access 337 | # warning. 338 | exclude-protected=_asdict,_fields,_replace,_source,_make 339 | 340 | 341 | [DESIGN] 342 | 343 | # Maximum number of arguments for function / method 344 | max-args=5 345 | 346 | # Argument names that match this expression will be ignored. Default to name 347 | # with leading underscore 348 | ignored-argument-names=_.* 349 | 350 | # Maximum number of locals for function / method body 351 | max-locals=20 352 | 353 | # Maximum number of return / yield for function / method body 354 | max-returns=6 355 | 356 | # Maximum number of branch for function / method body 357 | max-branches=20 358 | 359 | # Maximum number of statements in function / method body 360 | max-statements=60 361 | 362 | # Maximum number of parents for a class (see R0901). 363 | max-parents=7 364 | 365 | # Maximum number of attributes for a class (see R0902). 366 | max-attributes=20 367 | 368 | # Minimum number of public methods for a class (see R0903). 369 | min-public-methods=0 370 | 371 | # Maximum number of public methods for a class (see R0904). 372 | max-public-methods=30 373 | 374 | # Maximum number of boolean expressions in a if statement 375 | max-bool-expr=10 376 | 377 | 378 | [IMPORTS] 379 | 380 | # Deprecated modules which should not be used, separated by a comma 381 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 382 | 383 | # Create a graph of every (i.e. 
internal and external) dependencies in the 384 | # given file (report RP0402 must not be disabled) 385 | import-graph= 386 | 387 | # Create a graph of external dependencies in the given file (report RP0402 must 388 | # not be disabled) 389 | ext-import-graph= 390 | 391 | # Create a graph of internal dependencies in the given file (report RP0402 must 392 | # not be disabled) 393 | int-import-graph= 394 | 395 | # Force import order to recognize a module as part of the standard 396 | # compatibility libraries. 397 | known-standard-library= 398 | 399 | # Force import order to recognize a module as part of a third party library. 400 | known-third-party=enchant 401 | 402 | # Analyse import fallback blocks. This can be used to support both Python 2 and 403 | # 3 compatible code, which means that the block might have code that exists 404 | # only in one or another interpreter, leading to false positives when analysed. 405 | analyse-fallback-blocks=no 406 | 407 | 408 | [EXCEPTIONS] 409 | 410 | # Exceptions that will emit a warning when being caught. Defaults to 411 | # "Exception" 412 | overgeneral-exceptions=None 413 | -------------------------------------------------------------------------------- /wllvm/compilers.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | 5 | import os 6 | import sys 7 | import tempfile 8 | import hashlib 9 | import subprocess 10 | 11 | from shutil import copyfile 12 | from .filetype import FileType 13 | from .popenwrapper import Popen 14 | from .arglistfilter import ArgumentListFilter 15 | 16 | from .logconfig import logConfig 17 | 18 | # Internal logger 19 | _logger = logConfig(__name__) 20 | 21 | def wcompile(mode): 22 | """ The workhorse, called from wllvm and wllvm++. 
23 | """ 24 | 25 | # Make sure we are not invoked from ccache 26 | parentCmd = subprocess.check_output( 27 | ['ps', '-o', 'comm=', '-p', str(os.getppid())], text=True) 28 | if parentCmd.strip() == 'ccache': 29 | # The following error message is invisible in terminal 30 | # when ccache is using its preprocessor mode 31 | _logger.error('Should not be invoked from ccache') 32 | # When ccache detects an error in the preprocessor mode, 33 | # it will fall back to running the real compiler (wllvm) 34 | sys.exit(-1) 35 | 36 | rc = 1 37 | 38 | legible_argstring = ' '.join(list(sys.argv)[1:]) 39 | 40 | # for diffing with gclang 41 | _logger.info('Entering CC [%s]', legible_argstring) 42 | 43 | try: 44 | cmd = list(sys.argv) 45 | cmd = cmd[1:] 46 | 47 | builder = getBuilder(cmd, mode) 48 | 49 | af = builder.getBitcodeArglistFilter() 50 | 51 | rc = buildObject(builder) 52 | 53 | # phase one compile failed. no point continuing 54 | if rc != 0: 55 | _logger.error('Failed to compile using given arguments: [%s]', legible_argstring) 56 | return rc 57 | 58 | # no need to generate bitcode (e.g. configure only, assembly, ....) 59 | (skipit, reason) = af.skipBitcodeGeneration() 60 | if skipit: 61 | _logger.debug('No work to do: %s', reason) 62 | _logger.debug(af.__dict__) 63 | return rc 64 | 65 | # phase two 66 | buildAndAttachBitcode(builder, af) 67 | 68 | except Exception as e: 69 | _logger.warning('%s: exception case: %s', mode, str(e)) 70 | 71 | _logger.debug('Calling %s returned %d', list(sys.argv), rc) 72 | return rc 73 | 74 | 75 | 76 | 77 | fullSelfPath = os.path.realpath(__file__) 78 | prefix = os.path.dirname(fullSelfPath) 79 | driverDir = prefix 80 | asDir = os.path.abspath(os.path.join(driverDir, 'dragonegg_as')) 81 | 82 | 83 | # Environmental variable for path to compiler tools (clang/llvm-link etc..) 84 | llvmCompilerPathEnv = 'LLVM_COMPILER_PATH' 85 | 86 | # Environmental variable for cross-compilation target. 
binutilsTargetPrefixEnv = 'BINUTILS_TARGET_PREFIX'

# This is the ELF section name inserted into binaries
elfSectionName = '.llvm_bc'

# (Fix: 2016/02/16: __LLVM is now used by MacOS's ld so we changed the segment name to __WLLVM).
#
# These are the MACH_O segment and section name
# The SegmentName was __LLVM. Changed to __WLLVM to avoid clashing
# with a segment that ld now uses (since MacOS X 10.11.3?)
#
darwinSegmentName = '__WLLVM'
darwinSectionName = '__llvm_bc'


# Same as an ArgumentListFilter, but DO NOT change the name of the output filename when
# building the bitcode file so that we don't clobber the object file.
class ClangBitcodeArgumentListFilter(ArgumentListFilter):
    """ArgumentListFilter variant that installs its own handler for '-o'."""

    def __init__(self, arglist):
        # NOTE(review): the leading 1 in the tuple is presumably the number of
        # arguments the flag consumes -- confirm against ArgumentListFilter.
        localCallbacks = {'-o' : (1, ClangBitcodeArgumentListFilter.outputFileCallback)}
        super().__init__(arglist, exactMatches=localCallbacks)

    def outputFileCallback(self, flag, filename):
        """Record the filename given with '-o' as the output filename."""
        self.outputFilename = filename


def getHashedPathName(path):
    """Return the SHA-256 hex digest of the UTF-8 encoded path, or None if path is empty/None."""
    return hashlib.sha256(path.encode('utf-8')).hexdigest() if path else None


def attachBitcodePathToObject(bcPath, outFileName):
    """Embed the absolute path of the bitcode file bcPath into the object file outFileName.

    The path (followed by a newline) is written to a temporary file which is
    then added to the object as a section: via 'ld -r ... -sectcreate' on
    darwin, via 'objcopy --add-section' elsewhere.  If WLLVM_BC_STORE is set,
    the bitcode file is also copied into that directory under a hashed name.

    Only MACH_OBJECT and ELF_OBJECT files are handled; for anything else a
    warning is logged and the function returns without doing anything.

    Exits the process with status 0 if an OSError occurs while sizing or
    rewriting the object, and with status -1 if the tool returns non-zero.
    """
    # Don't try to attach a bitcode path to a binary.  Unfortunately
    # that won't work.
    (_, ext) = os.path.splitext(outFileName)
    _logger.debug('attachBitcodePathToObject: %s ===> %s [ext = %s]', bcPath, outFileName, ext)

    #iam: just object files, right?
    #iam: deciding by file type is still dodgy; we need a more reliable way to do this.
    fileType = FileType.getFileType(outFileName)
    if fileType not in (FileType.MACH_OBJECT, FileType.ELF_OBJECT):
        _logger.warning('Cannot attach bitcode path to "%s of type %s"', outFileName, FileType.getFileTypeString(fileType))
        return

    # Now just build a temporary text file with the full path to the
    # bitcode file that we'll write into the object file.
    f = tempfile.NamedTemporaryFile(mode='w+b', delete=False)
    orc = 0

    # The temporary file is created with delete=False, so it must be removed
    # explicitly; the finally clause guarantees that on every exit path,
    # including exceptions from copyfile and the sys.exit below.
    try:
        absBcPath = os.path.abspath(bcPath)
        with f:
            f.write(absBcPath.encode())
            f.write('\n'.encode())
            _logger.debug('Wrote "%s" to file "%s"', absBcPath, f.name)

            # Ensure buffers are flushed so that objcopy doesn't read an empty
            # file
            f.flush()
            os.fsync(f.fileno())

        binUtilsTargetPrefix = os.getenv(binutilsTargetPrefixEnv)

        # Now write our bitcode section
        if sys.platform.startswith('darwin'):
            objcopyBin = f'{binUtilsTargetPrefix}-ld' if binUtilsTargetPrefix else 'ld'
            objcopyCmd = [objcopyBin, '-r', '-keep_private_externs', outFileName, '-sectcreate', darwinSegmentName, darwinSectionName, f.name, '-o', outFileName]
        else:
            objcopyBin = f'{binUtilsTargetPrefix}-objcopy' if binUtilsTargetPrefix else 'objcopy'
            objcopyCmd = [objcopyBin, '--add-section', f'{elfSectionName}={f.name}', outFileName]

        # loicg: If the environment variable WLLVM_BC_STORE is set, copy the bitcode
        # file to that location, using a hash of the original bitcode path as a name
        storeEnv = os.getenv('WLLVM_BC_STORE')
        if storeEnv:
            hashName = getHashedPathName(absBcPath)
            copyfile(absBcPath, os.path.join(storeEnv, hashName))

        try:
            if os.path.getsize(outFileName) > 0:
                objProc = Popen(objcopyCmd)
                orc = objProc.wait()
        except OSError:
            # configure loves to immediately delete things, causing issues for
            # us here.  Just ignore it
            sys.exit(0)
    finally:
        os.remove(f.name)

    if orc != 0:
        _logger.error('objcopy failed with %s', orc)
        sys.exit(-1)


class BuilderBase:
    """State shared by the compiler-specific builders: command, mode and tool path prefix."""

    def __init__(self, cmd, mode, prefixPath=None):
        """Store the command and mode and normalise the optional compiler path prefix.

        Raises Exception if prefixPath is given but does not exist.
        """
        self.af = None     #memoize the arglist filter
        self.cmd = cmd
        self.mode = mode

        # Used as prefix path for compiler
        if prefixPath:
            self.prefixPath = prefixPath
            # Ensure prefixPath has trailing slash
            if not self.prefixPath.endswith(os.path.sep):
                self.prefixPath = self.prefixPath + os.path.sep
            # Check prefix path exists
            if not os.path.exists(self.prefixPath):
                errorMsg = 'Path to compiler "%s" does not exist'
                _logger.error(errorMsg, self.prefixPath)
                # Format the message so whoever catches this sees the path
                # rather than a literal '%s'.
                raise Exception(errorMsg % self.prefixPath)

        else:
            self.prefixPath = ''

    def getCommand(self):
        """Return self.cmd, first removing (in place) any args the filter marked forbidden."""
        if self.af is not None:
            # need to remove things like "-dead_strip"
            forbidden = self.af.forbiddenArgs
            if forbidden:
                for baddy in forbidden:
                    # Guarded so that calling getCommand a second time does
                    # not raise ValueError on an already-removed argument.
                    if baddy in self.cmd:
                        self.cmd.remove(baddy)
        return self.cmd


class ClangBuilder(BuilderBase):
    """Builder that drives clang / clang++ / flang."""

    def getBitcodeGenerationFlags(self):
        """Return the whitespace-split contents of LLVM_BITCODE_GENERATION_FLAGS, or []."""
        # iam: If the environment variable LLVM_BITCODE_GENERATION_FLAGS is set we will add them to the
        # bitcode generation step
        bitcodeFLAGS = os.getenv('LLVM_BITCODE_GENERATION_FLAGS')
        if bitcodeFLAGS:
            return bitcodeFLAGS.split()
        return []

    def getBitcodeCompiler(self):
        """Return the compiler command extended with '-emit-llvm' and any extra bitcode flags."""
        cc = self.getCompiler()
        return cc + ['-emit-llvm'] + self.getBitcodeGenerationFlags()

    def getCompiler(self):
        """Return a one-element list holding the compiler path for the current mode.

        The program name comes from LLVM_CXX_NAME / LLVM_CC_NAME / LLVM_F77_NAME
        when set (and non-empty), otherwise clang++ / clang / flang.
        Raises Exception for an unknown mode.
        """
        if self.mode == "wllvm++":
            env, prog = 'LLVM_CXX_NAME', 'clang++'
        elif self.mode == "wllvm":
            env, prog = 'LLVM_CC_NAME', 'clang'
        elif self.mode == "wfortran":
            env, prog = 'LLVM_F77_NAME', 'flang'
        else:
            raise Exception(f'Unknown mode {self.mode}')
        return [f'{self.prefixPath}{os.getenv(env) or prog}']

    def getBitcodeArglistFilter(self):
        """Return the memoized ClangBitcodeArgumentListFilter for self.cmd, creating it on first use."""
        if self.af is None:
            self.af = ClangBitcodeArgumentListFilter(self.cmd)
        return self.af


class DragoneggBuilder(BuilderBase):
    """Builder that drives gcc / g++ / gfortran with the dragonegg plugin."""

    def getBitcodeCompiler(self):
        """Return the gcc command line prefix used to emit LLVM IR through dragonegg."""
        # NOTE(review): if LLVM_DRAGONEGG_PLUGIN is unset this yields
        # '-fplugin=None' -- confirm whether that should be an error.
        pth = os.getenv('LLVM_DRAGONEGG_PLUGIN')
        cc = self.getCompiler()
        # We use '-B' to tell gcc where to look for an assembler.
        # When we build LLVM bitcode we do not want to use the GNU assembler,
        # instead we want gcc to use our own assembler (see as.py).
        cmd = cc + ['-B', asDir, f'-fplugin={pth}', '-fplugin-arg-dragonegg-emit-ir']
        _logger.debug(cmd)
        return cmd

    def getCompiler(self):
        """Return a one-element list holding the gcc-family compiler path for the current mode.

        The name is prefixPath + LLVM_GCC_PREFIX (if set) + g++ / gcc / gfortran.
        Raises Exception for an unknown mode.
        """
        pfx = os.getenv('LLVM_GCC_PREFIX', '')

        if self.mode == "wllvm++":
            mode = 'g++'
        elif self.mode == "wllvm":
            mode = 'gcc'
        elif self.mode == "wfortran":
            mode = 'gfortran'
        else:
            raise Exception(f'Unknown mode {self.mode}')
        return [f'{self.prefixPath}{pfx}{mode}']

    def getBitcodeArglistFilter(self):
        """Return the memoized ArgumentListFilter for self.cmd, creating it on first use."""
        if self.af is None:
            self.af = ArgumentListFilter(self.cmd)
        return self.af


def getBuilder(cmd, mode):
    """Return the builder selected by the LLVM_COMPILER environment variable.

    'clang' selects ClangBuilder and 'dragonegg' selects DragoneggBuilder;
    LLVM_COMPILER_PATH, when set, is passed on as the tool path prefix.
    Raises Exception if LLVM_COMPILER is unset or has any other value.
    """
    compilerEnv = 'LLVM_COMPILER'
    cstring = os.getenv(compilerEnv)
    pathPrefix = os.getenv(llvmCompilerPathEnv) # Optional

    _logger.debug('WLLVM compiler using %s', cstring)
    if pathPrefix:
        _logger.debug('WLLVM compiler path prefix "%s"', pathPrefix)

    if cstring == 'clang':
        return ClangBuilder(cmd, mode, pathPrefix)
    if cstring == 'dragonegg':
        return DragoneggBuilder(cmd, mode, pathPrefix)
    if cstring is None:
        errorMsg = ' No compiler set. Please set environment variable %s'
        _logger.critical(errorMsg, compilerEnv)
        # Format the message so the exception text names the variable.
        raise Exception(errorMsg % compilerEnv)
    errorMsg = '%s = %s : Invalid compiler type'
    _logger.critical(errorMsg, compilerEnv, str(cstring))
    raise Exception(errorMsg % (compilerEnv, str(cstring)))


def buildObject(builder):
    """Run the real compiler on the builder's command line and return its exit code."""
    objCompiler = builder.getCompiler()
    objCompiler.extend(builder.getCommand())
    proc = Popen(objCompiler)
    rc = proc.wait()
    _logger.debug('buildObject rc = %d', rc)
    return rc


# This command does not have the executable with it
def buildAndAttachBitcode(builder, af):
    """Generate bitcode for every input file and attach its path to the matching object.

    With exactly one input file in compile-only mode, the object named by
    '-o' (when given) is used directly.  Otherwise each input is processed in
    turn; when not compile-only the objects are (re)built here and linked at
    the end.  This function does not return: it exits the process with
    status 0 once done.
    """
    #iam: when we have multiple input files we'll have to keep track of their object files.
    newObjectFiles = []

    # NOTE(review): 'hidden' is handed to af.getArtifactNames; presumably it
    # selects hidden (dot-prefixed) artifact names -- confirm in ArgumentListFilter.
    hidden = not af.isCompileOnly

    if len(af.inputFiles) == 1 and af.isCompileOnly:
        _logger.debug('Compile only case: %s', af.inputFiles[0])
        # iam:
        # we could have
        # "... -c -o foo.o" or even "... -c -o foo.So" which is OK, but we could also have
        # "... -c -o crazy-assed.objectfile" which we wouldn't get right (yet)
        # so we need to be careful with the objFile and bcFile
        # maybe python-magic is in our future ...
        srcFile = af.inputFiles[0]
        (objFile, bcFile) = af.getArtifactNames(srcFile, hidden)
        if af.outputFilename is not None:
            objFile = af.outputFilename
            bcFile = af.getBitcodeFileName()
        buildBitcodeFile(builder, srcFile, bcFile)
        attachBitcodePathToObject(bcFile, objFile)

    else:

        for srcFile in af.inputFiles:
            _logger.debug('Not compile only case: %s', srcFile)
            (objFile, bcFile) = af.getArtifactNames(srcFile, hidden)
            if hidden:
                buildObjectFile(builder, srcFile, objFile)
                newObjectFiles.append(objFile)

            if srcFile.endswith('.bc'):
                _logger.debug('attaching %s to %s', srcFile, objFile)
                attachBitcodePathToObject(srcFile, objFile)
            else:
                _logger.debug('building and attaching %s to %s', bcFile, objFile)
                buildBitcodeFile(builder, srcFile, bcFile)
                attachBitcodePathToObject(bcFile, objFile)


    if not af.isCompileOnly:
        linkFiles(builder, newObjectFiles)

    sys.exit(0)


def linkFiles(builder, objectFiles):
    """Link objectFiles plus the filter's own object files and link args into the output file.

    Exits the process with the linker's status if it is non-zero.
    """
    af = builder.getBitcodeArglistFilter()
    outputFile = af.getOutputFilename()
    cc = builder.getCompiler()
    cc.extend(objectFiles)
    cc.extend(af.objectFiles)
    cc.extend(af.linkArgs)
    cc.extend(['-o', outputFile])
    proc = Popen(cc)
    rc = proc.wait()
    if rc != 0:
        _logger.warning('Failed to link "%s"', str(cc))
        sys.exit(rc)


def buildBitcodeFile(builder, srcFile, bcFile):
    """Compile srcFile to the bitcode file bcFile using the builder's bitcode compiler.

    Exits the process with the compiler's status if it is non-zero.
    """
    af = builder.getBitcodeArglistFilter()
    bcc = builder.getBitcodeCompiler()
    bcc.extend(af.compileArgs)
    bcc.extend(['-c', srcFile])
    bcc.extend(['-o', bcFile])
    _logger.debug('buildBitcodeFile: %s', bcc)
    proc = Popen(bcc)
    rc = proc.wait()
    if rc != 0:
        _logger.warning('Failed to generate bitcode "%s" for "%s"', bcFile, srcFile)
        sys.exit(rc)


def buildObjectFile(builder, srcFile, objFile):
    """Compile srcFile to the object file objFile using the builder's compiler.

    Exits the process with the compiler's status if it is non-zero.
    """
    af = builder.getBitcodeArglistFilter()
    cc = builder.getCompiler()
    cc.extend(af.compileArgs)
    cc.append(srcFile)
    cc.extend(['-c', '-o', objFile])
    _logger.debug('buildObjectFile: %s', cc)
    proc = Popen(cc)
    rc = proc.wait()
    if rc != 0:
        _logger.warning('Failed to generate object "%s" for "%s"', objFile, srcFile)
        sys.exit(rc)

# bd & iam:
#
# case 1 (compileOnly):
#
# if the -c flag exists then so do all the .o files, and we need to
# locate them and produce and embed the bit code.
#
# locating them is easy:
#    either the .o is in the cmdline and we are in the simple case,
#    or else it was generated according to getObjectFilename
#
# we then produce and attach bitcode for each inputFile in the cmdline
#
#
# case 2 (compile and link)
#
# af.inputFiles is not empty, and compileOnly is false.
# in this case the .o's may not exist, we must regenerate
# them in any case.
413 | # 414 | # 415 | # case 3 (link only) 416 | # 417 | # in this case af.inputFiles is empty and we are done 418 | # 419 | # 420 | -------------------------------------------------------------------------------- /wllvm/extraction.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | import subprocess as sp 6 | import re 7 | import pprint 8 | import tempfile 9 | import shutil 10 | import argparse 11 | import codecs 12 | 13 | from .popenwrapper import Popen 14 | 15 | from .compilers import llvmCompilerPathEnv 16 | from .compilers import elfSectionName 17 | from .compilers import darwinSegmentName 18 | from .compilers import darwinSectionName 19 | from .compilers import getHashedPathName 20 | 21 | from .filetype import FileType 22 | 23 | from .logconfig import logConfig, informUser 24 | 25 | 26 | 27 | _logger = logConfig(__name__) 28 | 29 | decode_hex = codecs.getdecoder("hex_codec") 30 | 31 | def extraction(): 32 | """ This is the entry point to extract-bc. 33 | """ 34 | 35 | (success, pArgs) = extract_bc_args() 36 | 37 | if not success: 38 | return 1 39 | 40 | if sys.platform.startswith('freebsd') or sys.platform.startswith('linux'): 41 | return process_file_unix(pArgs) 42 | if sys.platform.startswith('darwin'): 43 | return process_file_darwin(pArgs) 44 | 45 | #iam: do we work on anything else? 46 | _logger.error('Unsupported or unrecognized platform: %s', sys.platform) 47 | return 1 48 | 49 | 50 | 51 | bitCodeArchiveExtension = 'bca' 52 | moduleExtension = 'bc' 53 | 54 | # Environmental variable for cross-compilation target. 55 | binutilsTargetPrefixEnv = 'BINUTILS_TARGET_PREFIX' 56 | 57 | def getSectionSizeAndOffset(sectionName, filename): 58 | """Returns the size and offset of the section, both in bytes. 59 | 60 | Use objdump on the provided binary; parse out the fields 61 | to find the given section. 
Parses the output,and 62 | extracts thesize and offset of that section (in bytes). 63 | """ 64 | 65 | binUtilsTargetPrefix = os.getenv(binutilsTargetPrefixEnv) 66 | objdumpBin = f'{binUtilsTargetPrefix}-{"objdump"}' if binUtilsTargetPrefix else 'objdump' 67 | objdumpCmd = [objdumpBin, '-h', '-w', filename] 68 | objdumpProc = Popen(objdumpCmd, stdout=sp.PIPE) 69 | 70 | objdumpOutput = objdumpProc.communicate()[0] 71 | if objdumpProc.returncode != 0: 72 | _logger.error('Could not dump %s', filename) 73 | sys.exit(-1) 74 | 75 | for line in [l.decode('utf-8') for l in objdumpOutput.splitlines()]: 76 | fields = line.split() 77 | if len(fields) <= 7: 78 | continue 79 | if fields[1] != sectionName: 80 | continue 81 | try: 82 | size = int(fields[2], 16) 83 | offset = int(fields[5], 16) 84 | return (size, offset) 85 | except ValueError: 86 | continue 87 | 88 | # The needed section could not be found 89 | _logger.warning('Could not find "%s" ELF section in "%s", so skipping this entry.', sectionName, filename) 90 | return None 91 | 92 | def getSectionContent(size, offset, filename): 93 | """Reads the entire content of an ELF section into a string.""" 94 | with open(filename, mode='rb') as f: 95 | f.seek(offset) 96 | d = '' 97 | try: 98 | c = f.read(size) 99 | d = c.decode('utf-8') 100 | except UnicodeDecodeError: 101 | _logger.error('Failed to read section containing:') 102 | print(c) 103 | raise 104 | # The linker pads sections with null bytes; our real data 105 | # cannot have null bytes because it is just text. Discard 106 | # nulls. 107 | return d.replace('\0', '') 108 | 109 | 110 | # otool hexdata pattern. 111 | otool_hexdata = re.compile(r'^(?:[0-9a-f]{8,16}\t)?([0-9a-f\s]+)$', re.IGNORECASE) 112 | 113 | #iam: 04/09/2021 114 | def convert2octects(otooln): 115 | """Converts a otool output line into a list of octets. 116 | 117 | The otool output format varies between Intel and M1 chips. 
118 | 119 | Intel: 120 | 0000000000000070 2f 55 73 65 72 73 2f 65 32 37 36 35 38 2f 52 65 121 | 122 | M1: 123 | 000000010000c000 6573552f 692f7372 522f6d61 736f7065 124 | 125 | The input string corresponds to substring after the tab that follows 126 | tthe starting address. 127 | 128 | """ 129 | octets = [] 130 | chunks = otooln.split() 131 | for chunk in chunks: 132 | if len(chunk) == 2: 133 | octets.append(chunk) 134 | else: 135 | twoples = [chunk[i:i+2] for i in range(0, len(chunk), 2)] 136 | twoples.reverse() 137 | octets.extend(twoples) 138 | return octets 139 | 140 | def extract_section_darwin(inputFile): 141 | """Extracts the section as a string, the darwin version. 142 | 143 | Uses otool to extract the section, then processes it 144 | to a usable state. 145 | 146 | iam: 04/09/2021 Using otool here is starting to be a real pain. 147 | The output format varies between XCode versions, and also between Intel and M1 148 | chips. 149 | """ 150 | retval = None 151 | 152 | otoolCmd = ['otool', '-X', '-s', darwinSegmentName, darwinSectionName, inputFile] 153 | otoolProc = Popen(otoolCmd, stdout=sp.PIPE) 154 | 155 | otoolOutput = otoolProc.communicate()[0] 156 | if otoolProc.returncode != 0: 157 | _logger.error('otool failed on %s', inputFile) 158 | sys.exit(-1) 159 | 160 | lines = otoolOutput.decode('utf8').splitlines() 161 | _logger.debug('otool extracted:\n%s\n', lines) 162 | # iam 03/06/2021: so otool prior to llvm-otool(1): Apple Inc. version cctools-977.1 163 | # would output 'Contents of (__WLLVM,__llvm_bc) section' as the first line 164 | # of the extraction. 
This seems to have disappeared so we need to be careful 165 | # here: 166 | if lines and lines[0] and lines[0].startswith('Contents'): 167 | _logger.debug('dropping header: "%s"', lines[0]) 168 | lines = lines[1:] 169 | try: 170 | octets = [] 171 | for line in lines: 172 | m = otool_hexdata.match(line) 173 | if not m: 174 | _logger.debug('otool output:\n\t%s\nDID NOT match expectations.', line) 175 | continue 176 | octetline = m.group(1) 177 | octets.extend(convert2octects(octetline)) 178 | _logger.debug('We parsed this as:\n%s', octets) 179 | retval = decode_hex(''.join(octets))[0].splitlines() 180 | # these have become bytes in the "evolution" of python 181 | retval = [ f.decode('utf8') for f in retval] 182 | _logger.debug('decoded:\n%s\n', retval) 183 | if not retval: 184 | _logger.error('%s contained no %s segment', inputFile, darwinSegmentName) 185 | except Exception as e: 186 | _logger.error('extract_section_darwin: %s', str(e)) 187 | return retval 188 | 189 | def extract_section_linux(inputFile): 190 | """Extracts the section as a string, the *nix version.""" 191 | val = getSectionSizeAndOffset(elfSectionName, inputFile) 192 | if val is None: 193 | return [] 194 | (sectionSize, sectionOffset) = val 195 | content = getSectionContent(sectionSize, sectionOffset, inputFile) 196 | contents = content.split('\n') 197 | if not contents: 198 | _logger.error('%s contained no %s. section is empty', inputFile, elfSectionName) 199 | return contents 200 | 201 | 202 | def getStorePath(bcPath): 203 | storeEnv = os.getenv('WLLVM_BC_STORE') 204 | if storeEnv: 205 | hashName = getHashedPathName(bcPath) 206 | hashPath = os.path.join(storeEnv, hashName) 207 | if os.path.isfile(hashPath): 208 | return hashPath 209 | return None 210 | 211 | 212 | def getBitcodePath(bcPath): 213 | """Tries to resolve the whereabouts of the bitcode. 214 | 215 | First, checks if the given path points to an existing bitcode file. 
216 | If it does not, it tries to look for the bitcode file in the store directory given 217 | by the environment variable WLLVM_BC_STORE. 218 | """ 219 | 220 | if not bcPath or os.path.isfile(bcPath): 221 | return bcPath 222 | 223 | storePath = getStorePath(bcPath) 224 | if storePath: 225 | return storePath 226 | return bcPath 227 | 228 | def executeLinker(linkCmd): 229 | try: 230 | # Use blocking call here since the output file needs to be generated 231 | # before we can continue linking. 232 | exitCode = sp.check_call(linkCmd) 233 | except OSError as e: 234 | if e.errno == 2: 235 | errorMsg = 'Your llvm-link does not seem to be easy to find.\nEither install it or use the -l llvmLinker option.' 236 | else: 237 | errorMsg = f'OS error({e.errno}): {e.strerror}' 238 | _logger.error(errorMsg) 239 | raise Exception(errorMsg) from e 240 | 241 | return exitCode 242 | 243 | def incrementallyLinkFiles(pArgs, fileNames): 244 | linkCmd = [pArgs.llvmLinker, '-v'] if pArgs.verboseFlag else [pArgs.llvmLinker] 245 | 246 | linkCmd.append(f'-o={pArgs.outputFile}') 247 | 248 | # fileNames has already been adjusted and checked in function linkFiles. 249 | first, remaining = fileNames[0], fileNames[1:] 250 | linkCmd.append(first) 251 | 252 | exitCode = executeLinker(linkCmd) 253 | 254 | # Use the output file as part of the next linking process to overwrite 255 | # it incrementally. 256 | linkCmd.append(pArgs.outputFile) 257 | 258 | for bc_file in remaining: 259 | # Adjust the previously first file path and link remaining files. 260 | # The linking process has to be done with blocking calls here too 261 | # since we are overwriting the file completely everytime. 
262 | linkCmd[len(linkCmd) - 2] = bc_file 263 | exitCode = executeLinker(linkCmd) 264 | 265 | _logger.info('%s returned %s', pArgs.llvmLinker, str(exitCode)) 266 | 267 | return exitCode 268 | 269 | 270 | def linkFiles(pArgs, fileNames): 271 | linkCmd = [pArgs.llvmLinker, '-v'] if pArgs.verboseFlag else [pArgs.llvmLinker] 272 | 273 | linkCmd.append(f'-o={pArgs.outputFile}') 274 | 275 | fileNames = map(getBitcodePath, fileNames) 276 | fileNames = [x for x in fileNames if x != ''] 277 | 278 | # Check the size of the argument string first: If it is larger than the 279 | # allowed size specified by 'getconf ARG_MAX' we have to link the files 280 | # incrementally to avoid weird errors. 281 | arg_max = int(sp.getoutput('getconf ARG_MAX')) 282 | str_len = sum([len(x) for x in fileNames]) 283 | if str_len > arg_max: 284 | return incrementallyLinkFiles(pArgs, fileNames) 285 | 286 | linkCmd.extend(fileNames) 287 | 288 | exitCode = executeLinker(linkCmd) 289 | _logger.info('%s returned %s', pArgs.llvmLinker, str(exitCode)) 290 | return exitCode 291 | 292 | 293 | def archiveFiles(pArgs, fileNames): 294 | retCode = 0 295 | # We do not want full paths in the archive so we need to chdir into each 296 | # bitcode's folder. 
Handle this by calling llvm-ar once for all bitcode 297 | # files in the same directory 298 | 299 | # Map of directory names to list of bitcode files in that directory 300 | dirToBCMap = {} 301 | for bitCodeFile in fileNames: 302 | dirName = os.path.dirname(bitCodeFile) 303 | basename = os.path.basename(bitCodeFile) 304 | if dirName in dirToBCMap: 305 | dirToBCMap[dirName].append(basename) 306 | else: 307 | dirToBCMap[dirName] = [basename] 308 | 309 | _logger.debug('Built up directory to bitcode file list map:\n%s', pprint.pformat(dirToBCMap)) 310 | 311 | for (dirname, bcList) in dirToBCMap.items(): 312 | _logger.debug('Changing directory to "%s"', dirname) 313 | os.chdir(dirname) 314 | larCmd = [pArgs.llvmArchiver, 'rs', pArgs.outputFile] + bcList 315 | larProc = Popen(larCmd) 316 | retCode = larProc.wait() 317 | if retCode != 0: 318 | _logger.error('Failed to execute:\n%s', pprint.pformat(larCmd)) 319 | break 320 | 321 | if retCode == 0: 322 | informUser(f'Generated LLVM bitcode archive {pArgs.outputFile}\n') 323 | else: 324 | _logger.error('Failed to generate LLVM bitcode archive') 325 | 326 | return retCode 327 | 328 | def extract_from_thin_archive(inputFile): 329 | """Extracts the paths from the thin archive. 
330 | 331 | """ 332 | retval = None 333 | 334 | arCmd = ['ar', '-t', inputFile] #iam: check if this might be os dependent 335 | arProc = Popen(arCmd, stdout=sp.PIPE) 336 | 337 | arOutput = arProc.communicate()[0] 338 | if arProc.returncode != 0: 339 | _logger.error('ar failed on %s', inputFile) 340 | else: 341 | retval = arOutput.splitlines() 342 | return retval 343 | 344 | 345 | 346 | def handleExecutable(pArgs): 347 | 348 | fileNames = pArgs.extractor(pArgs.inputFile) 349 | 350 | if not fileNames: 351 | return 1 352 | 353 | if pArgs.sortBitcodeFilesFlag: 354 | fileNames = sorted(fileNames) 355 | 356 | 357 | if pArgs.manifestFlag: 358 | writeManifest(f'{pArgs.inputFile}.llvm.manifest', fileNames) 359 | 360 | if pArgs.outputFile is None: 361 | pArgs.outputFile = f'{pArgs.inputFile}.{moduleExtension}' 362 | 363 | return linkFiles(pArgs, fileNames) 364 | 365 | 366 | def handleThinArchive(pArgs): 367 | 368 | objectPaths = extract_from_thin_archive(pArgs.inputFile) 369 | 370 | if not objectPaths: 371 | return 1 372 | 373 | bcFiles = [] 374 | for p in objectPaths: 375 | _logger.debug('handleThinArchive: processing %s', p) 376 | contents = pArgs.extractor(p) 377 | for c in contents: 378 | if c: 379 | _logger.debug('\t including %s', c) 380 | bcFiles.append(str(c)) 381 | 382 | 383 | 384 | return buildArchive(pArgs, bcFiles) 385 | 386 | #iam: do we want to preserve the order in the archive? if so we need to return both the list and the dict. 
387 | def fetchTOC(inputFile): 388 | toc = {} 389 | 390 | arCmd = ['ar', '-t', inputFile] #iam: check if this might be os dependent 391 | arProc = Popen(arCmd, stdout=sp.PIPE) 392 | 393 | arOutput = arProc.communicate()[0] 394 | if arProc.returncode != 0: 395 | _logger.error('ar failed on %s', inputFile) 396 | return toc 397 | 398 | lines = arOutput.splitlines() 399 | 400 | for line in lines: 401 | if line in toc: 402 | toc[line] += 1 403 | else: 404 | toc[line] = 1 405 | 406 | return toc 407 | 408 | 409 | def extractFile(archive, filename, instance): 410 | arCmd = ['ar', 'xN', str(instance), archive, filename] #iam: check if this might be os dependent 411 | try: 412 | arP = Popen(arCmd) 413 | except Exception as e: 414 | _logger.error(e) 415 | return False 416 | 417 | arPE = arP.wait() 418 | 419 | if arPE != 0: 420 | errorMsg = f'Failed to execute archiver with command {arCmd}' 421 | _logger.error(errorMsg) 422 | return False 423 | 424 | return True 425 | 426 | 427 | 428 | def handleArchiveDarwin(pArgs): 429 | originalDir = os.getcwd() # This will be the destination 430 | 431 | pArgs.arCmd.append(pArgs.inputFile) 432 | 433 | # Make temporary directory to extract objects to 434 | tempDir = '' 435 | bitCodeFiles = [] 436 | 437 | try: 438 | 439 | 440 | tempDir = tempfile.mkdtemp(suffix='wllvm') 441 | os.chdir(tempDir) 442 | 443 | # Extract objects from archive 444 | try: 445 | arP = Popen(pArgs.arCmd) 446 | except OSError as e: 447 | if e.errno == 2: 448 | errorMsg = 'Your ar does not seem to be easy to find.\n' 449 | else: 450 | errorMsg = f'OS error({e.errno}): {e.strerror}' 451 | _logger.error(errorMsg) 452 | raise Exception(errorMsg) from e 453 | 454 | arPE = arP.wait() 455 | 456 | if arPE != 0: 457 | errorMsg = f'Failed to execute archiver with command {pArgs.arCmd}' 458 | _logger.error(errorMsg) 459 | raise Exception(errorMsg) 460 | 461 | _logger.debug(2) 462 | 463 | # Iterate over objects and examine their bitcode inserts 464 | for (root, _, files) in 
os.walk(tempDir): 465 | _logger.debug('Exploring "%s"', root) 466 | for f in files: 467 | fPath = os.path.join(root, f) 468 | if FileType.getFileType(fPath) == pArgs.fileType: 469 | 470 | # Extract bitcode locations from object 471 | contents = pArgs.extractor(fPath) 472 | 473 | for bcFile in contents: 474 | if bcFile != '': 475 | if not os.path.exists(bcFile): 476 | _logger.warning('%s lists bitcode library "%s" but it could not be found', f, bcFile) 477 | else: 478 | bitCodeFiles.append(bcFile) 479 | else: 480 | _logger.info('Ignoring file "%s" in archive', f) 481 | 482 | _logger.info('Found the following bitcode file names to build bitcode archive:\n%s', pprint.pformat(bitCodeFiles)) 483 | 484 | finally: 485 | # Delete the temporary folder 486 | _logger.debug('Deleting temporary folder "%s"', tempDir) 487 | shutil.rmtree(tempDir) 488 | 489 | #write the manifest file if asked for 490 | if pArgs.manifestFlag: 491 | writeManifest(f'{pArgs.inputFile}.llvm.manifest', bitCodeFiles) 492 | 493 | # Build bitcode archive 494 | os.chdir(originalDir) 495 | 496 | return buildArchive(pArgs, bitCodeFiles) 497 | 498 | 499 | 500 | #iam: 5/1/2018 501 | def handleArchiveLinux(pArgs): 502 | """ handleArchiveLinux processes a archive, and creates either a bitcode archive, or a module, depending on the flags used. 503 | 504 | Archives on Linux are strange beasts. handleArchive processes the archive by: 505 | 506 | 1. first creating a table of contents of the archive, which maps file names (in the archive) to the number of 507 | times a file with that name is stored in the archive. 508 | 509 | 2. for each OCCURENCE of a file (name and count) it extracts the section from the object file, and adds the 510 | bitcode paths to the bitcode list. 511 | 512 | 3. 
it then either links all these bitcode files together using llvm-link, or else is creates a bitcode 513 | archive using llvm-ar 514 | 515 | """ 516 | 517 | inputFile = pArgs.inputFile 518 | 519 | originalDir = os.getcwd() # We want to end up back where we started. 520 | 521 | toc = fetchTOC(inputFile) 522 | 523 | if not toc: 524 | _logger.warning('No files found, so nothing to be done.') 525 | return 0 526 | 527 | bitCodeFiles = [] 528 | 529 | try: 530 | tempDir = tempfile.mkdtemp(suffix='wllvm') 531 | os.chdir(tempDir) 532 | 533 | for filename in toc: 534 | count = toc[filename] 535 | for i in range(1, count + 1): 536 | 537 | # extact out the ith instance of filename 538 | if extractFile(inputFile, filename, i): 539 | # Extract bitcode locations from object 540 | contents = pArgs.extractor(filename) 541 | _logger.debug('From instance %s of %s in %s we extracted\n\t%s\n', i, filename, inputFile, contents) 542 | if contents: 543 | for path in contents: 544 | if path: 545 | bitCodeFiles.append(path) 546 | else: 547 | _logger.debug('From instance %s of %s in %s we extracted NOTHING\n', i, filename, inputFile) 548 | 549 | finally: 550 | # Delete the temporary folder 551 | _logger.debug('Deleting temporary folder "%s"', tempDir) 552 | shutil.rmtree(tempDir) 553 | 554 | _logger.debug('From instance %s we extracted\n\t%s\n', inputFile, bitCodeFiles) 555 | 556 | # Build bitcode archive 557 | os.chdir(originalDir) 558 | 559 | return buildArchive(pArgs, bitCodeFiles) 560 | 561 | 562 | 563 | 564 | 565 | 566 | def buildArchive(pArgs, bitCodeFiles): 567 | 568 | if pArgs.bitcodeModuleFlag: 569 | _logger.info('Generating LLVM Bitcode module from an archive') 570 | else: 571 | _logger.info('Generating LLVM Bitcode archive from an archive') 572 | 573 | if pArgs.sortBitcodeFilesFlag: 574 | bitCodeFiles = sorted(bitCodeFiles) 575 | 576 | #write the manifest file if asked for 577 | if pArgs.manifestFlag: 578 | writeManifest(f'{pArgs.inputFile}.llvm.manifest', bitCodeFiles) 579 | 580 | 
if pArgs.bitcodeModuleFlag: 581 | 582 | # Pick output file path if outputFile not set 583 | if pArgs.outputFile is None: 584 | pArgs.outputFile = pArgs.inputFile 585 | pArgs.outputFile += '.' + moduleExtension 586 | 587 | informUser(f'Writing output to {pArgs.outputFile}\n') 588 | return linkFiles(pArgs, bitCodeFiles) 589 | 590 | # Pick output file path if outputFile not set 591 | if pArgs.outputFile is None: 592 | bcaExtension = '.' + bitCodeArchiveExtension 593 | if pArgs.inputFile.endswith('.a'): 594 | # Strip off .a suffix 595 | pArgs.outputFile = pArgs.inputFile[:-2] 596 | pArgs.outputFile += bcaExtension 597 | else: 598 | pArgs.outputFile = pArgs.inputFile + bcaExtension 599 | 600 | informUser(f'Writing output to {pArgs.outputFile}\n') 601 | return archiveFiles(pArgs, bitCodeFiles) 602 | 603 | 604 | def writeManifest(manifestFile, bitCodeFiles): 605 | with open(manifestFile, 'w') as output: 606 | for f in bitCodeFiles: 607 | output.write(f'{f}\n') 608 | sf = getStorePath(f) 609 | if sf: 610 | output.write(f'{sf}\n') 611 | _logger.warning('Manifest written to %s', manifestFile) 612 | 613 | 614 | 615 | class ExtractedArgs: 616 | 617 | def __init__(self): 618 | self.fileType = None 619 | self.outputFile = None 620 | self.inputFile = None 621 | self.output = None 622 | self.extractor = None 623 | self.arCmd = None 624 | 625 | 626 | def extract_bc_args(): 627 | 628 | # do we need a path in front? 629 | llvmToolPrefix = os.getenv(llvmCompilerPathEnv) 630 | if not llvmToolPrefix: 631 | llvmToolPrefix = '' 632 | 633 | # is our linker called something different? 634 | llvmLinkerName = os.getenv('LLVM_LINK_NAME') 635 | if not llvmLinkerName: 636 | llvmLinkerName = 'llvm-link' 637 | llvmLinker = os.path.join(llvmToolPrefix, llvmLinkerName) 638 | 639 | # is our archiver called something different? 
640 | llvmArchiverName = os.getenv('LLVM_AR_NAME') 641 | if not llvmArchiverName: 642 | llvmArchiverName = 'llvm-ar' 643 | llvmArchiver = os.path.join(llvmToolPrefix, llvmArchiverName) 644 | 645 | parser = argparse.ArgumentParser(description=__doc__) 646 | parser.add_argument(dest='inputFile', 647 | help='A binary produced by wllvm/wllvm++') 648 | parser.add_argument('--linker', '-l', 649 | dest='llvmLinker', 650 | help='The LLVM bitcode linker to use. Default "%(default)s"', 651 | default=llvmLinker) 652 | parser.add_argument('--archiver', '-a', 653 | dest='llvmArchiver', 654 | help='The LLVM bitcode archiver to use. Default "%(default)s"', 655 | default=llvmArchiver) 656 | parser.add_argument('--verbose', '-v', 657 | dest='verboseFlag', 658 | help='Call the external procedures in verbose mode.', 659 | action="store_true") 660 | parser.add_argument('--manifest', '-m', 661 | dest='manifestFlag', 662 | help='Write a manifest file listing all the .bc files used.', 663 | action='store_true') 664 | parser.add_argument('--sort', '-s', 665 | dest='sortBitcodeFilesFlag', 666 | help='Sort the list of bitcode files (for debugging).', 667 | action='store_true') 668 | parser.add_argument('--bitcode', '-b', 669 | dest='bitcodeModuleFlag', 670 | help='Extract a bitcode module rather than an archive. ' + 671 | 'Only useful when extracting from an archive.', 672 | action='store_true') 673 | parser.add_argument('--output', '-o', 674 | dest='outputFile', 675 | help='The output file. Defaults to a file in the same directory ' + 676 | 'as the input with the same name as the input but with an ' + 677 | 'added file extension (.'+ moduleExtension + ' for bitcode '+ 678 | 'modules and .' 
+ bitCodeArchiveExtension +' for bitcode archives)', 679 | default=None) 680 | pArgs = parser.parse_args(namespace=ExtractedArgs()) 681 | 682 | 683 | # Check file exists 684 | if not os.path.exists(pArgs.inputFile): 685 | _logger.error('File "%s" does not exist.', pArgs.inputFile) 686 | return (False, None) 687 | 688 | pArgs.inputFile = os.path.abspath(pArgs.inputFile) 689 | 690 | 691 | # Check output destination if set 692 | outputFile = pArgs.outputFile 693 | if outputFile is not None: 694 | # Get Absolute output path 695 | outputFile = os.path.abspath(outputFile) 696 | if not os.path.exists(os.path.dirname(outputFile)): 697 | _logger.error('Output directory "%s" does not exist.', os.path.dirname(outputFile)) 698 | return (False, None) 699 | 700 | pArgs.output = outputFile 701 | 702 | return (True, pArgs) 703 | 704 | 705 | 706 | 707 | def process_file_unix(pArgs): 708 | retval = 1 709 | ft = FileType.getFileType(pArgs.inputFile) 710 | _logger.debug('Detected file type is %s', FileType.revMap[ft]) 711 | 712 | pArgs.arCmd = ['ar', 'xv'] if pArgs.verboseFlag else ['ar', 'x'] 713 | pArgs.extractor = extract_section_linux 714 | pArgs.fileType = FileType.ELF_OBJECT 715 | 716 | if ft in (FileType.ELF_EXECUTABLE, FileType.ELF_SHARED, FileType.ELF_OBJECT): 717 | _logger.info('Generating LLVM Bitcode module') 718 | retval = handleExecutable(pArgs) 719 | elif ft == FileType.ARCHIVE: 720 | retval = handleArchiveLinux(pArgs) 721 | elif ft == FileType.THIN_ARCHIVE: 722 | retval = handleThinArchive(pArgs) 723 | else: 724 | _logger.error('File "%s" of type %s cannot be used', pArgs.inputFile, FileType.revMap[ft]) 725 | return retval 726 | 727 | 728 | 729 | def process_file_darwin(pArgs): 730 | retval = 1 731 | ft = FileType.getFileType(pArgs.inputFile) 732 | _logger.debug('Detected file type is %s', FileType.revMap[ft]) 733 | 734 | pArgs.arCmd = ['ar', '-x', '-v'] if pArgs.verboseFlag else ['ar', '-x'] 735 | pArgs.extractor = extract_section_darwin 736 | pArgs.fileType =
FileType.MACH_OBJECT 737 | 738 | if ft in (FileType.MACH_EXECUTABLE, FileType.MACH_SHARED, FileType.MACH_OBJECT): 739 | _logger.info('Generating LLVM Bitcode module') 740 | retval = handleExecutable(pArgs) 741 | elif ft == FileType.ARCHIVE: 742 | _logger.info('Handling archive') 743 | retval = handleArchiveDarwin(pArgs) 744 | 745 | 746 | else: 747 | _logger.error('File "%s" of type %s cannot be used', pArgs.inputFile, FileType.revMap[ft]) 748 | return retval 749 | -------------------------------------------------------------------------------- /wllvm/arglistfilter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | import os 4 | import re 5 | import sys 6 | 7 | # Internal logger 8 | _logger = logging.getLogger(__name__) 9 | 10 | # Flag for dumping 11 | DUMPING = False 12 | 13 | # This class applies filters to GCC argument lists. It has a few 14 | # default arguments that it records, but does not modify the argument 15 | # list at all. It can be subclassed to change this behavior. 16 | # 17 | # The idea is that all flags accepting a parameter must be specified 18 | # so that they know to consume an extra token from the input stream. 19 | # Flags and arguments can be recorded in any way desired by providing 20 | # a callback. Each callback/flag has an arity specified - zero arity 21 | # flags (such as -v) are provided to their callback as-is. Higher 22 | # arities remove the appropriate number of arguments from the list and 23 | # pass them to the callback with the flag. 24 | # 25 | # Most flags can be handled with a simple lookup in a table - these 26 | # are exact matches. Other flags are more complex and can be 27 | # recognized by regular expressions. All regular expressions must be 28 | # tried, obviously. The first one that matches is taken, and no order 29 | # is specified. Try to avoid overlapping patterns. 
class ArgumentListFilter:
    """Classify the arguments of a GCC/Clang style command line.

    The constructor walks the argument list once.  Every argument is first
    looked up in a table of exact flags and, failing that, tried against a
    table of regular expressions.  Both tables map to ``(arity, handler)``
    pairs: ``arity`` further arguments are consumed from the input and the
    handler is invoked as ``handler(self, flag, *consumedArgs)``.

    The handlers record what they see in these attributes:

      * ``inputFiles``, ``objectFiles``, ``outputFilename``
      * ``compileArgs``, ``linkArgs``, ``forbiddenArgs``
      * the ``is...`` booleans (``isVerbose``, ``isCompileOnly``, ...)

    The argument list itself is never modified; it is kept as ``inputList``.
    """

    def __init__(self, inputList, exactMatches=None, patternMatches=None):
        """Parse ``inputList`` and record the result on this instance.

        Args:
            inputList: iterable of command-line argument strings.
            exactMatches: optional mapping ``flag -> (arity, handler)`` that
                is layered over the built-in exact-match table.
            patternMatches: optional mapping ``regex -> (arity, handler)``
                that is layered over the built-in pattern table.

        Raises:
            IndexError: if a flag with non-zero arity is not followed by
                enough arguments.
        """
        #iam: try and keep track of the files, input object, and output
        self.inputList = inputList
        self.inputFiles = []
        self.objectFiles = []
        self.outputFilename = None

        #iam: try and split the args into linker and compiler switches
        self.compileArgs = []
        self.linkArgs = []
        # currently only dead_strip belongs here; but I guess there could be more.
        self.forbiddenArgs = []

        self.isVerbose = False
        self.isDependencyOnly = False
        self.isPreprocessOnly = False
        self.isAssembleOnly = False
        self.isAssembly = False
        self.isCompileOnly = False
        self.isEmitLLVM = False
        self.isStandardIn = False

        # Each call builds fresh tables, so caller overrides never leak
        # from one instance to the next.
        argExactMatches = self._defaultExactMatches()
        if exactMatches:
            argExactMatches.update(exactMatches)
        argPatterns = self._defaultPatternMatches()
        if patternMatches:
            argPatterns.update(patternMatches)

        self._inputArgs = collections.deque(inputList)

        #iam: parse the cmd line, bailing if we discover that there will be no second phase.
        while (self._inputArgs and
               not (self.isAssembleOnly or
                    self.isPreprocessOnly)):
            # Get the next argument
            currentItem = self._inputArgs.popleft()
            _logger.debug('Trying to match item %s', currentItem)
            # First, see if this exact flag has a handler in the table.
            # This is a cheap test.  Otherwise, see if the input matches
            # some pattern with a handler that we recognize
            if currentItem in argExactMatches:
                (arity, handler) = argExactMatches[currentItem]
                flagArgs = self._shiftArgs(arity)
                handler(self, currentItem, *flagArgs)
            elif currentItem == '-Wl,--start-group':
                # Swallow everything up to and including the matching
                # end-group marker and hand it over as one unit.
                linkingGroup = [currentItem]
                terminated = False
                while self._inputArgs:
                    groupCurrent = self._inputArgs.popleft()
                    linkingGroup.append(groupCurrent)
                    if groupCurrent == "-Wl,--end-group":
                        terminated = True
                        break
                if not terminated:
                    _logger.warning('Did not find a closing "-Wl,--end-group" to match "-Wl,--start-group"')
                self.linkingGroupCallback(linkingGroup)
            else:
                # Patterns are tried in table order; the first hit wins.
                for pattern, (arity, handler) in argPatterns.items():
                    if re.match(pattern, currentItem):
                        flagArgs = self._shiftArgs(arity)
                        handler(self, currentItem, *flagArgs)
                        break
                else:
                    # If no action has been specified, this is a zero-argument
                    # flag that we should just keep.
                    _logger.warning('Did not recognize the compiler flag "%s"', currentItem)
                    self.compileUnaryCallback(currentItem)

        if DUMPING:
            self.dump()

    @staticmethod
    def _defaultExactMatches():
        """Return a new dict of the built-in ``flag -> (arity, handler)`` entries."""
        return {

            '-': (0, ArgumentListFilter.standardInCallback),

            '-o': (1, ArgumentListFilter.outputFileCallback),
            '-c': (0, ArgumentListFilter.compileOnlyCallback),
            '-E': (0, ArgumentListFilter.preprocessOnlyCallback),
            '-S': (0, ArgumentListFilter.assembleOnlyCallback),

            '-v': (0, ArgumentListFilter.verboseFlagCallback),
            '--verbose': (0, ArgumentListFilter.verboseFlagCallback),
            '--param': (1, ArgumentListFilter.compileBinaryCallback),
            '-aux-info': (1, ArgumentListFilter.defaultBinaryCallback),

            #iam: presumably the len(inputFiles) == 0 in this case
            '--version': (0, ArgumentListFilter.compileOnlyCallback),

            #warnings (apart from the regex below)
            '-w': (0, ArgumentListFilter.compileUnaryCallback),
            '-W': (0, ArgumentListFilter.compileUnaryCallback),

            #iam: if this happens, then we need to stop and think.
            '-emit-llvm': (0, ArgumentListFilter.emitLLVMCallback),

            #iam: buildworld and buildkernel use these flags
            '-pipe': (0, ArgumentListFilter.compileUnaryCallback),
            '-undef': (0, ArgumentListFilter.compileUnaryCallback),
            '-nostdinc': (0, ArgumentListFilter.compileUnaryCallback),
            '-nostdinc++': (0, ArgumentListFilter.compileUnaryCallback),
            '-Qunused-arguments': (0, ArgumentListFilter.compileUnaryCallback),
            '-no-integrated-as': (0, ArgumentListFilter.compileUnaryCallback),
            '-integrated-as': (0, ArgumentListFilter.compileUnaryCallback),
            #iam: gcc uses this in both compile and link, but clang only in compile
            #iam: actually on linux it looks to be both
            '-pthread': (0, ArgumentListFilter.compileLinkUnaryCallback),
            # I think this is a compiler search path flag.  It is
            # clang only, so I don't think it counts as a separate CPP
            # flag.  Android uses this flag with its clang builds.
            '-nostdlibinc': (0, ArgumentListFilter.compileUnaryCallback),

            #iam: arm stuff
            '-mno-omit-leaf-frame-pointer': (0, ArgumentListFilter.compileUnaryCallback),
            '-maes': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-aes': (0, ArgumentListFilter.compileUnaryCallback),
            '-mavx': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-avx': (0, ArgumentListFilter.compileUnaryCallback),
            '-mcmodel=kernel': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-red-zone': (0, ArgumentListFilter.compileUnaryCallback),
            '-mmmx': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-mmx': (0, ArgumentListFilter.compileUnaryCallback),
            '-msse': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-sse2': (0, ArgumentListFilter.compileUnaryCallback),
            '-msse2': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-sse3': (0, ArgumentListFilter.compileUnaryCallback),
            '-msse3': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-sse': (0, ArgumentListFilter.compileUnaryCallback),
            '-msoft-float': (0, ArgumentListFilter.compileUnaryCallback),
            '-m3dnow': (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-3dnow': (0, ArgumentListFilter.compileUnaryCallback),
            '-m16': (0, ArgumentListFilter.compileLinkUnaryCallback),
            '-m32': (0, ArgumentListFilter.compileLinkUnaryCallback),
            '-mx32': (0, ArgumentListFilter.compileLinkUnaryCallback),
            '-m64': (0, ArgumentListFilter.compileLinkUnaryCallback),
            '-miamcu': (0, ArgumentListFilter.compileUnaryCallback),
            '-mstackrealign': (0, ArgumentListFilter.compileUnaryCallback),
            '-mretpoline-external-thunk': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            '-mno-fp-ret-in-387': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            '-mskip-rax-setup': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            '-mindirect-branch-register': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            # Preprocessor assertion
            '-A': (1, ArgumentListFilter.compileBinaryCallback),
            '-D': (1, ArgumentListFilter.compileBinaryCallback),
            '-U': (1, ArgumentListFilter.compileBinaryCallback),

            '-arch': (1, ArgumentListFilter.compileBinaryCallback), #iam: openssl

            # Dependency generation
            '-M': (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MM': (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MF': (1, ArgumentListFilter.dependencyBinaryCallback),
            '-MG': (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MP': (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MT': (1, ArgumentListFilter.dependencyBinaryCallback),
            '-MQ': (1, ArgumentListFilter.dependencyBinaryCallback),
            '-MD': (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MMD': (0, ArgumentListFilter.dependencyOnlyCallback),

            # Include
            '-I': (1, ArgumentListFilter.compileBinaryCallback),
            '-idirafter': (1, ArgumentListFilter.compileBinaryCallback),
            '-include': (1, ArgumentListFilter.compileBinaryCallback),
            '-imacros': (1, ArgumentListFilter.compileBinaryCallback),
            '-iprefix': (1, ArgumentListFilter.compileBinaryCallback),
            '-iwithprefix': (1, ArgumentListFilter.compileBinaryCallback),
            '-iwithprefixbefore': (1, ArgumentListFilter.compileBinaryCallback),
            '-isystem': (1, ArgumentListFilter.compileBinaryCallback),
            '-isysroot': (1, ArgumentListFilter.compileBinaryCallback),
            '-iquote': (1, ArgumentListFilter.compileBinaryCallback),
            '-imultilib': (1, ArgumentListFilter.compileBinaryCallback),

            # Sysroot
            # Driver expands this into include options when compiling and
            # library options when linking
            '--sysroot': (1, ArgumentListFilter.compileLinkBinaryCallback),

            # Architecture
            '-target': (1, ArgumentListFilter.compileBinaryCallback),
            '-marm': (0, ArgumentListFilter.compileUnaryCallback),

            # Language
            '-ansi': (0, ArgumentListFilter.compileUnaryCallback),
            '-pedantic': (0, ArgumentListFilter.compileUnaryCallback),
            #iam: i notice that yices configure passes -xc so
            # we should have a fall back pattern that captures the case
            # when there is no space between the x and the language.
            # for what its worth: the manual says the language can be one of
            # c  objective-c  c++ c-header  cpp-output  c++-cpp-output
            # assembler  assembler-with-cpp
            # BD: care to comment on your configure?

            '-x': (1, ArgumentListFilter.compileBinaryCallback),

            # Debug
            '-g': (0, ArgumentListFilter.compileUnaryCallback),
            '-g0': (0, ArgumentListFilter.compileUnaryCallback), #iam: clang not gcc
            '-ggdb': (0, ArgumentListFilter.compileUnaryCallback),
            '-ggdb3': (0, ArgumentListFilter.compileUnaryCallback),
            '-gdwarf-2': (0, ArgumentListFilter.compileUnaryCallback),
            '-gdwarf-3': (0, ArgumentListFilter.compileUnaryCallback),
            '-gdwarf-4': (0, ArgumentListFilter.compileUnaryCallback),
            '-gline-tables-only': (0, ArgumentListFilter.compileUnaryCallback),
            '-grecord-gcc-switches': (0, ArgumentListFilter.compileUnaryCallback),

            '-p': (0, ArgumentListFilter.compileUnaryCallback),
            '-pg': (0, ArgumentListFilter.compileUnaryCallback),

            # Optimization
            '-O': (0, ArgumentListFilter.compileUnaryCallback),
            '-O0': (0, ArgumentListFilter.compileUnaryCallback),
            '-O1': (0, ArgumentListFilter.compileUnaryCallback),
            '-O2': (0, ArgumentListFilter.compileUnaryCallback),
            '-O3': (0, ArgumentListFilter.compileUnaryCallback),
            '-Os': (0, ArgumentListFilter.compileUnaryCallback),
            '-Ofast': (0, ArgumentListFilter.compileUnaryCallback),
            '-Og': (0, ArgumentListFilter.compileUnaryCallback),
            # Component-specifiers
            '-Xclang': (1, ArgumentListFilter.compileBinaryCallback),
            '-Xpreprocessor': (1, ArgumentListFilter.defaultBinaryCallback),
            '-Xassembler': (1, ArgumentListFilter.defaultBinaryCallback),
            '-Xlinker': (1, ArgumentListFilter.defaultBinaryCallback),
            # Linker
            '-l': (1, ArgumentListFilter.linkBinaryCallback),
            '-L': (1, ArgumentListFilter.linkBinaryCallback),
            '-T': (1, ArgumentListFilter.linkBinaryCallback),
            '-u': (1, ArgumentListFilter.linkBinaryCallback),
            #iam: specify the entry point
            '-e': (1, ArgumentListFilter.linkBinaryCallback),
            # runtime library search path
            '-rpath': (1, ArgumentListFilter.linkBinaryCallback),
            # iam: showed up in buildkernel
            '-shared': (0, ArgumentListFilter.linkUnaryCallback),
            '-static': (0, ArgumentListFilter.linkUnaryCallback),
            '-pie': (0, ArgumentListFilter.linkUnaryCallback),
            '-nostdlib': (0, ArgumentListFilter.linkUnaryCallback),
            '-nodefaultlibs': (0, ArgumentListFilter.linkUnaryCallback),
            '-rdynamic': (0, ArgumentListFilter.linkUnaryCallback),
            # darwin flags
            '-dynamiclib': (0, ArgumentListFilter.linkUnaryCallback),
            '-current_version': (1, ArgumentListFilter.linkBinaryCallback),
            '-compatibility_version': (1, ArgumentListFilter.linkBinaryCallback),
            '-framework': (1, ArgumentListFilter.linkBinaryCallback),

            # dragonegg mystery argument
            '--64': (0, ArgumentListFilter.compileUnaryCallback),

            # binutils nonsense
            '-print-multi-directory': (0, ArgumentListFilter.compileUnaryCallback),
            '-print-multi-lib': (0, ArgumentListFilter.compileUnaryCallback),
            '-print-libgcc-file-name': (0, ArgumentListFilter.compileUnaryCallback),

            # Code coverage instrumentation
            '-fprofile-arcs': (0, ArgumentListFilter.compileLinkUnaryCallback),
            '-coverage': (0, ArgumentListFilter.compileLinkUnaryCallback),
            '--coverage': (0, ArgumentListFilter.compileLinkUnaryCallback),

            # ian's additions while building the linux kernel
            '/dev/null': (0, ArgumentListFilter.inputFileCallback),
            '-mno-80387': (0, ArgumentListFilter.compileUnaryCallback), #gcc Don't generate output containing 80387 instructions for floating point.

            #
            # BD: need to warn the darwin user that these flags will rain on their parade
            # (the Darwin ld is a bit single minded)
            #
            # 1) compilation with -fvisibility=hidden causes trouble when we try to
            #    attach bitcode filenames to an object file. The global symbols in object
            #    files get turned into local symbols when we invoke 'ld -r'
            #
            # 2) all stripping commands (e.g., -dead_strip) remove the __LLVM segment after
            #    linking
            #
            # Update: found a fix for problem 1: add flag -keep_private_externs when
            # calling ld -r.
            #
            '-Wl,-dead_strip': (0, ArgumentListFilter.warningLinkUnaryCallback),
            '-dead_strip': (0, ArgumentListFilter.warningLinkUnaryCallback),
            '-Oz': (0, ArgumentListFilter.compileUnaryCallback), #did not find this in the GCC options.
            '-mno-global-merge': (0, ArgumentListFilter.compileUnaryCallback), #clang (do not merge globals)

        }

    @staticmethod
    def _defaultPatternMatches():
        """Return a new dict of the built-in ``regex -> (arity, handler)`` entries.

        Patterns are tried in insertion order and the first match wins, so a
        specific pattern must be listed before any general one that also
        matches the same argument.
        """
        #
        # Patterns for other command-line arguments:
        # - inputFiles
        # - objectFiles (suffix .o)
        # - libraries + linker options as in -lxxx -Lpath or -Wl,xxxx
        # - preprocessor options as in -DXXX -Ipath
        # - compiler warning options: -W....
        # - optimization and other flags: -f...
        #
        return {
            # Must precede the generic -f pattern below: the sanitizer
            # flag has to reach the link arguments as well.
            r'^-fsanitize=.+$': (0, ArgumentListFilter.compileLinkUnaryCallback),
            # Kept ahead of the file-suffix patterns so that a value such as
            # -fplugin=foo.so is still treated as a compiler flag.
            r'^-f.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^.+\.(c|cc|cpp|C|cxx|i|s|S|bc)$': (0, ArgumentListFilter.inputFileCallback),
            # FORTRAN file types
            r'^.+\.([fF](|[0-9][0-9]|or|OR|pp|PP))$': (0, ArgumentListFilter.inputFileCallback),
            #iam: the object file recognition is not really very robust, object files
            # should be determined by their existence and contents...
            r'^.+\.(o|lo|So|so|po|a|dylib)$': (0, ArgumentListFilter.objectFileCallback),
            #iam: library.so.4.5.6 probably need a similar pattern for .dylib too.
            r'^.+\.dylib(\.\d)+$': (0, ArgumentListFilter.objectFileCallback),
            r'^.+\.(So|so)(\.\d)+$': (0, ArgumentListFilter.objectFileCallback),
            r'^-(l|L).+$': (0, ArgumentListFilter.linkUnaryCallback),
            r'^-I.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-D.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-U.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-Wl,.+$': (0, ArgumentListFilter.linkUnaryCallback),
            r'^-W(?!l,).*$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-rtlib=.+$': (0, ArgumentListFilter.linkUnaryCallback),
            r'^-std=.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-stdlib=.+$': (0, ArgumentListFilter.compileLinkUnaryCallback),
            r'^-mtune=.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-mstack-alignment=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            r'^-mcmodel=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            r'^-mpreferred-stack-boundary=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            r'^-mindirect-branch=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            r'^-mregparm=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            r'^-march=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff
            r'^--param=.+$': (0, ArgumentListFilter.compileUnaryCallback), #iam: linux kernel stuff

            #iam: mac stuff...
            r'-mmacosx-version-min=.+$': (0, ArgumentListFilter.compileUnaryCallback),

            r'^--sysroot=.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^--gcc-toolchain=.+$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-print-prog-name=.*$': (0, ArgumentListFilter.compileUnaryCallback),
            r'^-print-file-name=.*$': (0, ArgumentListFilter.compileUnaryCallback),
            #iam: -xc from yices. why BD?
            r'^-x.+$': (0, ArgumentListFilter.compileUnaryCallback),

        }

    def skipBitcodeGeneration(self):
        """Return ``(skip, reason)``; ``(False, "")`` when nothing rules bitcode out.

        The checks are evaluated in order and the first one that applies
        supplies the reason string.
        """
        retval = (False, "")
        if os.environ.get('WLLVM_CONFIGURE_ONLY', False):
            retval = (True, "CFG Only")
        elif not self.inputFiles:
            retval = (True, "No input files")
        elif self.isEmitLLVM:
            retval = (True, "Emit LLVM")
        elif self.isAssembly or self.isAssembleOnly:
            retval = (True, "Assembly")
        elif self.isPreprocessOnly:
            retval = (True, "Preprocess Only")
        elif self.isStandardIn:
            retval = (True, "Standard In")
        elif (self.isDependencyOnly and not self.isCompileOnly):
            retval = (True, "Dependency Only")
        return retval

    def _shiftArgs(self, nargs):
        """Remove and return the next ``nargs`` pending arguments as a list.

        Raises:
            IndexError: if fewer than ``nargs`` arguments are left.
        """
        return [self._inputArgs.popleft() for _ in range(nargs)]

    def standardInCallback(self, flag):
        """Record that the command line contained ``-``."""
        _logger.debug('standardInCallback: %s', flag)
        self.isStandardIn = True

    def abortUnaryCallback(self, flag):
        """Log the full argument list and the flag, then exit with status 1."""
        _logger.warning('Out of context experience: "%s" "%s"', str(self.inputList), flag)
        sys.exit(1)

    def inputFileCallback(self, infile):
        """Record an input file; a ``.s``/``.S`` suffix also sets ``isAssembly``."""
        _logger.debug('Input file: %s', infile)
        self.inputFiles.append(infile)
        if re.search('\\.(s|S)$', infile):
            self.isAssembly = True

    def outputFileCallback(self, flag, filename):
        """Remember ``filename`` as the output file."""
        _logger.debug('outputFileCallback: %s %s', flag, filename)
        self.outputFilename = filename

    def objectFileCallback(self, objfile):
        """Record an object file or library argument."""
        _logger.debug('objectFileCallback: %s', objfile)
        self.objectFiles.append(objfile)

    def preprocessOnlyCallback(self, flag):
        """Set ``isPreprocessOnly`` (which also stops the parse loop)."""
        _logger.debug('preprocessOnlyCallback: %s', flag)
        self.isPreprocessOnly = True

    def dependencyOnlyCallback(self, flag):
        """Set ``isDependencyOnly`` and keep the flag as a compile argument."""
        _logger.debug('dependencyOnlyCallback: %s', flag)
        self.isDependencyOnly = True
        self.compileArgs.append(flag)

    def assembleOnlyCallback(self, flag):
        """Set ``isAssembleOnly`` (which also stops the parse loop)."""
        _logger.debug('assembleOnlyCallback: %s', flag)
        self.isAssembleOnly = True

    def verboseFlagCallback(self, flag):
        """Set ``isVerbose``."""
        _logger.debug('verboseFlagCallback: %s', flag)
        self.isVerbose = True

    def compileOnlyCallback(self, flag):
        """Set ``isCompileOnly``."""
        _logger.debug('compileOnlyCallback: %s', flag)
        self.isCompileOnly = True

    def emitLLVMCallback(self, flag):
        """Set both ``isEmitLLVM`` and ``isCompileOnly``."""
        _logger.debug('emitLLVMCallback: %s', flag)
        self.isEmitLLVM = True
        self.isCompileOnly = True

    def linkUnaryCallback(self, flag):
        """Append ``flag`` to the link arguments."""
        _logger.debug('linkUnaryCallback: %s', flag)
        self.linkArgs.append(flag)

    def compileUnaryCallback(self, flag):
        """Append ``flag`` to the compile arguments."""
        _logger.debug('compileUnaryCallback: %s', flag)
        self.compileArgs.append(flag)

    def compileLinkUnaryCallback(self, flag):
        """Append ``flag`` to both the compile and the link arguments."""
        _logger.debug('compileLinkUnaryCallback: %s', flag)
        self.compileArgs.append(flag)
        self.linkArgs.append(flag)

    def warningLinkUnaryCallback(self, flag):
        """Warn about an unsupported flag and file it under ``forbiddenArgs``."""
        _logger.debug('warningLinkUnaryCallback: %s', flag)
        _logger.warning('The flag "%s" cannot be used with this tool; we are ignoring it', flag)
        self.forbiddenArgs.append(flag)

    def defaultBinaryCallback(self, flag, arg):
        """Log and drop a flag together with its argument."""
        _logger.warning('Ignoring compiler arg pair: "%s %s"', flag, arg)

    def dependencyBinaryCallback(self, flag, arg):
        """Set ``isDependencyOnly`` and keep ``flag arg`` as compile arguments."""
        _logger.debug('dependencyBinaryCallback: %s %s', flag, arg)
        self.isDependencyOnly = True
        self.compileArgs.append(flag)
        self.compileArgs.append(arg)

    def compileBinaryCallback(self, flag, arg):
        """Append ``flag`` and ``arg`` to the compile arguments."""
        _logger.debug('compileBinaryCallback: %s %s', flag, arg)
        self.compileArgs.append(flag)
        self.compileArgs.append(arg)

    def linkBinaryCallback(self, flag, arg):
        """Append ``flag`` and ``arg`` to the link arguments."""
        _logger.debug('linkBinaryCallback: %s %s', flag, arg)
        self.linkArgs.append(flag)
        self.linkArgs.append(arg)

    def compileLinkBinaryCallback(self, flag, arg):
        """Append ``flag`` and ``arg`` to both the compile and link arguments."""
        _logger.debug('compileLinkBinaryCallback: %s %s', flag, arg)
        self.compileArgs.append(flag)
        self.compileArgs.append(arg)
        self.linkArgs.append(flag)
        self.linkArgs.append(arg)

    def linkingGroupCallback(self, args):
        """Append a whole ``-Wl,--start-group`` ... group to the link arguments."""
        _logger.debug('linkingGroupCallback: %s', args)
        self.linkArgs.extend(args)

    def getOutputFilename(self):
        """Return the output file name implied by the parsed command line.

        An explicit ``-o`` value wins.  Otherwise a compile-only run names
        the object after its first input file, and everything else yields
        ``a.out``.
        """
        if self.outputFilename is not None:
            return self.outputFilename
        # Compile-only can be set without any input file (e.g. --version);
        # there is then no name to derive, so use the generic default.
        if self.isCompileOnly and self.inputFiles:
            #iam: -c but no -o, therefore the obj should end up in the cwd.
            (_, base) = os.path.split(self.inputFiles[0])
            (root, _) = os.path.splitext(base)
            return f'{root}.o'
        return 'a.out'

    def getBitcodeFileName(self):
        """Return the output file's path with its base name turned into ``.<name>.bc``."""
        (dirs, baseFile) = os.path.split(self.getOutputFilename())
        bcfilename = os.path.join(dirs, f'.{baseFile}.bc')
        return bcfilename

    # iam: returns a pair [objectFilename, bitcodeFilename] i.e .o and .bc.
    # the hidden flag determines whether the objectFile is hidden like the
    # bitcodeFile is (starts with a '.'), use the logging level & DUMPING flag to get a sense
    # of what is being written out.
    def getArtifactNames(self, srcFile, hidden=False):
        """Return ``[objectName, bitcodeName]`` for ``srcFile``, without directories."""
        (_, srcbase) = os.path.split(srcFile)
        (srcroot, _) = os.path.splitext(srcbase)
        if hidden:
            objbase = f'.{srcroot}.o'
        else:
            objbase = f'{srcroot}.o'
        bcbase = f'.{srcroot}.o.bc'
        return [objbase, bcbase]

    #iam: for printing our partitioning of the args
    def dump(self):
        """Write the recorded argument partition and flags to stderr."""
        efn = sys.stderr.write
        efn(f'\ncompileArgs: {self.compileArgs}\ninputFiles: {self.inputFiles}\nlinkArgs: {self.linkArgs}\n')
        efn(f'\nobjectFiles: {self.objectFiles}\noutputFilename: {self.outputFilename}\n')
        for srcFile in self.inputFiles:
            efn(f'\nsrcFile: {srcFile}\n')
            (objFile, bcFile) = self.getArtifactNames(srcFile)
            efn(f'\n{srcFile} ===> ({objFile}, {bcFile})\n')
        efn(f'\nFlags:\nisVerbose = {self.isVerbose}\n')
        efn(f'isDependencyOnly = {self.isDependencyOnly}\n')
        efn(f'isPreprocessOnly = {self.isPreprocessOnly}\n')
        efn(f'isAssembleOnly = {self.isAssembleOnly}\n')
        efn(f'isAssembly = {self.isAssembly}\n')
        efn(f'isCompileOnly = {self.isCompileOnly}\n')
        efn(f'isEmitLLVM = {self.isEmitLLVM}\n')
        efn(f'isStandardIn = {self.isStandardIn}\n')