├── ChangeLog ├── LICENSE ├── README.txt ├── doc ├── android-notes ├── bat-manual.pdf ├── bat-manual.tex ├── bat_internal_format.txt ├── creating-the-database.txt ├── database-example-files │ ├── DOWNLOADURL │ ├── LIST │ ├── README │ └── SHA256SUM ├── defensivepublications │ └── security-bat.pdf ├── filesystem-observations ├── json.txt ├── kernelsymbolsspec.txt ├── knowledgebase-ideas ├── listoftags.txt ├── pfif │ ├── README │ ├── workreport1 │ ├── workreport2 │ ├── workreport3 │ ├── workreport4 │ └── workreport56 ├── processing-java ├── running-tests.txt └── testsuite │ ├── README │ ├── bat-training.tex │ ├── bat-training2.tex │ ├── bat-training3.tex │ ├── bat-training4.tex │ ├── bat-training5.tex │ ├── bat-training6.tex │ ├── openwrt-configs │ ├── 010-fix_mixed_implicit_and_normal_rules_error.patch │ ├── README │ ├── firmware1-config │ ├── firmware2-config │ └── firmware3-config │ ├── testoutput │ └── README │ ├── training-notes │ ├── training-notes2 │ ├── training-notes3 │ ├── training-notes4 │ ├── training-notes5 │ └── training-notes6 └── src ├── LICENSE ├── MANIFEST.in ├── TODO ├── bat-scan ├── bat-scan.config ├── bat ├── __init__.py ├── batxor.py ├── bruteforcescan.py ├── busybox.py ├── busyboxversion.py ├── checks.py ├── derivekernelconfig.py ├── elfcheck.py ├── ext2.py ├── extractor.py ├── file2package.py ├── findduplicates.py ├── findlibs.py ├── fixduplicates.py ├── fsmagic.py ├── fssearch.py ├── fwunpack.py ├── generatehexdump.py ├── generateimages.py ├── generatejson.py ├── generatereports.py ├── guireport.py ├── identifier.py ├── images.py ├── interfaces.py ├── javacheck.py ├── jffs2.py ├── kernelanalysis.py ├── kernelsymbols.py ├── licenseversion.py ├── piecharts.py ├── prerun.py ├── prunefiles.py ├── renamefiles.py ├── reportcopyright.py ├── security.py └── unpackrpm.py ├── batgui ├── busybox-compare-configs.py ├── busybox-walk.py ├── crawlers ├── README ├── crawling-php ├── gnu-config └── gnucrawler.py ├── debian ├── changelog ├── compat ├── control ├── copyright ├── files ├── pyversions └── rules ├── extractkernelstrings.py ├── knowledgebaseadd.py ├── knowledgebaseaddchipset.py ├── knowledgebaseinit.py ├── maintenance ├── bat-sqlitetopostgresql.py ├── batextensions.py ├── busybox-appletname-extractor.py ├── clonedbinit.py ├── copybatarchives.py ├── createbatarchive.py ├── createdb.config ├── createdb.py ├── createfiledatabasedebian.py ├── createfiledatabasefedora.py ├── createmanifests.py ├── cveparser.py ├── dumplist.py ├── extractrpms.py ├── findclones.py ├── findthirdparty.py ├── generatelist-fdroid.py ├── generatelist.py ├── packagerename.py ├── postgresql-index.sql ├── postgresql-table-drop.sql ├── postgresql-table.sql ├── rewritelist.py ├── scorecache.py ├── storeresults.py ├── updatesha256sum.py ├── verifyarchive.py ├── verifydb.py └── verifylist.py ├── patches ├── README ├── code2html-0.9.1-add-csharp.patch ├── code2html-0.9.1-add-groovyscala.patch ├── code2html-0.9.1-add-qml.patch └── cramfs.patch ├── scripts ├── comparebinaries.py ├── extractcomments.py ├── findxor.py ├── licensecompare.py ├── sourcewalk.py └── verifysourcearchive.py ├── setup.cfg └── setup.py /README.txt: -------------------------------------------------------------------------------- 1 | The Binary Analysis Tool (BAT) is a modular framework to analyse binary files. 2 | 3 | This project is no longer actively maintained. There are a few forks that might suit your needs. 
4 | 5 | If you still want to use it, don't forget to also install "bat-extratools": 6 | 7 | https://github.com/armijnhemel/bat-extratools/ 8 | -------------------------------------------------------------------------------- /doc/android-notes: -------------------------------------------------------------------------------- 1 | Unpacking Android things 2 | 3 | File systems in use: 4 | 5 | * yaffs2 (use unyaffs for this, although this will not always work) 6 | * ubifs (seen one example so far that had broken images) 7 | * ext4 (soon, according to a post Ted Ts'o once made) 8 | * possibly other file systems too 9 | 10 | All kinds of meta info in boot.img files: 11 | 12 | http://android.git.kernel.org/?p=platform/bootloader/legacy.git;a=blob;f=include/boot/bootimg.h;h=44fde9277d65c82eecb8ffeaab7b078e61c6ff3f;hb=HEAD 13 | 14 | Location of standard license texts 15 | 16 | After unpacking a firmware, a copy of the default Google terms of service can often be found in /etc/NOTICE.html.gz. Sometimes additional files with extra license texts can be found. 17 | 18 | 19 | APK 20 | 21 | Applications are in APK format: 22 | 23 | http://en.wikipedia.org/wiki/APK_%28file_format%29 24 | 25 | 26 | Android resource files and XML files 27 | 28 | AXMLPrinter2: http://code.google.com/p/android4me/downloads/list 29 | http://code.google.com/p/android-apktool/ 30 | -------------------------------------------------------------------------------- /doc/bat-manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/armijnhemel/binaryanalysis/ea97b6b7617128ccf7cfa19244b91675d9bf66df/doc/bat-manual.pdf -------------------------------------------------------------------------------- /doc/database-example-files/DOWNLOADURL: -------------------------------------------------------------------------------- 1 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.10.tar.bz2 https://www.kernel.org/ 2 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.11.tar.bz2 https://www.kernel.org/ 3 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.12.tar.bz2 https://www.kernel.org/ 4 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.13.tar.bz2 https://www.kernel.org/ 5 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.14.tar.bz2 https://www.kernel.org/ 6 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.15.tar.bz2 https://www.kernel.org/ 7 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.16.tar.bz2 https://www.kernel.org/ 8 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.17.tar.bz2 https://www.kernel.org/ 9 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.18.tar.bz2 https://www.kernel.org/ 10 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.19.tar.bz2 https://www.kernel.org/ 11 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.1.tar.bz2 https://www.kernel.org/ 12 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.20.tar.bz2 https://www.kernel.org/ 13 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.21.tar.bz2 https://www.kernel.org/ 14 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.22.tar.bz2 https://www.kernel.org/ 15 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.23.tar.bz2 https://www.kernel.org/ 16 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.24.tar.bz2 https://www.kernel.org/ 17 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.25.tar.bz2 https://www.kernel.org/ 18 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.26.tar.bz2 https://www.kernel.org/ 19 | 
ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.27.tar.bz2 https://www.kernel.org/ 20 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.28.tar.bz2 https://www.kernel.org/ 21 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.29.tar.bz2 https://www.kernel.org/ 22 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.2.tar.bz2 https://www.kernel.org/ 23 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.30.tar.bz2 https://www.kernel.org/ 24 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.31.tar.bz2 https://www.kernel.org/ 25 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.32.tar.bz2 https://www.kernel.org/ 26 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.33.tar.bz2 https://www.kernel.org/ 27 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.34.tar.bz2 https://www.kernel.org/ 28 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.35.tar.bz2 https://www.kernel.org/ 29 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.36.tar.bz2 https://www.kernel.org/ 30 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.37.tar.bz2 https://www.kernel.org/ 31 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.38.tar.bz2 https://www.kernel.org/ 32 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.39.tar.bz2 https://www.kernel.org/ 33 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.3.tar.bz2 https://www.kernel.org/ 34 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.40.tar.bz2 https://www.kernel.org/ 35 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.4.tar.bz2 https://www.kernel.org/ 36 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.5.tar.bz2 https://www.kernel.org/ 37 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.6.tar.bz2 https://www.kernel.org/ 38 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.7.tar.bz2 https://www.kernel.org/ 39 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.8.tar.bz2 https://www.kernel.org/ 40 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.9.tar.bz2 https://www.kernel.org/ 41 | ftp://ftp.kernel.org/pub/linux/kernel/v2.0/linux-2.0.tar.bz2 https://www.kernel.org/ 42 | -------------------------------------------------------------------------------- /doc/database-example-files/LIST: -------------------------------------------------------------------------------- 1 | linux 2.0.10 linux-2.0.10.tar.bz2 kernel 2 | linux 2.0.11 linux-2.0.11.tar.bz2 kernel 3 | linux 2.0.12 linux-2.0.12.tar.bz2 kernel 4 | linux 2.0.13 linux-2.0.13.tar.bz2 kernel 5 | linux 2.0.14 linux-2.0.14.tar.bz2 kernel 6 | linux 2.0.15 linux-2.0.15.tar.bz2 kernel 7 | linux 2.0.16 linux-2.0.16.tar.bz2 kernel 8 | linux 2.0.17 linux-2.0.17.tar.bz2 kernel 9 | linux 2.0.18 linux-2.0.18.tar.bz2 kernel 10 | linux 2.0.19 linux-2.0.19.tar.bz2 kernel 11 | linux 2.0.1 linux-2.0.1.tar.bz2 kernel 12 | linux 2.0.20 linux-2.0.20.tar.bz2 kernel 13 | linux 2.0.21 linux-2.0.21.tar.bz2 kernel 14 | linux 2.0.22 linux-2.0.22.tar.bz2 kernel 15 | linux 2.0.23 linux-2.0.23.tar.bz2 kernel 16 | linux 2.0.24 linux-2.0.24.tar.bz2 kernel 17 | linux 2.0.25 linux-2.0.25.tar.bz2 kernel 18 | linux 2.0.26 linux-2.0.26.tar.bz2 kernel 19 | linux 2.0.27 linux-2.0.27.tar.bz2 kernel 20 | linux 2.0.28 linux-2.0.28.tar.bz2 kernel 21 | linux 2.0.29 linux-2.0.29.tar.bz2 kernel 22 | linux 2.0.2 linux-2.0.2.tar.bz2 kernel 23 | linux 2.0.30 linux-2.0.30.tar.bz2 kernel 24 | linux 2.0.31 linux-2.0.31.tar.bz2 kernel 25 | linux 2.0.32 linux-2.0.32.tar.bz2 kernel 26 | linux 2.0.33 linux-2.0.33.tar.bz2 kernel 27 | linux 2.0.34 linux-2.0.34.tar.bz2 kernel 
28 | linux 2.0.35 linux-2.0.35.tar.bz2 kernel 29 | linux 2.0.36 linux-2.0.36.tar.bz2 kernel 30 | linux 2.0.37 linux-2.0.37.tar.bz2 kernel 31 | linux 2.0.38 linux-2.0.38.tar.bz2 kernel 32 | linux 2.0.39 linux-2.0.39.tar.bz2 kernel 33 | linux 2.0.3 linux-2.0.3.tar.bz2 kernel 34 | linux 2.0.40 linux-2.0.40.tar.bz2 kernel 35 | linux 2.0.4 linux-2.0.4.tar.bz2 kernel 36 | linux 2.0.5 linux-2.0.5.tar.bz2 kernel 37 | linux 2.0.6 linux-2.0.6.tar.bz2 kernel 38 | linux 2.0.7 linux-2.0.7.tar.bz2 kernel 39 | linux 2.0.8 linux-2.0.8.tar.bz2 kernel 40 | linux 2.0.9 linux-2.0.9.tar.bz2 kernel 41 | linux 2.0 linux-2.0.tar.bz2 kernel 42 | -------------------------------------------------------------------------------- /doc/database-example-files/README: -------------------------------------------------------------------------------- 1 | In this directory there are a few examples of files used during the database 2 | creation process. In this case the files are for the Linux kernel. These files 3 | are just here as an example to explain the structure. 4 | -------------------------------------------------------------------------------- /doc/defensivepublications/security-bat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/armijnhemel/binaryanalysis/ea97b6b7617128ccf7cfa19244b91675d9bf66df/doc/defensivepublications/security-bat.pdf -------------------------------------------------------------------------------- /doc/filesystem-observations: -------------------------------------------------------------------------------- 1 | Reading contents from file systems and compressed files 2 | 3 | Apart from the kernel a device has one or more file systems. The contents of these file systems can contain all kinds of files: normal files (including more file systems), directories, device nodes, etc. 4 | 5 | http://sourceforge.net/apps/mediawiki/fuse/index.php?title=CompressedFileSystems 6 | 7 | Nullsoft installer 8 | 9 | http://nsis.svn.sourceforge.net/viewvc/nsis/NSIS/trunk/Source/exehead/fileform.h?revision=6101&view=markup 10 | -------------------------------------------------------------------------------- /doc/kernelsymbolsspec.txt: -------------------------------------------------------------------------------- 1 | Visualising symbol relationships in the Linux kernel 2 | 3 | The Linux kernel supports dynamic module loading. Kernel modules sometimes need to invoke functions that live in another piece of kernel code. As with dynamically linked user space programs, a list of symbols that are needed by the module at runtime, as well as a list of symbols that are defined/exported, is recorded at compile time. During runtime these symbols are resolved. 4 | 5 | The Linux kernel developers have marked certain symbols as "for use by GPL licensed code only" to indicate that modules that use these symbols are expected to be GPLv2 (or compatible) licensed. Linux kernel modules that have a license that is not compatible with the GPLv2 license should not use these symbols. Failure to comply can result in legal action by copyright holders who wish to uphold the license requirements. 6 | 7 | The visualisation code presented here tries to visualise dependencies between symbols used and the license. It does so by extracting symbols from the Linux kernel modules and kernel images, resolving symbols, querying a database with symbols extracted from Linux kernel source code and mapping the value of the exported symbol with the declared license of the module. 
This way we can catch if a module that is declared proprietary is actually using a GPL symbol. 8 | 9 | EXPORT_SYMBOL and EXPORT_SYMBOL_GPL 10 | 11 | The mechanism used to export normal kernel symbols is EXPORT_SYMBOL(). The GPL only kernel symbols are exported using EXPORT_SYMBOL_GPL(). If the license of a module is not GPL compatible it will not be able to call GPL only symbols. Sometimes vendors change EXPORT_SYMBOL_GPL() to EXPORT_SYMBOL() to work around these restrictions. The visualisation code can help catch these cases. 12 | 13 | Steps for visualising 14 | 15 | 1. extract symbols from Linux kernel modules 16 | 17 | The symbols in Linux kernel modules can be found in the symbol table of the ELF file. Using the command: 18 | 19 | $ readelf -W --syms 20 | 21 | this information can be extracted and processed. 22 | 23 | 2. extract symbols from the main Linux kernel image. 24 | 25 | If the Linux kernel is an ELF image then symbols can be obtained in a similar way as for Linux kernel modules. If the Linux kernel image is not an ELF image then some extra work has to be done. By looking for a known symbol that can be found in all kernel images (such as "loops_per_jiffy") and searching around it (kernel symbols are separated by NUL characters) a list of symbols can be obtained. 26 | 27 | 3. extract version information from each module and each kernel image. During runtime typically the modules and main kernel image need to have the same version. Although sometimes modules with different versions are "forced" to load into the running kernel this is rare. 28 | 29 | 4. find out for each module where each needed symbol is defined. Some filtering is done based on the versions extracted from step 3. It might turn out that some symbols are not defined anywhere, which is an error that should be reported. 30 | 31 | 5. for each symbol query the database to see what its type is. The result can be one of three things: normal kernel symbol, gpl only kernel symbol, or unknown. The unknown symbols indicate either out of tree kernel code or an omission in the database. The version extracted in step 3 is used because symbols can change over time (usually from normal kernel symbol to gpl only kernel symbol). 32 | 33 | 6. extract the declared license from each module using 34 | 35 | $ modinfo -l /path/to/kernelmodule 36 | 37 | 7. Create a graph, checking the type for each symbol and seeing if there is a mismatch between GPL symbols that are needed and a declared license that is not GPL compatible. 38 | 39 | 40 | Installing the kernel visualisation code in BAT 41 | 42 | The code should be copied into the directory with other BAT modules with the right ownership and permissions (rest as the other files). This can either be done manually, or by rebuilding the BAT binary package (see BAT manual for instructions). 43 | 44 | The following section should be added to the BAT configuration file: 45 | 46 | [findsymbols] 47 | type = aggregate 48 | module = bat.kernelsymbols 49 | method = findsymbols 50 | envvars = BAT_DB=/gpl/master/master.sqlite3:KERNELSYMBOL_SVG=1:KERNELSYMBOL_DEPENDENCIES=1 51 | noscan = text:xml:graphics:pdf:audio:video:mp4 52 | enabled = yes 53 | storetarget = images 54 | storedir = /tmp/images 55 | storetype = -graph.png:-graph.svg 56 | cleanup = yes 57 | priority = 5 58 | 59 | 60 | GraphViz should be installed as a dependency. 
Since there are buggy versions of GraphViz in older versions of Debian and Ubuntu, either a recent version of Fedora (20 being the latest at the time of writing this documentation) or Ubuntu (14.04 LTS being the latest at the time of writing this documentation) should be used. 61 | -------------------------------------------------------------------------------- /doc/knowledgebase-ideas: -------------------------------------------------------------------------------- 1 | Ideas for the knowledgebase 2 | =========================== 3 | 4 | This file describes some ideas regarding the knowledgebase (milestone 4). It is based on the flow of how a firmware might pass through a scanning system. 5 | 6 | Firmware layout 7 | 8 | A firmware can consist of file systems (compressed/uncompressed), bootloaders, kernels (compressed/uncompressed), graphic files (compressed/uncompressed) and so on. 9 | 10 | A file system can be nested inside other file systems, or appended to a kernel image, or prepended in front of a kernel image. In short: we can have nesting. 11 | 12 | |---bootloader 13 | |---kernel 14 | | \-------file system 15 | |---file system 16 | 17 | The parts that can be found in a firmware are independent of each other and can all be extracted and analyzed separately. That means that it is fairly easy to separate information in a database. 18 | 19 | blob -- unique number, index 20 | checksum -- sha256, after unpacking 21 | type -- type of the blob: kernel, type of file system, combined, picture, etc. 22 | compression -- type of compression, if any 23 | offset -- offset in the parent blob (represented as integers) 24 | parent -- parent blob, or 0 if it is top level 25 | firmware -- firmware it is part of (foreign key), although this is actually only relevant for the top level firmware 26 | 27 | +------+----------+----------+-------------+--------+--------+----------+ 28 | | blob | sha256 | type | compression | offset | parent | firmware | 29 | +------+----------+----------+-------------+--------+--------+----------+ 30 | | 500 | 99999999 | firmware | none | 0 | 0 | | 31 | +------+----------+----------+-------------+--------+--------+----------+ 32 | | 2 | abcdefgh | kernel | gzip | 64 | 500 | | 33 | +------+----------+----------+-------------+--------+--------+----------+ 34 | | 3 | fgbfsfff | ext2 | gzip | 8192 | 500 | | 35 | +------+----------+----------+-------------+--------+--------+----------+ 36 | 37 | This would describe a firmware, with a kernel blob (gzip compressed) at hex offset 0x40, followed by an ext2 file system with gzip compression at hex offset 0x2000. 38 | 39 | Every top level firmware could be identified by: 40 | 41 | id -- unique number 42 | checksum -- sha256sum 43 | version -- version number, name, if applicable 44 | product -- id (foreign key) 45 | scandate -- date a device was scanned 46 | scantype -- automatic/by hand (does this make sense? should this be included?) 47 | verified -- (not sure if I would include this information or what it would mean. Verified by hand?) 48 | public -- whether or not a scan report of this firmware is public 49 | sources -- boolean (are there sources for this device) 50 | compliant -- boolean (combine with 'sources'? What if it was fixed eventually? what if we could not scan the firmware) 51 | comments -- include a full report here? Is it necessary to make this searchable? It is kinda unstructured data. 
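As a concrete sketch, the blob table above could be written out as an SQLite schema directly from Python. The table and column names below are just the working names from these notes, not an existing BAT schema:

import sqlite3

conn = sqlite3.connect('knowledgebase.sqlite3')
cursor = conn.cursor()
# one row per unpacked blob; 'parent' is 0 for a top level blob and
# "offset" is quoted because it is also an SQL keyword
cursor.execute('''create table if not exists blob (
    blob integer primary key,
    sha256 text,
    type text,
    compression text,
    "offset" integer,
    parent integer,
    firmware integer)''')
conn.commit()
conn.close()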
52 | 53 | Every device could be identified by: 54 | 55 | id -- unique number for a device (corresponds to "product" in the previous table) 56 | vendor -- vendor name (NETGEAR, ASUS, Cisco, Linksys, etc. etc.) 57 | name -- name of the device (WRT54G, etc.) 58 | type -- subtype of the device (v5, 001, whatever is used) 59 | chipset -- Texas Instruments AR7, Broadcom BCM6851 (with a join we can reduce this to ARM, MIPS, etc.) 60 | upstream vendor -- useful information, which is typically not something that should be made publicly available 61 | 62 | An abstraction for the chipset: 63 | 64 | name -- name of the chipset 65 | vendor -- name of the vendor 66 | chipset family -- generic chip family (MIPS, ARM, etc.) 67 | 68 | for example: 69 | 70 | +---------+-------------------+-------+ 71 | | name | vendor | chip | 72 | +---------+-------------------+-------+ 73 | | AR7 | Texas Instruments | MIPS | 74 | +---------+-------------------+-------+ 75 | | BCM6851 | Broadcom | MIPS | 76 | +---------+-------------------+-------+ 77 | -------------------------------------------------------------------------------- /doc/listoftags.txt: -------------------------------------------------------------------------------- 1 | This is a list that maps tags to files. Each file could have more than one tag. 2 | 3 | empty :: empty files 4 | symlink :: symbolic link 5 | temporary :: temporary file used internally by BAT (should never be exposed to the outside world) 6 | 7 | 8 | The first basic distinction is between 'text' and 'binary': 9 | 10 | text :: files that only contain ASCII characters. 11 | binary :: files that contain other characters than ASCII characters (possibly also including ASCII characters) 12 | 13 | 14 | aiff :: AIFF/AIFF-C files 15 | androidresource :: resource file (Android specific) 16 | androidxml :: Android 'binary' XML 17 | appledouble :: Apple Double files (resource forks, etc.) 
18 | audio :: audio files (generic for all audio files that are tagged) 19 | bflt :: bFLT files (uClinux) 20 | certificate :: certificate files (generic for certificates) 21 | compressed :: various compressed files (UPX, 7z, lzip, lzop, xz, gzip, compress, bzip2, lrzip, zip, lzma) 22 | cursor :: MS Windows cursor file (like ICO) 23 | dalvik :: Android dalvik file (generic, both dex and odex) 24 | dex :: Android Dex (old Android) 25 | font :: font data (woff, otf, ttf) 26 | graphics :: graphics files (generic for all graphics: WebP, BMP, GIF, JPEG, PNG) 27 | ico :: MS Windows ICO file 28 | ihex :: Intel iHEX file 29 | messagecatalog :: GNU message catalogue 30 | mp4 :: MPEG 4 31 | odex :: Android ODEX (optimized DEX) 32 | otf :: OpenType fonts 33 | pak :: Chrome PAK files 34 | resource :: resource files (generic for all resource files: Android resources, fonts, ICS, Chrome PAK, GNU message catalog, ICO/cursor, timezone files, Apple resource forks, certificates, terminfo) 35 | resourcefork :: Apple resource fork 36 | riff :: RIFF container (WebP, WAV) 37 | rsa :: 38 | serializedjava :: serialized Java 39 | sqlite3 :: SQLite3 database file 40 | terminfo :: terminfo file 41 | timezone :: time zone file 42 | ttf :: TrueType fonts 43 | upx :: UPX files 44 | vimswap :: Vim swap file 45 | wav :: WAV file 46 | webp :: WebP graphics file 47 | woff :: WOFF font files 48 | xml :: XML file 49 | -------------------------------------------------------------------------------- /doc/pfif/README: -------------------------------------------------------------------------------- 1 | This directory contains reports for development work done on BAT in 2012 with a grant coming through PFIF (Protocol Freedom Information Foundation). These are included here for the sake of transparency. 2 | -------------------------------------------------------------------------------- /doc/pfif/workreport1: -------------------------------------------------------------------------------- 1 | Work Report milestone 1 ("Official release of BAT") 2 | 3 | The milestone "Official release of BAT" was performed between October 16 2011 and January 30 2012. In this period the following was done: 4 | 5 | * revision 5 of BAT was tagged (October 18) 6 | * configurations for building binary packages of bat-extratools for Fedora 14/15 and Ubuntu 10.10 and Debian 6 were added (October 16-18) 7 | * binary packages of bat-extratools for Fedora 14/15, Ubuntu 10.10 and Debian 6 were made and released (October 18-20) 8 | * configurations for building binary packages of BAT for Fedora 14/15, Ubuntu 10.10 and Debian 6 were added or updated (October 17-18) 9 | * binary packages of BAT for Fedora 14/15, Ubuntu 10.10 and Debian 6 were made and released (October 18-20) 10 | * a much improved user manual was written and released on the binaryanalysis.org website (November 22) 11 | 12 | To fix some packaging mistakes the following was done: 13 | 14 | * revision 6 of BAT was tagged (January 30 2012) 15 | * configurations for building binary packages of bat-extratools-java for Fedora 14/15 and Ubuntu 10.10 and Debian 6 were added (January 13 - 14 2012) 16 | * binary packages of BAT, bat-extratools and bat-extratools-java for Fedora 14/15, Ubuntu 10.10 and Debian 6 were made and released (January 30 2012) 17 | * an updated user manual was released on the binaryanalysis.org website (January 30 2012) 18 | 19 | Additionally many bugs were fixed (massive speed ups, code clean ups) and new functionality was added (November 25 2011 - January 30 2012). 
20 | -------------------------------------------------------------------------------- /doc/pfif/workreport2: -------------------------------------------------------------------------------- 1 | Work Report milestone 2 ("Processing Java class files for better extracting of strings for the ranking module") 2 | 3 | 4 | The milestone "Processing Java class files for better extracting of strings for the ranking module" was performed between October 19 and November 23. In this period the following was done: 5 | 6 | * the data model from the ranking database was changed to take programming languages into account by adding a new field "language" 7 | * a public domain third party tool to process files for Android's Dalvik ("dedexer") was added to the bat-extratools collection 8 | * wrapper code was written to process output from jcf-dump (for regular Java class files) and dedexer (for Dalvik files) 9 | * a database consisting of code from the Apache project was generated and made available for download to PFIF 10 | -------------------------------------------------------------------------------- /doc/pfif/workreport3: -------------------------------------------------------------------------------- 1 | Work Report milestone 5 ("Semi-interactive UI") 2 | 3 | The milestone "Semi-interactive UI" was performed between March 8 2012 and May 18 2012. In this period the following was done: 4 | 5 | * split bruteforce method into a frontend and a backend, to allow for different frontends 6 | * add methods to create pictures of results from ranking method and other methods 7 | * write results of bruteforce method, including program state and generated files, to an archive 8 | * write a graphical user interface that allows viewing generated archives 9 | * added the possibility to interactively launch scans from the interface and save results to a file 10 | * rework code for configuration, to make it easier to enable/disable scans from the graphical user interface 11 | * code for tagging files was expanded, plus display filters for tags were added to the graphical user interface 12 | -------------------------------------------------------------------------------- /doc/pfif/workreport4: -------------------------------------------------------------------------------- 1 | Work Report milestone 4 ("adding support for Minix file system") 2 | 3 | The milestone "adding support for Minix file system" was completed on May 21 4 | 2012. The work on this milestone was done between May 1 2012 and May 21 2012. 5 | In this period the following work was done: 6 | 7 | * made program to extract Minix v1 file systems as used on many Linux based IP 8 | cameras 9 | * tested with many IP camera firmwares containing a Minix file system and 10 | manually verified contents were correct. 11 | * added code to use the program inside the Binary Analysis Tool 12 | -------------------------------------------------------------------------------- /doc/pfif/workreport56: -------------------------------------------------------------------------------- 1 | Work Report milestone 3 ("standardized test set/training materials based on OpenWrt") 2 | 3 | The milestone "standardized test set/training materials based on OpenWrt" was 4 | completed on November 28 2012, when all materials were updated to reflect the 5 | status of the upcoming BAT 10.0 release (before the end of 2012). 6 | 7 | Work was done between November 27 2011 and November 28 2012. A DVD with the 8 | software used in the tests was sent by mail in February 2012. 
The training 9 | materials were released on November 30 2012: 10 | 11 | http://www.binaryanalysis.org/en/content/show/documentation 12 | 13 | Work Report milestone 6 ("Release incorporating milestones 2 - 5") 14 | 15 | The milestone "release incorporating milestones 2 - 5" was completed on May 16 | 22 2012, when a release containing both milestones 4 and 5 was made. Since then 17 | a new release with several bug fixes was made on October 4 and there will be 18 | another release with new features before the end of 2012. 19 | 20 | The training materials and configuration to rebuild the test materials used in 21 | this training were released separately on November 30 2012: 22 | 23 | http://www.binaryanalysis.org/en/content/show/documentation 24 | -------------------------------------------------------------------------------- /doc/processing-java: -------------------------------------------------------------------------------- 1 | Processing Java files in the ranking module 2 | 3 | Separating scanning for programming languages 4 | 5 | We have chosen to treat Java and C executables in a different way. There are good reasons for this: 6 | 7 | * strings that are very common in C programs might be very significant for Java programs or vice versa. If all strings were used for all scans, a string found in lots of C programs but in only one Java program would be considered irrelevant, even though it is very significant for Java. 8 | * although embedding Java in C programs and vice versa does happen it is not the most common situation 9 | 10 | The database in the ranking module has a separate field 'language' where for each string that has been extracted the language of the file is recorded. The language is determined by looking at the extension of the file and using a special lookup table that maps extensions to a programming language. 11 | 12 | Processing binaries and narrowing results 13 | 14 | Java binaries contain quite a bit of data that is not useful for our string based search, such as datatypes, etcetera. Also, when running the command 'strings' on Java class files sometimes additional whitespace (like a tab) is printed in front of the string data we want to use, because the Java compiler has inserted that at some point. 15 | 16 | It is possible to get just string constants out of Java binaries (both .class files and Android's DEX files) and discard all other information. For Java class files this can be done using jcf-dump (part of gcc) and processing the output. For DEX files this can be done by running Dedexer and processing its output. 17 | 18 | Granularity of scans 19 | 20 | Granularity could possibly be an issue when scanning Java class files. Executables that are generated when compiling C programs (like ELF executables or libraries) usually contain many more strings than 21 | Java class files, which are conceptually perhaps closer to object files than to executables. So the number of strings extracted from a Java class file compared to a 'normal' executable is significantly lower. Whether or not this will affect the result is currently unknown. 22 | 23 | Since Dalvik bytecode is always in one archive it is not a problem there. 
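The extension-to-language lookup boils down to a small table. A minimal sketch (the entries below are illustrative, not the actual table BAT uses):

import os.path

# illustrative extension -> language mapping
extension_to_language = {'.c': 'C', '.h': 'C', '.cpp': 'C',
                         '.java': 'Java', '.jsp': 'Java'}

def language_for_file(filename):
    ext = os.path.splitext(filename)[1].lower()
    return extension_to_language.get(ext)

print language_for_file('Foo.java')    # prints: Java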
24 | -------------------------------------------------------------------------------- /doc/running-tests.txt: -------------------------------------------------------------------------------- 1 | Notes for running tests 2 | 3 | (these are some personal notes for testing) 4 | 5 | Smoke test for unpacking 6 | ------------------------ 7 | 8 | To run a smoke test for unpacking with a large collection of firmwares it is 9 | important to do the following: 10 | 11 | * set 'cleanup' to yes so the disk doesn't fill up with unpacking directories 12 | * set 'writeoutputfile' to 'no' so the disk doesn't fill up with result files 13 | * disable ranking so the database is not hit (taking a long time) 14 | -------------------------------------------------------------------------------- /doc/testsuite/README: -------------------------------------------------------------------------------- 1 | This directory will in the future contain: 2 | 3 | * configuration information to build a test suite based on OpenWrt 4 | * documentation, standardized testing scenarios and training materials 5 | -------------------------------------------------------------------------------- /doc/testsuite/bat-training4.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt]{beamer} 2 | 3 | \usepackage{url} 4 | \usepackage{tikz} 5 | %\author{Armijn Hemel} 6 | \title{Using the Binary Analysis Tool - part 4} 7 | \date{} 8 | 9 | \begin{document} 10 | 11 | \setlength{\parskip}{4pt} 12 | 13 | \frame{\titlepage} 14 | 15 | \frame{ 16 | \frametitle{Subjects} 17 | In this course you will learn: 18 | 19 | \begin{itemize} 20 | \item to browse results of a scan made with the Binary Analysis Tool 21 | \end{itemize} 22 | } 23 | 24 | \frame{ 25 | \frametitle{Starting the Binary Analysis Tool result viewer} 26 | The Binary Analysis Tool result viewer is a Python program using wxPython. It can be invoked using the command: 27 | 28 | \texttt{batgui} 29 | 30 | which will launch the GUI. 31 | } 32 | 33 | \frame{ 34 | \frametitle{Alternative viewer} 35 | An alternative viewer using Qt can be found at: 36 | 37 | \url{https://github.com/monkeyiq/batgui2} 38 | 39 | The rest of this training will be using the original \texttt{batgui}. 40 | } 41 | 42 | \frame{ 43 | \frametitle{Loading a file in the BAT result viewer} 44 | 45 | Via File $\rightarrow$ Open in the menu a result file can be loaded and displayed. 46 | 47 | On the left there will be a file tree, on the right results for individual files will be displayed. 48 | } 49 | 50 | \frame{ 51 | \frametitle{Filtering results in the BAT result viewer} 52 | Not every file type might be interesting. To unclutter the user interface and the directory tree a display filter is present that will hide certain file types from the directory tree. 53 | 54 | Configuration $\rightarrow$ Filter Configuration will show a list of checkboxes of file types to ignore. 
55 | } 56 | 57 | \frame{ 58 | \frametitle{Interpreting results of a scan} 59 | For each file a few attributes will be shown by default: 60 | 61 | \begin{itemize} 62 | \item name of the binary 63 | \item absolute file path 64 | \item relative file path if it is nested and parent is an unpacked compressed file or file system 65 | \item size 66 | \item SHA256 checksum 67 | \item tags 68 | \end{itemize} 69 | 70 | In addition results of file specific scans might be shown (architecture, shared libraries, etcetera) 71 | } 72 | 73 | \frame{ 74 | \frametitle{Interpreting results of advanced ranking scan} 75 | If the advanced ranking scan is enabled a lot more information becomes available: 76 | 77 | \begin{itemize} 78 | \item function names matching 79 | \item string constants matching 80 | \item version number guess 81 | \item possible licenses guess 82 | \end{itemize} 83 | 84 | This information should be carefully analysed and not blindly trusted. 85 | } 86 | 87 | \frame{ 88 | \frametitle{Interpreting results: function names} 89 | For dynamically linked ELF executables unique function names (if matched) will be displayed. 90 | 91 | Many unique function names are a clear indicator of software reuse. 92 | } 93 | 94 | \frame{ 95 | \frametitle{Interpreting results: string constants (1)} 96 | For a good classification the following things are important: 97 | 98 | \begin{itemize} 99 | \item amount of matched string constants 100 | \item distribution of matched string constants 101 | \end{itemize} 102 | 103 | If there are only a few strings that can be matched, the results are likely to be not very reliable. 104 | 105 | An even distribution of scores, combined with few matched unique strings and non-unique strings, means that nothing was reliably matched. 106 | } 107 | 108 | \frame{ 109 | \frametitle{Interpreting results: string constants (2)} 110 | The advanced ranking scan will create two pie charts. The first pie chart details how the algorithm classified the strings (unique matches, assigned matches, unmatched, and so on); the second pie chart depicts the score for each package. 111 | 112 | The first pie chart determines the fidelity of the second pie chart: if many strings (dozens, hundreds) could be matched and assigned to a package (either a unique or non-unique match), then the second pie chart will have a high fidelity. If just a handful of strings could be matched, the second pie chart has a low fidelity. 113 | } 114 | 115 | \frame{ 116 | \frametitle{Interpreting results: version numbers for unique strings} 117 | Based on unique strings BAT tries to determine version numbers of matched packages. 118 | 119 | Because version number guessing is tied to unique strings, it is not reliable if there are just a few unique strings. 120 | } 121 | 122 | \frame{ 123 | \frametitle{Interpreting results: license guess} 124 | Based on unique strings BAT tries to determine possible licenses for matched packages. 125 | 126 | License guessing is likely to be unreliable if there are just a few unique strings. Versions are not taken into account (yet) when determining the license: all possible licenses are reported, even if the software was relicensed at some version. 127 | } 128 | 129 | \frame{ 130 | \frametitle{Conclusion} 131 | In this course you have learned: 132 | 133 | \begin{itemize} 134 | \item to browse results of a scan made with the Binary Analysis Tool 135 | \end{itemize} 136 | 137 | In the next course we will dig into how the Binary Analysis Tool can be extended. 
138 | } 139 | \end{document} 140 | -------------------------------------------------------------------------------- /doc/testsuite/bat-training6.tex: -------------------------------------------------------------------------------- 1 | \documentclass[11pt]{beamer} 2 | 3 | \usepackage{url} 4 | \usepackage{tikz} 5 | %\author{Armijn Hemel} 6 | \title{Using the Binary Analysis Tool - part 6} 7 | \date{} 8 | 9 | \begin{document} 10 | 11 | \setlength{\parskip}{4pt} 12 | 13 | \frame{\titlepage} 14 | 15 | \frame{ 16 | \frametitle{Subjects} 17 | In this course you will learn: 18 | 19 | \begin{itemize} 20 | \item to generate a database for BAT ranking 21 | \item to configure BAT to use the ranking database 22 | \end{itemize} 23 | } 24 | 25 | \frame{ 26 | \frametitle{Collecting a dataset} 27 | Before you can generate the database you need a dataset. A good dataset can be built from downloads from, for example: 28 | 29 | \begin{itemize} 30 | \item upstream projects 31 | \item distributions 32 | \end{itemize} 33 | 34 | The database works best if there is a wide range of software in the database. If there is too little software in the database there will be mismatches, possibly falsely detecting software. 35 | } 36 | 37 | \frame{ 38 | \frametitle{Generating a file list for database extraction} 39 | The database generating script that is used expects a file listing all files that should be processed. There is a helper script called \texttt{generatelist.py} that helps generate this list. It can be found in the source repository of BAT in the directory \texttt{maintenance}. 40 | 41 | \texttt{python generatelist.py -f /path/to/dir/with/files -o origin | sort > /path/to/dir/with/files/LIST} 42 | 43 | The parameter \texttt{-o} allows you to set an origin of where the source was downloaded, for example \texttt{debian} or \texttt{gnome}. If not set, it will be set to \texttt{unknown}. 44 | } 45 | 46 | \frame{ 47 | \frametitle{Generating the database} 48 | The script to generate the database is called \texttt{createdb.py}. It can be found in the source repository of BAT in the directory \texttt{maintenance}. It can extract: 49 | 50 | \begin{itemize} 51 | \item string constants (\texttt{xgettext}) and function names (\texttt{ctags}) 52 | \item license information (using Ninka and FOSSology) 53 | \item copyright information (using FOSSology) 54 | \item configuration from Linux kernel Makefiles 55 | \end{itemize} 56 | 57 | It can be invoked as follows: 58 | 59 | \texttt{python createdb.py -c /path/to/configurationfile -d /path/to/database -f /path/to/dir/with/files} 60 | } 61 | 62 | \frame{ 63 | \frametitle{Installing Ninka} 64 | The Ninka scanner can be used to extract licensing information from source code files. It can be downloaded from: 65 | 66 | \url{https://github.com/dmgerman/ninka/} 67 | 68 | As of the time of writing the latest version is \texttt{1.1}. This version number is hardcoded a few times in \texttt{createdb.py} and should be changed if the version of Ninka changes. 69 | 70 | Ninka can be installed as follows: 71 | 72 | \begin{enumerate} 73 | \item unpack in \texttt{/tmp} 74 | \item \texttt{cd /tmp/ninka-1.1/} 75 | \item \texttt{cd comments} 76 | \item \texttt{make clean; make} 77 | \end{enumerate} 78 | } 79 | 80 | \frame{ 81 | \frametitle{Installing FOSSology} 82 | Binary packages are available for most major distributions. 
Alternatively, packages can be downloaded from: 83 | 84 | \url{http://www.fossology.org/} 85 | } 86 | 87 | \begin{frame}[fragile] 88 | \frametitle{Creating the configuration file} 89 | The standard BAT distribution comes with an example configuration file for \texttt{createdb.py}: 90 | 91 | \begin{verbatim} 92 | [extractconfig] 93 | configtype = global 94 | database = /tmp/test/master.sqlite3 95 | scanlicense = yes 96 | licensedb = /tmp/test/licenses.sqlite3 97 | ninkacommentsdb = /tmp/test/ninkacomments.sqlite3 98 | scancopyright = yes 99 | cleanup = yes 100 | wipe = no 101 | \end{verbatim} 102 | \end{frame} 103 | 104 | \frame{ 105 | \frametitle{Running the database creation script} 106 | The database extraction script can be run as follows: 107 | 108 | \texttt{python createdb.py -c /path/to/configuration/file -f /path/to/directory/with/sources} 109 | 110 | This will create, depending on the configuration, one to three files: the main database, a licenses database and a temporary database for Ninka, which can be ignored or discarded later. 111 | } 112 | 113 | \frame{ 114 | \frametitle{Caching databases} 115 | The ranking scan uses several caching databases: 116 | 117 | \begin{itemize} 118 | \item strings for each programming language 119 | \item average amount of strings per package for each programming language 120 | \item function names 121 | \end{itemize} 122 | 123 | If caching databases are not found then the ranking code will not work properly. 124 | } 125 | 126 | \frame{ 127 | \frametitle{Configuring the ranking scan} 128 | The functionality for the ranking scan is split in two separate scans: 129 | 130 | \begin{enumerate} 131 | \item identifier extraction (in the \texttt{[identifier]} scan) 132 | \item identifier lookup and scoring (in the \texttt{[versionlicensecopyright]} scan) 133 | \end{enumerate} 134 | } 135 | 136 | \begin{frame}[fragile] 137 | \frametitle{Further ranking configuration} 138 | To enable license scanning and reporting the parameter \texttt{BAT\_RANKING\_LICENSE} should be set to \texttt{1}: 139 | 140 | \begin{verbatim} 141 | [ranking] 142 | ... 143 | envvars = ... 144 | :BAT_RANKING_LICENSE=1 145 | ... 146 | \end{verbatim} 147 | \end{frame} 148 | 149 | \frame{ 150 | \frametitle{Conclusion} 151 | In this course you have learned about: 152 | 153 | \begin{itemize} 154 | \item to generate a database for BAT ranking 155 | \item to configure BAT to use the ranking database 156 | \end{itemize} 157 | 158 | This concludes the Binary Analysis Tool training. 
159 | } 160 | 161 | \end{document} 162 | -------------------------------------------------------------------------------- /doc/testsuite/openwrt-configs/010-fix_mixed_implicit_and_normal_rules_error.patch: -------------------------------------------------------------------------------- 1 | --- a/Makefile 2 | +++ b/Makefile 3 | @@ -428,7 +428,7 @@ ifeq ($(config-targets),1) 4 | -include $(srctree)/arch/$(ARCH)/Makefile 5 | export KBUILD_DEFCONFIG 6 | 7 | -config %config: scripts_basic outputmakefile FORCE 8 | +%config: scripts_basic outputmakefile FORCE 9 | $(Q)mkdir -p include 10 | $(Q)$(MAKE) $(build)=scripts/kconfig $@ 11 | $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= .kernelrelease 12 | @@ -1276,7 +1276,7 @@ endif 13 | $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) 14 | 15 | # Modules 16 | -/ %/: prepare scripts FORCE 17 | +%/: prepare scripts FORCE 18 | $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ 19 | $(build)=$(build-dir) 20 | %.ko: prepare scripts FORCE 21 | -------------------------------------------------------------------------------- /doc/testsuite/openwrt-configs/README: -------------------------------------------------------------------------------- 1 | 010-fix_mixed_implicit_and_normal_rules_error.patch :: this is a patch for BusyBox to ensure Backfire 10.03 can build on systems with a newer version of GNU make. It should be placed in package/busybox/patches/ 2 | 3 | It was copied from upstream OpenWrt. 4 | -------------------------------------------------------------------------------- /doc/testsuite/testoutput/README: -------------------------------------------------------------------------------- 1 | This directory contains test output from the test firmwares as generated by BAT 9.0 with the default configuration. The output has been slightly modified so that no compiled binaries are distributed and a GPL license violation has been introduced. 2 | 3 | These results can be browsed with the BAT results viewer. 4 | 5 | These results are 6 | 7 | Included are: 8 | 9 | * ... 10 | * ... 11 | -------------------------------------------------------------------------------- /doc/testsuite/training-notes4: -------------------------------------------------------------------------------- 1 | Notes/transcript for presentation. 2 | 3 | File: bat-training4.pdf 4 | 5 | Slide 1: no notes 6 | 7 | Slide 2: The Binary Analysis Tool has a simple viewer that allows you to view 8 | and browse results of a scan (stored as a tar archive) in a simple graphical 9 | interface. 10 | 11 | Slide 3: no notes 12 | 13 | Slide 4: no notes 14 | 15 | Slide 5: Opening a result file in the BAT results viewer is simple: via 16 | File -> Open a file selection menu will be opened, after which a file can be 17 | loaded. The results file will be unpacked. On the left there will be a file 18 | tree that can be browsed, on the right results will be displayed in tabs. 19 | 20 | Slide 6: Because not every file will be interesting it is possible to filter 21 | uninteresting files from the file tree. Especially in larger firmwares, such 22 | as Android images, there can be thousands of files, like graphics files or 23 | text files or Android resource files, that are not directly interesting for 24 | license compliance engineering, but which clutter the interface and make 25 | it easy to get lost in the amount of data. 26 | 27 | The filter can be configured via Configuration -> Filter Configuration. 
28 | 29 | Slide 7: For each file a few attributes (as far as applicable) will be shown, 30 | like size, checksum, tags, file name, and absolute and relative paths. 31 | Depending on the file some other results (like scan results, architecture, list 32 | of dynamically linked libraries) might be shown as well. 33 | 34 | Slide 8: The advanced ranking scan will give access to a lot more information, 35 | like unique function names, or which strings were matched to a database of 36 | strings and possibly a version number guess, if enough data is available. 37 | 38 | This information should be carefully inspected and not blindly trusted. 39 | 40 | Slide 9: no notes 41 | 42 | Slide 10 + 11: Interpreting the string constants takes some care. To make a good 43 | classification of a program it is important to look at how many strings are 44 | matched and how they are distributed over the various packages. An even 45 | distribution of the score, with very few matched strings, is a clear miss and 46 | means that nothing has been recognized. 47 | 48 | If there are only a few strings that can be recognized (whether unique or 49 | non-unique), the results are likely to be not very good either. 50 | In short: the more strings that can be recognized, the better the fidelity. 51 | 52 | Slide 12: Version numbers are determined based on the number of unique strings. 53 | If there are enough unique strings it is possible to reliably guess the version 54 | number of the program because there are slight differences between strings 55 | between versions. 56 | 57 | If just a few unique strings were matched then version guessing will not be 58 | very reliable. 59 | 60 | Slide 13: Similarly to version numbers a guess is made of possible licenses. 61 | The possible licenses are also based upon unique matches only. If there are 62 | few unique matches this will not be reliable. 63 | 64 | Also, currently guessed versions are not taken into account when determining 65 | the license. Since code can have a different license per version this is 66 | important. For now all possible licenses are reported. 67 | 68 | Slide 14: no notes 69 | -------------------------------------------------------------------------------- /doc/testsuite/training-notes5: -------------------------------------------------------------------------------- 1 | Notes/transcript for presentation. 2 | 3 | File: bat-training5.pdf 4 | 5 | Slide 1: no notes 6 | 7 | Slide 2: In this course we look at extending the Binary Analysis Tool by 8 | adding new identifiers and new scans. 9 | 10 | Slide 3: Identifiers of supported compressed files, file systems and media 11 | files are hardcoded in a file in the BAT source tree, namely bat/fsmagic.py. 12 | The structure is a simple Python dictionary called "fsmagic" that can easily be 13 | extended. 14 | 15 | Depending on the file type the identifier might start at the beginning of the 16 | file, the end of the file, or somewhere after the beginning of the file. For 17 | identifiers that start after the beginning of the file a special dictionary 18 | with "correction" offsets should also be modified. 19 | 20 | A new identifier will only be scanned once there is a scan that actually 21 | uses it and declares it in the configuration file in the "magic" configuration 22 | parameter. 23 | 24 | Slide 4: Identifiers can be used in scans by accessing the dictionary "offsets" 25 | that is passed to each of the scans. The keys for the dictionary are the same 26 | keys as in the "fsmagic" dictionary. 
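A minimal sketch of what slides 3 and 4 describe. The 'frobfs' name and its magic bytes are made up for illustration, 'correction' is an assumed name for the dictionary with correction offsets, and the scan signature mirrors the unpack scans in the BAT sources:

# in bat/fsmagic.py identifiers live in a plain dictionary, roughly:
fsmagic = {'frobfs': 'FROB'}    # hypothetical file system magic

# identifiers that start after the beginning of the file also need an
# entry with their correction offset ('correction' is an assumed name)
correction = {'frobfs': 0}

# a scan receives the hits via the 'offsets' dictionary, keyed by the
# same names as in fsmagic
def searchUnpackFrobfs(filename, tempdir=None, blacklist=[], offsets={}, scanenv={}, debug=False):
    for offset in offsets.get('frobfs', []):
        pass    # carve the data at 'offset' and try to unpack it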
27 | 28 | Slide 5: Prerun scans are used to quickly tag files. A prerun scan takes a few parameters, 29 | including the offsets and previous tags that were given to the file, and returns 30 | a list of new tags the scan has found. 31 | 32 | Slide 6: no comments 33 | 34 | Slide 7 + 8: Unpacking scans try to carve a file from a larger file and unpack 35 | its contents. Parameters to methods include a blacklist, which contains tuples 36 | with start/end values of byte ranges in the file that should be ignored, 37 | since these ranges have already been scanned for example by another unpack 38 | scan. 39 | 40 | The return value of an unpack scan should be a list with four values: 41 | 1. a list of tuples containing the names of directories where files have been 42 | unpacked and the byte offset in the original file where the compressed file or 43 | file system or media file can be found 44 | 2. an updated blacklist to which new byte ranges have been added in case of 45 | successful unpacking 46 | 3. a list of tags, the same as are returned by prerun scans 47 | 4. hints for the scanning engine, for example if the type of results is already 48 | known in advance (example: PNG unpacking) 49 | 50 | Slide 9: A new leaf scan is fairly simple and only has a few parameters, namely 51 | the full path of the file, the blacklist with byte ranges that should be 52 | ignored, and environment variables. 53 | 54 | The result can be an arbitrary Python value. Depending on whether XML output is 55 | enabled it might be necessary to write a custom XML pretty printer for return 56 | values that are more complex than basic types (integer, boolean, float, 57 | strings). 58 | 59 | Slide 10: no comments 60 | 61 | Slide 11: Postrun scans do not alter the scan results, but merely process them 62 | to for example create a different representation of the results, like reports 63 | or graphics. 64 | 65 | The parameters are the results of the previous scans, plus some extra meta 66 | information like paths in the file system and environment variables. 67 | 68 | There is no return value for postrun scans. 69 | 70 | Slide 12: no comments 71 | -------------------------------------------------------------------------------- /doc/testsuite/training-notes6: -------------------------------------------------------------------------------- 1 | Notes/transcript for presentation. 2 | 3 | File: bat-training6.pdf 4 | 5 | Slide 1: no notes 6 | 7 | Slide 2: In this final course we will look at how to generate a database for 8 | the advanced ranking scan discussed in previous courses, plus how to configure 9 | it so BAT can use it. 10 | 11 | Slide 3: A good dataset is important for the ranking to work. If there is too 12 | little data the classifications will be strongly biased to what is in the 13 | database and programs will be wrongly identified. 14 | 15 | The database works best if there is a wide range of software in it. Good 16 | sources for software are open source projects and Linux distributions. 17 | 18 | Slide 4: There is a database generation script available in the BAT source code 19 | repository. This script processes files, but it needs to have a list of files 20 | with some metadata (like package name, version, and the origin of the download). 21 | Creating this list is very easy (it is a simple format), but if there are many 22 | source code archives that need to be processed it can be quite a lot of work. 23 | 24 | To help with that work there is a script that can generate these lists. 
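The list format itself is one whitespace-separated line per archive with package, version, file name and origin, as in doc/database-example-files/LIST. A minimal hand-rolled sketch, assuming archives are named package-version.tar.bz2 (a naive assumption, which is exactly why generatelist.py exists):

import os

def generatelist(directory, origin='unknown'):
    for f in sorted(os.listdir(directory)):
        if not f.endswith('.tar.bz2'):
            continue
        # naive: assume everything after the last '-' is the version
        (package, version) = f[:-len('.tar.bz2')].rsplit('-', 1)
        print '%s %s %s %s' % (package, version, f, origin)

generatelist('/path/to/dir/with/files', origin='kernel')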
25 | 26 | Slide 5: The actual script that processes the source code files can be found in 27 | the BAT source code repository. It extracts string constants and function names 28 | (for C/C++ programs) and can be instructed to extract license texts from source 29 | code using the Ninka license scanner. 30 | 31 | Slide 6: The Ninka license scanner can be used to extract license texts. 32 | Installing it is a bit tricky and BAT has, right now, a few hardcoded paths for 33 | Ninka, so it needs to be installed in a (semi-)fixed location before licenses 34 | can be scanned with the database extraction script. 35 | 36 | Slide 7: Similarly, FOSSology can be used for extracting licenses and copyright 37 | statements like e-mail addresses, URLs and more. For this FOSSology needs to 38 | be installed. Installing FOSSology is quite complex, but ready-made 39 | packages are available for most distributions. 40 | 41 | Slide 8: The ranking scan makes use of several caching databases to speed up 42 | scanning. The caching databases contain information that is needed by the 43 | scanning process, such as the average number of strings in a package, string 44 | constants, and function names. 45 | 46 | Slide 9: 47 | 48 | Slide 10: 49 | 50 | Slide 11: no comments 51 | 52 | Slide 12: If licensing information (determined by looking at licenses of files 53 | in which unique strings were found) should be reported as well, another 54 | environment variable called BAT_RANKING_LICENSE should be set to 1. 55 | 56 | Slide 13: no comments 57 | -------------------------------------------------------------------------------- /src/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include bat-scan.config 2 | include bat/* 3 | include README 4 | include LICENSE 5 | -------------------------------------------------------------------------------- /src/bat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/armijnhemel/binaryanalysis/ea97b6b7617128ccf7cfa19244b91675d9bf66df/src/bat/__init__.py -------------------------------------------------------------------------------- /src/bat/batxor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Sometimes files such as firmwares are encrypted. The level of encryption 9 | varies from keys and signature verification at boot time to very simple 10 | "encryption" by simply XORing with a byte string. 11 | 12 | The code here scans binary files for certain known XOR parameters and applies 13 | them, but only if no other scan succeeds. 14 | 15 | For this we need to keep some state and possibly even delete the file only later, 16 | by tagging it as 'temporary' and removing it later on. 17 | ''' 18 | 19 | import sys 20 | import os 21 | import os.path 22 | import tempfile 23 | import mmap 24 | import fwunpack 25 | 26 | # some of the signatures we know about: 27 | # * Splashtop (fast boot environment) 28 | # * Bococom router series (2.6.21, Ralink chipset) 29 | # * Sitecom WL-340 and WL-342 30 | 31 | # Finding new signatures is done by hand. A helper tool (findxor.py) can be 32 | # found in the scripts directory. 33 | 34 | # The signatures of various known XOR "encrypted" firmwares. 
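# Each signature below is a list of single-byte strings that together form
# the XOR key. unpackXOR() recovers the plaintext by XORing byte i of the
# input with key[i % len(key)].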
35 | signatures = { 'splashtop': ['\x51', '\x57', '\x45', '\x52'] 36 | , 'bococom': ['\x3a', '\x93', '\xa2', '\x95', '\xc3', '\x63', '\x48', '\x45', '\x58', '\x09', '\x12', '\x03', '\x08', '\xc8', '\x3c'] 37 | , 'sitecom': ['\x78', '\x3c', '\x9e', '\xcf', '\x67', '\xb3', '\x59', '\xac'] 38 | , 'edimax': ['\x88','\x44','\xa2','\xd1','\x68','\xb4','\x5a','\x2d'] 39 | } 40 | 41 | def unpackXOR(filename, sig, tempdir=None): 42 | tmpdir = fwunpack.unpacksetup(tempdir) 43 | tmpfile = tempfile.mkstemp(dir=tmpdir) 44 | os.fdopen(tmpfile[0]).close() 45 | 46 | fwunpack.unpackFile(filename, 0, tmpfile[1], tmpdir, modify=True) 47 | datafile = open(filename) 48 | datafile.seek(0) 49 | data = datafile.read(1000000) 50 | 51 | # read data, XOR, write data out again 52 | f2 = open(tmpfile[1], 'w') 53 | counter = 0 54 | while data != '': 55 | for i in data: 56 | f2.write(chr(ord(i) ^ ord(signatures[sig][counter]))) 57 | counter = (counter+1)%len(signatures[sig]) 58 | data = datafile.read(1000000) 59 | f2.close() 60 | datafile.close() 61 | return tmpdir 62 | 63 | def searchUnpackXOR(filename, tempdir=None, blacklist=[], offsets={}, scanenv={}, debug=False): 64 | hints = [] 65 | diroffsets = [] 66 | 67 | # If something else already unpacked (parts) of the file we're not 68 | # going to continue. 69 | if 'BAT_UNPACKED' in scanenv: 70 | if scanenv['BAT_UNPACKED'] == 'True': 71 | return (diroffsets, blacklist, [], hints) 72 | 73 | if 'XOR_MINIMUM' in scanenv: 74 | xor_minimum = int(scanenv['XOR_MINIMUM']) 75 | else: 76 | xor_minimum = 0 77 | # only continue if no other scan has succeeded 78 | if blacklist != []: 79 | return (diroffsets, blacklist, [], hints) 80 | counter = 1 81 | 82 | # only continue if we actually have signatures 83 | if signatures == {}: 84 | return (diroffsets, blacklist, [], hints) 85 | 86 | # open the file, so we can search for signatures 87 | # TODO: use the identifier search we have elsewhere. 88 | datafile = os.open(filename, os.O_RDONLY) 89 | datamm = mmap.mmap(datafile, 0, access=mmap.ACCESS_READ) 90 | 91 | tmpdir = fwunpack.dirsetup(tempdir, filename, "xor", counter) 92 | res = None 93 | for s in signatures: 94 | bs = reduce(lambda x, y: x + y, signatures[s]) 95 | # find all instances of the signature. We might want to tweak 96 | # this a bit. 97 | bsres = datamm.find(bs) 98 | if bsres == -1: 99 | continue 100 | siginstances = [bsres] 101 | while bsres != -1: 102 | bsres = datamm.find(bs, bsres +1) 103 | if bsres != -1: 104 | siginstances.append(bsres) 105 | if len(siginstances) > 0: 106 | if len(siginstances) < xor_minimum: 107 | continue 108 | res = unpackXOR(filename, s, tmpdir) 109 | if res != None: 110 | diroffsets.append((res, 0, os.stat(filename).st_size)) 111 | # blacklist the whole file 112 | blacklist.append((0, os.stat(filename).st_size)) 113 | break 114 | datamm.close() 115 | os.close(datafile) 116 | if res == None: 117 | os.rmdir(tmpdir) 118 | return (diroffsets, blacklist, [], hints) 119 | return (diroffsets, blacklist, ['temporary'], hints) 120 | -------------------------------------------------------------------------------- /src/bat/busyboxversion.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2009-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | # Stand alone module to determine the version of BusyBox. 
Has a method for being called 8 | # from one of the default scans, but can also be invoked separately. 9 | 10 | import sys 11 | import os 12 | import tempfile 13 | import copy 14 | from optparse import OptionParser 15 | 16 | import busybox 17 | import extractor 18 | 19 | def busybox_version(filename, tags, cursor, conn, filehashes, blacklist=[], scanenv={}, scandebug=False, unpacktempdir=None): 20 | try: 21 | filesize = os.stat(filename).st_size 22 | ## if the whole file is blacklisted, we don't have to scan 23 | if blacklist != []: 24 | if extractor.inblacklist(0, blacklist) == filesize: 25 | return None 26 | ## make a copy and add a bogus value for the last 27 | ## byte to a temporary blacklist to make the loop work 28 | ## well. 29 | blacklist_tmp = copy.deepcopy(blacklist) 30 | blacklist_tmp.append((filesize,filesize)) 31 | datafile = open(filename, 'rb') 32 | lastindex = 0 33 | datafile.seek(lastindex) 34 | for i in blacklist_tmp: 35 | if i[0] == lastindex: 36 | lastindex = i[1] - 1 37 | datafile.seek(lastindex) 38 | continue 39 | if i[0] > lastindex: 40 | ## check if there actually is enough data to do a search first 41 | ## "BusyBox v" has length 9; the version has at least 2 digits and a dot 42 | if (i[0] - lastindex) < 12: 43 | lastindex = i[1] - 1 44 | datafile.seek(lastindex) 45 | continue 46 | data = datafile.read(i[0] - lastindex) 47 | tmpfile = tempfile.mkstemp() 48 | os.write(tmpfile[0], data) 49 | os.fdopen(tmpfile[0]).close() 50 | bbres = busybox.extract_version(tmpfile[1]) 51 | os.unlink(tmpfile[1]) 52 | ## set lastindex to the next 53 | lastindex = i[1] - 1 54 | datafile.seek(lastindex) 55 | if bbres != None: 56 | break 57 | datafile.close() 58 | else: 59 | bbres = busybox.extract_version(filename) 60 | if bbres != None: 61 | return (['busybox'], bbres) 62 | except Exception, e: 63 | return None 64 | 65 | def main(argv): 66 | parser = OptionParser() 67 | parser.add_option("-b", "--binary", dest="bb", help="path to BusyBox binary", metavar="FILE") 68 | (options, args) = parser.parse_args() 69 | if options.bb == None: 70 | parser.error("Path to BusyBox binary needed") 71 | res = busybox_version(options.bb, None, None, {}, []) 72 | # busybox_version() returns None if no version could be determined 73 | if res != None: 74 | print res[1] 75 | else: 76 | print "No BusyBox found" 77 | 78 | if __name__ == "__main__": 79 | main(sys.argv) 80 | -------------------------------------------------------------------------------- /src/bat/ext2.py: -------------------------------------------------------------------------------- 1 | # Binary Analysis Tool 2 | # Copyright 2009-2016 Armijn Hemel for Tjaldur Software Governance Solutions 3 | # Licensed under Apache 2.0, see LICENSE file for details 4 | 5 | import os 6 | import subprocess 7 | import tempfile 8 | 9 | ''' 10 | Module to 'unpack' an ext2 file system. We are taking a shortcut. We're using 11 | e2cp to copy files, but we're recreating the directories in the file system 12 | ourselves. We can get this information from the output of e2ls. 13 | 14 | The second column displays the Ext2/Linux mode flags, which can be found in 15 | the header files from e2fsprogs.
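For example, a (hypothetical) line of 'e2ls -l' output could look like:

   12  100755      0      0    404467  1-Jan-2010 12:00 busybox

where the second column holds the mode: entries starting with 40 (such as
40755) are directories, and entries in the 100-117 range are regular files
(the range above 100 covers suid/sgid/sticky variants). The code below strips
the last three digits and looks at what remains.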
16 | 17 | We are mostly interested in regular files and directories: 18 | 19 | #define LINUX_S_IFREG 0100000 20 | #define LINUX_S_IFDIR 0040000 21 | ''' 22 | 23 | def copyext2fs(source, target=None): 24 | if target == None: 25 | targetdir = tempfile.mkdtemp() 26 | else: 27 | targetdir = target 28 | 29 | # now walk each directory and copy files 30 | scandirs = [""] 31 | unpackfail = False 32 | while len(scandirs) != 0: 33 | newscandirs = set() 34 | for scandir in scandirs: 35 | p = subprocess.Popen(['e2ls', '-l', source + ":" + scandir], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 36 | (stanout, stanerr) = p.communicate() 37 | if p.returncode != 0: 38 | # This could happen is for example the file system is corrupted 39 | # and inodes are damaged 40 | unpackfail = True 41 | break 42 | if stanout.strip() == "No files found!": 43 | continue 44 | for i in stanout.strip().split("\n"): 45 | if i.startswith(">"): 46 | continue 47 | isplits = i.split() 48 | if len(isplits[1]) < 5: 49 | # bogus file system, so continue 50 | return None 51 | modeflag = int(isplits[1][0:-3]) 52 | if len(isplits) < 8: 53 | continue 54 | else: 55 | filename = isplits[7] 56 | if modeflag == 40: 57 | newscandirs.add(scandir + "/" + filename) 58 | os.mkdir(target + "/" + scandir + "/" + filename) 59 | # also take sticky bit, suid, sgid, etc. into account 60 | elif modeflag >= 100 and modeflag < 120: 61 | copypath = source + ":" + scandir + "/" + filename 62 | p = subprocess.Popen(['e2cp', copypath, "-d", os.path.normpath(target + "/" + scandir)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 63 | (stanout, stanerr) = p.communicate() 64 | if p.returncode != 0: 65 | continue 66 | scandirs = newscandirs 67 | if unpackfail: 68 | return None 69 | return targetdir 70 | -------------------------------------------------------------------------------- /src/bat/file2package.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This is a plugin for the Binary Analysis Tool. Its purpose is to determine the 9 | package a file belongs to based on the name of a package. This information is 10 | mined from distributions like Fedora and Debian. 11 | ''' 12 | 13 | import os 14 | import os.path 15 | import sys 16 | import subprocess 17 | import copy 18 | import Queue 19 | import cPickle 20 | import multiprocessing 21 | from multiprocessing import Process, Lock 22 | from multiprocessing.sharedctypes import Value, Array 23 | 24 | def grabpackage(scanqueue, reportqueue, cursor, query): 25 | # select the packages that are available. 
It would be better to also have the directory 26 | # name available, so we should get rid of 'path' and use something else that is better 27 | # suited 28 | while True: 29 | filename = scanqueue.get(timeout=2592000) 30 | cursor.execute(query, (os.path.basename(filename),)) 31 | res = cursor.fetchall() 32 | if res != []: 33 | returnres = [] 34 | # TODO: filter results, only return files that are not in tons of packages 35 | for r in res: 36 | (package, packageversion, distribution, distroversion) = r 37 | distrores = {} 38 | distrores['package'] = package 39 | distrores['packageversion'] = packageversion 40 | distrores['distribution'] = distribution 41 | distrores['distributionversion'] = distroversion 42 | returnres.append(distrores) 43 | reportqueue.put({filename: returnres}) 44 | scanqueue.task_done() 45 | 46 | def filename2package(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None): 47 | processtasks = [] 48 | for i in unpackreports: 49 | if not 'checksum' in unpackreports[i]: 50 | continue 51 | processtasks.append(i) 52 | 53 | if processors == None: 54 | processamount = 1 55 | else: 56 | processamount = processors 57 | # create a queue for tasks, with a few threads reading from the queue 58 | # and looking up results and putting them in a result queue 59 | query = "select distinct package, packageversion, source, distroversion from file where filename = %s" 60 | scanmanager = multiprocessing.Manager() 61 | scanqueue = multiprocessing.JoinableQueue(maxsize=0) 62 | reportqueue = scanmanager.Queue(maxsize=0) 63 | processpool = [] 64 | 65 | map(lambda x: scanqueue.put(x), processtasks) 66 | minprocessamount = min(len(processtasks), processamount) 67 | res = [] 68 | 69 | for i in range(0,minprocessamount): 70 | p = multiprocessing.Process(target=grabpackage, args=(scanqueue,reportqueue,batcursors[i],query)) 71 | processpool.append(p) 72 | p.start() 73 | 74 | scanqueue.join() 75 | 76 | while True: 77 | try: 78 | val = reportqueue.get_nowait() 79 | res.append(val) 80 | reportqueue.task_done() 81 | except Queue.Empty, e: 82 | # Queue is empty 83 | break 84 | reportqueue.join() 85 | 86 | for p in processpool: 87 | p.terminate() 88 | 89 | for r in res: 90 | filename = r.keys()[0] 91 | filehash = unpackreports[filename]['checksum'] 92 | 93 | # read pickle file 94 | leaf_file = open(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash), 'rb') 95 | leafreports = cPickle.load(leaf_file) 96 | leaf_file.close() 97 | 98 | # write pickle file 99 | leafreports['file2package'] = r[filename] 100 | leafreports['tags'].append('file2package') 101 | unpackreports[filename]['tags'].append('file2package') 102 | leaf_file = open(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash), 'wb') 103 | cPickle.dump(leafreports, leaf_file) 104 | leaf_file.close() 105 | 106 | returnres = res 107 | 108 | def file2packagesetup(scanenv, cursor, conn, debug=False): 109 | if cursor == None: 110 | return (False, {}) 111 | cursor.execute("select table_name from information_schema.tables where table_type='BASE TABLE' and table_schema='public'") 112 | tablenames = map(lambda x: x[0], cursor.fetchall()) 113 | conn.commit() 114 | if not 'file' in tablenames: 115 | return (False, {}) 116 | return (True, scanenv) 117 | -------------------------------------------------------------------------------- /src/bat/findduplicates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2013-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | import sys 8 | 9 | ''' 10 | This aggregate scan traverses the unpackreports and reports all duplicate 11 | files as a list of lists of identical files. 12 | ''' 13 | 14 | def findduplicates(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None): 15 | filehashes = {} 16 | for r in unpackreports.keys(): 17 | if 'checksum' in unpackreports[r]: 18 | if unpackreports[r]['checksum'] in filehashes: 19 | filehashes[unpackreports[r]['checksum']].append(r) 20 | else: 21 | filehashes[unpackreports[r]['checksum']] = [r] 22 | duplicates = [] 23 | for h in filehashes: 24 | if len(filehashes[h]) > 1: 25 | duplicates.append(filehashes[h]) 26 | if duplicates != []: 27 | return {'duplicates': duplicates} 28 | -------------------------------------------------------------------------------- /src/bat/fixduplicates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding: utf-8 -*- 3 | 4 | # Binary Analysis Tool 5 | # Copyright 2014-2016 Armijn Hemel for Tjaldur Software Governance Solutions 6 | # Licensed under Apache 2.0, see LICENSE file for details 7 | 8 | import os 9 | import os.path 10 | import sys 11 | import subprocess 12 | import copy 13 | import cPickle 14 | import elfcheck 15 | 16 | ''' 17 | During scanning BAT tags duplicate files (same checksums) and only processes a 18 | single file later on. Which file is marked as the 'original' and which as the 19 | duplicate depends on the scanning order, which is non-deterministic. 20 | 21 | In some situations there is more information available to make a better choice 22 | about the 'original' and the duplicate. 23 | 24 | This module tries to fix these situations. 25 | 26 | 1. In ELF shared libraries the SONAME and RPATH attributes can be used. 27 | ''' 28 | 29 | def fixduplicates(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None): 30 | # First deal with ELF files 31 | # store names of all ELF files present in scan archive 32 | elffiles = set() 33 | dupefiles = set() 34 | 35 | seendupe = False 36 | 37 | for i in unpackreports: 38 | if not 'checksum' in unpackreports[i]: 39 | continue 40 | filehash = unpackreports[i]['checksum'] 41 | if not os.path.exists(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash)): 42 | continue 43 | 44 | if not 'elf' in unpackreports[i]['tags']: 45 | continue 46 | 47 | # This makes no sense for, for example, statically linked libraries, Linux kernel 48 | # images and Linux kernel modules, so skip.
49 | if 'static' in unpackreports[i]['tags']: 50 | continue 51 | if 'linuxkernel' in unpackreports[i]['tags']: 52 | continue 53 | if 'duplicate' in unpackreports[i]['tags']: 54 | seendupe = True 55 | dupefiles.add(i) 56 | else: 57 | elffiles.add(i) 58 | 59 | # only process if there actually are duplicate files 60 | if seendupe: 61 | dupehashes = {} 62 | for i in dupefiles: 63 | filehash = unpackreports[i]['checksum'] 64 | if filehash in dupehashes: 65 | dupehashes[filehash].append(i) 66 | else: 67 | dupehashes[filehash] = [i] 68 | dupekeys = dupehashes.keys() 69 | for i in elffiles: 70 | filehash = unpackreports[i]['checksum'] 71 | if filehash in dupekeys: 72 | realpath = unpackreports[i]['realpath'] 73 | filename = unpackreports[i]['name'] 74 | 75 | elfres = elfcheck.getDynamicLibs(os.path.join(realpath, filename)) 76 | if elfres == {} or elfres == None: 77 | continue 78 | 79 | if not 'sonames' in elfres: 80 | continue 81 | 82 | sonames = elfres['sonames'] 83 | 84 | # there should be only one SONAME 85 | if len(sonames) != 1: 86 | continue 87 | 88 | soname = sonames[0] 89 | if soname == filename: 90 | # no need for fixing 91 | continue 92 | if unpackreports[i]['scans'] != []: 93 | # if any unpack scans were successful then renaming might have 94 | # to be done recursively which needs more thought 95 | continue 96 | unpackreports[i]['tags'].append('duplicate') 97 | for j in dupehashes[filehash]: 98 | if soname == os.path.basename(j): 99 | unpackreports[j]['tags'].remove('duplicate') 100 | break 101 | -------------------------------------------------------------------------------- /src/bat/fsmagic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2009-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | '''This file contains information about how to recognize certain 8 | files, file systems, compression, and so on automatically and which 9 | methods or functions to invoke to unpack these files for further 10 | analysis.''' 11 | 12 | # information from: 13 | # 1. /usr/share/magic 14 | # 2. include/linux/magic.h in the Linux kernel sources 15 | # 3. http://www.squashfs-lzma.org/ 16 | # 4. http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=364260 17 | # 5. various other places 18 | 19 | # This is not the same as the magic database, but just a list of 20 | # identifiers that are used for these file systems, compressed files,etc. 21 | # In BAT a lot more work is done to verify what a file really is, which 22 | # the magic database does not do. 
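# Each entry below maps an identifier name to the raw byte string that is
# searched for in binary data. Some signatures do not sit at the very start of
# the data they mark (for example ext2 and iso9660); for those the 'correction'
# table further down gives the number of bytes to subtract from the position of
# the marker to find the real start.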
23 | 24 | fsmagic = { 25 | 'gzip': '\x1f\x8b\x08', # x08 is the only compression method according to RFC 1952 26 | 'compress': '\x1f\x9d', 27 | 'bz2': 'BZh', 28 | 'rar': 'Rar!\x1a\x07', 29 | 'rarfooter': '\xc4\x3d\x7b\x00\x40\x07\x00', # http://forensicswiki.org/wiki/RAR#Terminator_.28terminator.29 30 | 'zip': '\x50\x4b\x03\04', 31 | 'zipend': '\x50\x4b\x05\06', 32 | 'lrzip': 'LRZI', 33 | 'rzip': 'RZIP', 34 | 'squashfs1': '\x68\x73\x71\x73', # hsqs -- little endian 35 | 'squashfs2': '\x73\x71\x73\x68', # sqsh -- big endian 36 | 'squashfs3': '\x71\x73\x68\x73', # qshs -- little endian 37 | 'squashfs4': '\x73\x68\x73\x71', # shsq -- big endian 38 | 'squashfs5': '\x74\x71\x73\x68', # tqsh - used in DD-WRT 39 | 'squashfs6': '\x68\x73\x71\x74', # hsqt - used in DD-WRT 40 | 'squashfs7': '\x73\x71\x6c\x7a', # sqlz 41 | 'android-sparse': '\x3a\xff\x26\xed', 42 | 'lzma_alone': '\x5d\x00\x00', 43 | 'lzma_alone_alt': '\x6d\x00\x00', # used in OpenWrt 44 | 'lzma_alone_alt2':'\x6c\x00\x00', # seen in some routers, like ZyXEL NBG5615 45 | '7z': '7z\xbc\xaf\x27\x1c', 46 | 'xz': '\xfd\x37\x7a\x58\x5a\x00', 47 | 'xztrailer': '\x59\x5a', 48 | 'lzip': 'LZIP', 49 | 'lzop': '\x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a', 50 | 'lha': '-lh7-', 51 | 'cramfs_le': '\x45\x3d\xcd\x28', 52 | 'cramfs_be': '\x28\xcd\x3d\x45', 53 | 'romfs': '-rom1fs-', 54 | 'jffs2_le': '\x85\x19', 55 | 'jffs2_be': '\x19\x85', 56 | 'ubifs': '\x31\x18\x10\x06', 57 | 'ubi': '\x55\x42\x49\x23', 58 | 'rpm': '\xed\xab\xee\xdb', 59 | 'ext2': '\x53\xef', # little endian 60 | 'minix': '\x8f\x13', # specific version of Minix v1 file system 61 | 'arj': '\x60\xea', 62 | 'cab': 'MSCF\x00\x00\x00\x00', # first four bytes following header are always 0 63 | 'installshield': 'ISc(', 64 | 'pkbac': 'PKBAC', 65 | 'winrar': 'WinRAR', 66 | 'png': '\x89PNG\x0d\x0a\x1a\x0a', 67 | 'pngtrailer': '\x00\x00\x00\x00IEND\xae\x42\x60\x82', # length, chunk type and CRC for PNG trailer are always the same 68 | 'cpiotrailer': 'TRAILER!!!', 69 | 'bmp': 'BM', 70 | 'jpeg': '\xff\xd8', 71 | 'jpegtrailer': '\xff\xd9', 72 | 'jfif': 'JFIF', 73 | 'gif87': 'GIF87a', 74 | 'gif89': 'GIF89a', 75 | 'ico': '\x00\x00\x01\x00', 76 | 'riff': 'RIFF', 77 | 'cpio1': '070701', 78 | 'cpio2': '070702', 79 | 'cpio3': '070707', 80 | 'iso9660': 'CD001', 81 | 'swf': 'CWS', 82 | 'pdf': '%PDF-', 83 | 'pdftrailer': '%%EOF', 84 | 'ar': '!<arch>', 85 | 'tar1': 'ustar\x00', 86 | 'tar2': 'ustar\x20', 87 | 'java_serialized': '\xac\xed\x00', 88 | 'fat12': 'FAT12', 89 | 'fat16': 'FAT16', 90 | 'pe': 'MZ', 91 | 'upx': 'UPX', 92 | 'java': '\xca\xfe\xba\xbe', 93 | 'pack200': '\xca\xfe\xd0\x0d', 94 | 'dex': 'dex\n', # Android Dex 95 | 'odex': 'dey\n', # Android Odex 96 | 'oat': 'oat\n', # Android OAT 97 | 'otf': 'OTTO', 98 | 'ttf': '\x00\x01\x00\x00', 99 | 'id3': 'TAG', 100 | 'id3v2': 'ID3', 101 | 'mp4': 'ftyp', 102 | 'ogg': 'OggS', 103 | 'sqlite3': 'SQLite format 3\x00', 104 | 'u-boot': '\x27\x05\x19\x56', 105 | 'yaffs2': '\x03\x00\x00\x00\x01\x00\x00\x00\xff\xff', # this is not an official signature, just occurring frequently 106 | 'plf': '\x50\x4c\x46\x21', 107 | 'chm': 'ITSF\x03\x00\x00\x00\x60\x00\x00\x00\x01\x00\x00\x00', 108 | 'msi': '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1', # not sure this is a correct signature 109 | 'windowsassemblyheader': '<assembly', 110 | 'windowsassemblytrailer': '</assembly>', 111 | 'appledouble': '\x00\x05\x16\x07', 112 | 'mswim': 'MSWIM\x00\x00\x00', 113 | 'certificate': '-----BEGIN', 114 | 'androidbackup': 'ANDROID BACKUP\n', 115 | 'aiff': 'FORM', 116 | 'woff': 'wOFF', 117 | 'woff2': 'wOF2', 118 | 'xar': '\x78\x61\x72\x21', 119 | 'icc': 'acsp', 120 |
'elf': '\x7f\x45\x4c\x46', 121 | 'bflt': '\x62\x46\x4c\x54', 122 | } 123 | 124 | # some offsets can be found after a certain number of bytes, but 125 | # the actual file system or file starts earlier 126 | correction = { 127 | 'ext2': 0x438, 128 | 'minix': 0x410, 129 | 'iso9660': 32769, 130 | 'tar1': 0x101, 131 | 'tar2': 0x101, 132 | 'fat12': 54, 133 | 'fat16': 54, 134 | 'lha': 2, 135 | 'icc': 36, 136 | } 137 | 138 | # collection of markers that should be scanned together 139 | squashtypes = ['squashfs1', 'squashfs2', 'squashfs3', 'squashfs4', 'squashfs5', 'squashfs6'] 140 | lzmatypes = ['lzma_alone', 'lzma_alone_alt', 'lzma_alone_alt2'] 141 | cpio = ['cpio1', 'cpio2', 'cpio3'] 142 | gif = ['gif87', 'gif89'] 143 | tar = ['tar1', 'tar2'] 144 | -------------------------------------------------------------------------------- /src/bat/fssearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2009-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | import sys 8 | import os 9 | import tempfile 10 | import fsmagic 11 | 12 | # Find a squashfs file system, starting at a certain offset. 13 | # Returns the offset of the file system nearest file system. 14 | def findSquashfs(data, offset=0): 15 | marker = -1 16 | squashtype = None 17 | for t in fsmagic.squashtypes: 18 | sqshmarker = findMarker(fsmagic.fsmagic[t], data, offset) 19 | if sqshmarker == -1: 20 | continue 21 | if marker == -1: 22 | marker = sqshmarker 23 | else: 24 | marker = min(marker, sqshmarker) 25 | return marker 26 | 27 | # Find a marker. To more efficiently deal with big files we don't read in 28 | # the entire file at once, but use read() and seek() 29 | def findMarker(marker, datafile, offset=0): 30 | databuffer = [] 31 | datafile.seek(offset) 32 | databuffer = datafile.read(100000) 33 | while databuffer != '': 34 | res = databuffer.find(marker) 35 | if res != -1: 36 | datafile.seek(0) 37 | return offset + res 38 | else: 39 | # move the offset 50 40 | datafile.seek(offset + 99950) 41 | # read 100000 bytes from oldoffset + 50, so there is 50 bytes 42 | # overlap with the previous read 43 | databuffer = datafile.read(100000) 44 | if len(databuffer) >= 50: 45 | offset = offset + 99950 46 | else: 47 | offset = offset + len(databuffer) 48 | datafile.seek(0) 49 | return -1 50 | 51 | def findType(type, data, offset=0): 52 | res = findMarker(fsmagic.fsmagic[type], data, offset) 53 | return res 54 | 55 | def findCpio(data, offset=0): 56 | cpiomarker = -1 57 | for marker in fsmagic.cpio: 58 | res = findMarker(fsmagic.fsmagic[marker], data, offset) 59 | if res != -1 and cpiomarker == -1: 60 | cpiomarker = res 61 | elif res != -1: 62 | cpiomarker = min(cpiomarker, res) 63 | return cpiomarker 64 | 65 | def findXZTrailer(data, offset=0): 66 | return findType('xztrailer', data, offset) 67 | 68 | def findCpioTrailer(data, offset=0): 69 | return findType('cpiotrailer', data, offset) 70 | 71 | def findExt2fs(data, offset=0): 72 | return findType('ext2', data, offset) 73 | 74 | def findISO9660(data, offset=0): 75 | return findType('iso9660', data, offset) 76 | 77 | def findIco(data, offset=0): 78 | return findType('ico', data, offset) 79 | 80 | def findRPM(data, offset=0): 81 | return findType('rpm', data, offset) 82 | 83 | def findGzip(data, offset=0): 84 | return findType('gzip', data, offset) 85 | 86 | def findZip(data, offset=0): 87 | return findType('zip', data, 
offset) 88 | 89 | def findCramfs(data, offset=0): 90 | return findType('cramfs', data, offset) 91 | 92 | def findUbi(data, offset=0): 93 | return findType('ubi', data, offset) 94 | 95 | def findRar(data, offset=0): 96 | return findType('rar', data, offset) 97 | 98 | # not reliable according to comments in /usr/share/magic 99 | def findLZMA(data, offset=0): 100 | return findType('lzma_alone', data, offset) 101 | 102 | def findXZ(data, offset=0): 103 | return findType('xz', data, offset) 104 | 105 | def findLzip(data, offset=0): 106 | return findType('lzip', data, offset) 107 | 108 | def findLzo(data, offset=0): 109 | return findType('lzo', data, offset) 110 | 111 | def findBzip2(data, offset=0): 112 | return findType('bz2', data, offset) 113 | 114 | def findARJ(data, offset=0): 115 | return findType('arj', data, offset) 116 | 117 | def findCab(data, offset=0): 118 | return findType('cab', data, offset) 119 | 120 | def findPNG(data, offset=0): 121 | return findType('png', data, offset) 122 | 123 | # http://www.w3.org/TR/PNG-Chunks.html 124 | def findPNGTrailer(data, offset=0): 125 | return findType('pngtrailer', data, offset) 126 | 127 | def findJFIF(data, offset=0): 128 | jfifmarker = findType('jfif', data, offset) 129 | if jfifmarker < 6: 130 | return -1 131 | else: 132 | return jfifmarker - 6 133 | 134 | def findGIF(data, offset=0): 135 | gifmarker = -1 136 | for marker in fsmagic.gif: 137 | res = findMarker(fsmagic.fsmagic[marker], data, offset) 138 | if res != -1 and gifmarker == -1: 139 | gifmarker = res 140 | elif res != -1: 141 | gifmarker = min(gifmarker, res) 142 | return gifmarker 143 | 144 | def markerSearch(data): 145 | offsets = [] 146 | marker_keys = fsmagic.marker.keys() 147 | for key in marker_keys: 148 | res = data.find(fsmagic.marker[key]) 149 | while res != -1: 150 | offsets.append((res, key)) 151 | res = data.find(fsmagic.marker[key], res+1) 152 | offsets.sort() 153 | for i in offsets: 154 | print hex(i[0]), i[1], i[0]%8 155 | 156 | def bruteForceSearch(data): 157 | offsets = [] 158 | fsmagic_keys = fsmagic.fsmagic.keys() 159 | for key in fsmagic_keys: 160 | res = data.find(fsmagic.fsmagic[key]) 161 | while res != -1: 162 | offsets.append((res, key)) 163 | res = data.find(fsmagic.fsmagic[key], res+1) 164 | offsets.sort() 165 | for i in offsets: 166 | print hex(i[0]), i[1], i[0]%8 167 | -------------------------------------------------------------------------------- /src/bat/generatehexdump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This is a plugin for the Binary Analysis Tool. It takes the output of hexdump -Cv 9 | and writes it to a file with gzip compression. The output is later used in the 10 | graphical user interface. 11 | 12 | Parameters: 13 | 14 | BAT_REPORTDIR :: directory where output should be written to. 
This is useful for caching 15 | BAT_IMAGE_MAXFILESIZE :: maximum size of source file 16 | 17 | This should be run as a postrun scan 18 | ''' 19 | 20 | import os 21 | import os.path 22 | import sys 23 | import subprocess 24 | import gzip 25 | 26 | def generateHexdump(filename, unpackreport, scantempdir, topleveldir, scanenv, cursor, conn, debug=False): 27 | if not 'checksum' in unpackreport: 28 | return 29 | reportdir = scanenv.get('BAT_REPORTDIR', '.') 30 | try: 31 | os.stat(reportdir) 32 | except: 33 | # BAT_REPORTDIR does not exist 34 | try: 35 | os.makedirs(reportdir) 36 | except Exception: 37 | return 38 | 39 | maxsize = int(scanenv.get('BAT_IMAGE_MAXFILESIZE', sys.maxint)) 40 | # override file name, we won't use it much 41 | filename = os.path.join(unpackreport['realpath'], unpackreport['name']) 42 | filesize = os.stat(filename).st_size 43 | if filesize > maxsize: 44 | return 45 | if not os.path.exists("%s/%s-hexdump.gz" % (reportdir, unpackreport['checksum'])): 46 | p = subprocess.Popen(['hexdump', '-Cv', filename], stdout=subprocess.PIPE, 47 | stderr=subprocess.PIPE, close_fds=True) 48 | (stanout, stanerr) = p.communicate() 49 | if stanout != "": 50 | gf = gzip.open("%s/%s-hexdump.gz" % (reportdir, unpackreport['checksum']), 'w') 51 | gf.write(stanout) 52 | gf.close() 53 | -------------------------------------------------------------------------------- /src/bat/images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This is a plugin for the Binary Analysis Tool. It generates images of files, both 9 | full files and thumbnails. The files can be used for informational purposes, such 10 | as detecting roughly where offsets can be found, if data is compressed or encrypted, 11 | etc. 
12 | 13 | This should be run as a postrun scan 14 | 15 | Parameters for configuration file: 16 | 17 | * BAT_IMAGE_MAXFILESIZE :: maximum size of the *source* file, to prevent 18 | ridiculously large files from being turned into even ridiculously larger 19 | pictures 20 | * BAT_IMAGEDIR :: location to where images should be written 21 | ''' 22 | 23 | import os 24 | import os.path 25 | import sys 26 | import subprocess 27 | from PIL import Image 28 | 29 | def generateImages(filename, unpackreport, scantempdir, topleveldir, scanenv, cursor, conn, debug=False): 30 | if not 'checksum' in unpackreport: 31 | return 32 | 33 | imagedir = scanenv.get('BAT_IMAGEDIR', "%s/%s" % (topleveldir, "images")) 34 | try: 35 | os.stat(imagedir) 36 | except: 37 | # BAT_IMAGEDIR does not exist 38 | try: 39 | os.makedirs(imagedir) 40 | except Exception, e: 41 | return 42 | 43 | maxsize = int(scanenv.get('BAT_IMAGE_MAXFILESIZE', sys.maxint)) 44 | filesize = os.stat("%s/%s" % (scantempdir, filename)).st_size 45 | if filesize > maxsize: 46 | return 47 | # this stuff is easily cached 48 | if not os.path.exists("%s/%s.png" % (imagedir, unpackreport['checksum'])): 49 | fwfile = open("%s/%s" % (scantempdir, filename)) 50 | 51 | # this is very inefficient for large files, but we *really* need all the data :-( 52 | fwdata = fwfile.read() 53 | fwfile.close() 54 | 55 | fwlen = len(fwdata) 56 | 57 | if fwlen > 512: 58 | height = 512 59 | else: 60 | height = fwlen 61 | width = fwlen/height 62 | 63 | # we might need to add some bytes so we can create a valid picture 64 | if fwlen%height > 0: 65 | width = width + 1 66 | for i in range(0, height - (fwlen%height)): 67 | fwdata = fwdata + chr(0) 68 | 69 | imgbuffer = buffer(bytearray(fwdata)) 70 | 71 | im = Image.frombuffer("L", (height, width), imgbuffer, "raw", "L", 0, 1) 72 | im.save("%s/%s.png" % (imagedir, unpackreport['checksum'])) 73 | #''' 74 | if width > 100: 75 | imthumb = im.thumbnail((height/4, width/4)) 76 | im.save("%s/%s-thumbnail.png" % (imagedir, unpackreport['checksum'])) 77 | #''' 78 | -------------------------------------------------------------------------------- /src/bat/piecharts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This is a plugin for the Binary Analysis Tool. It generates images of results 9 | of the ranking scan, like piecharts and version charts. 
10 | 11 | It is used by generateimages.py 12 | ''' 13 | 14 | import os 15 | import os.path 16 | import cPickle 17 | import matplotlib 18 | matplotlib.use('cairo') 19 | import pylab 20 | 21 | def generateImages(picklefile, pickledir, filehash, imagedir, pietype): 22 | 23 | leaf_file = open(os.path.join(pickledir, picklefile), 'rb') 24 | (piedata, pielabels) = cPickle.load(leaf_file) 25 | leaf_file.close() 26 | 27 | pylab.figure(1, figsize=(6.5,6.5)) 28 | ax = pylab.axes([0.2, 0.15, 0.6, 0.6]) 29 | 30 | pylab.pie(piedata, labels=pielabels) 31 | 32 | pylab.savefig(os.path.join(imagedir, '%s-%s.png' % (filehash, pietype))) 33 | pylab.gcf().clear() 34 | os.unlink(os.path.join(pickledir, picklefile)) 35 | -------------------------------------------------------------------------------- /src/bat/prunefiles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2013-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | import os 8 | import os.path 9 | import sys 10 | 11 | ''' 12 | This method can be used to prune scans, by for example ignoring all graphics files 13 | ''' 14 | 15 | def prunefiles(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None): 16 | if not "PRUNE_TAGS" in scanenv: 17 | return 18 | prunes = scanenv['PRUNE_TAGS'] 19 | prunetags = set(prunes.split(',')) 20 | 21 | cleanpickles = False 22 | if scanenv.get('PRUNE_FILEREPORT_CLEAN', 0) == '1': 23 | cleanpickles = True 24 | 25 | cleanfiles = set() 26 | for u in unpackreports.keys(): 27 | if set(unpackreports[u]['tags']).intersection(prunetags) != set(): 28 | if cleanpickles: 29 | filehash = unpackreports[u]['checksum'] 30 | cleanfiles.add(filehash) 31 | del unpackreports[u] 32 | 33 | for filehash in cleanfiles: 34 | try: 35 | os.unlink(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash)) 36 | except Exception, e: 37 | print >>sys.stderr, "error removing", filehash, e 38 | sys.stderr.flush() 39 | -------------------------------------------------------------------------------- /src/bat/renamefiles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2015-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | import shutil 8 | import os.path 9 | import copy 10 | 11 | ''' 12 | This aggregate scan traverses the unpackreports an tries to rename certain files based on properties of 13 | unpacked files. 
For example: 14 | 15 | * if a file is carved out of a larger file that contains a Linux kernel, 16 | rename it to something like "unpacked-linux-kernel" 17 | * if a gzip CPIO archive is extracted from a Linux kernel and contains 18 | files/directories, like /root or /dev it is likely an initramfs 19 | ''' 20 | 21 | def renamefiles(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None): 22 | # only focus on initramfs that is also compressed for now 23 | kernelfiles = set() 24 | # known compressions for initramfs 25 | initramfscompressions = ['gzip'] 26 | for r in unpackreports.keys(): 27 | if 'checksum' in unpackreports[r]: 28 | if 'linuxkernel' in unpackreports[r]['tags']: 29 | if 'modulekernelversion' in unpackreports[r]['tags']: 30 | continue 31 | if 'duplicate' in unpackreports[r]['tags']: 32 | continue 33 | kernelfiles.add(r) 34 | 35 | if 'TEMPLATE' in scanenv: 36 | template = scanenv['TEMPLATE'] 37 | if template is not None: 38 | templatecutoff = template.find('%') 39 | template = template[:templatecutoff] 40 | 41 | cpiotemplate = "initramfs" 42 | for r in kernelfiles: 43 | if unpackreports[r]['scans'] != []: 44 | counter = 0 45 | for s in unpackreports[r]['scans']: 46 | if len(s['scanreports']) != 1: 47 | counter += 1 48 | continue 49 | renamefiles = set() 50 | origcpio = '' 51 | targetcpio = '' 52 | process = False 53 | if s['scanname'] in initramfscompressions: 54 | unpackfile = s['scanreports'][0] 55 | if unpackreports[unpackfile]['name'].startswith('tmp'): 56 | process = True 57 | else: 58 | if template is not None: 59 | if unpackreports[unpackfile]['name'].startswith(template): 60 | process = True 61 | if not process: 62 | counter += 1 63 | continue 64 | if unpackreports[unpackfile]['scans'] != []: 65 | if len(unpackreports[unpackfile]['scans']) != 1: 66 | counter += 1 67 | continue 68 | if unpackreports[unpackfile]['scans'][0]['scanname'] == 'cpio': 69 | # it is an initramfs, so it is possible to rename the file 70 | # Rename on disk: 71 | # 1. file 72 | # 2. unpacking directory 73 | # Then rename in unpackreports 74 | # 1. original file 75 | # 2. any paths in scanreports (path, realpath) 76 | # 3. 
references in parent file 77 | origname = os.path.join(unpackreports[unpackfile]['realpath'], unpackreports[unpackfile]['name']) 78 | targetname = os.path.join(unpackreports[unpackfile]['realpath'], cpiotemplate) 79 | if not os.path.exists(targetname): 80 | # on disk 81 | shutil.move(origname, targetname) 82 | if not "duplicate" in unpackreports[unpackfile]['tags']: 83 | origcpio = "%s-cpio-1" % origname 84 | targetcpio = "%s-cpio-1" % targetname 85 | shutil.move(origcpio, targetcpio) 86 | # in unpackreports 87 | unpackreports[unpackfile]['name'] = cpiotemplate 88 | newunpackreportsname = os.path.join(os.path.dirname(unpackfile), cpiotemplate) 89 | 90 | unpackreports[r]['scans'][counter]['scanreports'][0] = newunpackreportsname 91 | renamefiles.add(unpackfile) 92 | 93 | while len(renamefiles) != 0: 94 | newrenamefiles = set() 95 | for re in renamefiles: 96 | origcpio = '/%s' % os.path.basename(origcpio) 97 | targetcpio = '/%s' % os.path.basename(targetcpio) 98 | newr = re.replace(origcpio, targetcpio) 99 | 100 | realpath = copy.deepcopy(unpackreports[re]['realpath']) 101 | newrealpath = realpath.replace(origcpio, targetcpio) 102 | unpackreports[re]['realpath'] = newrealpath 103 | # recurse into files, if any 104 | if 'scans' in unpackreports[re]: 105 | for sc in unpackreports[re]['scans']: 106 | if 'scanreports' in sc: 107 | newrenamefiles.update(sc['scanreports']) 108 | newscanreports = [] 109 | for scr in sc['scanreports']: 110 | newscanreports.append(scr.replace(origcpio, targetcpio)) 111 | sc['scanreports'] = newscanreports 112 | 113 | # then rename and delete the old value 114 | unpackreports[newr] = copy.deepcopy(unpackreports[re]) 115 | del unpackreports[re] 116 | renamefiles = newrenamefiles 117 | counter += 1 118 | -------------------------------------------------------------------------------- /src/bat/reportcopyright.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This plugin for BAT looks at the extracted identifiers and looks at if there 9 | is some sort of copyright notice in an extracted identifier. This might not 10 | work well in the case of multiline copyright notices. 
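For example, extracted strings such as "Copyright (C) 2009 Example Corp" (a
made up example) or "(c) 2009" would both be flagged: the check simply looks
for the substrings 'copyright' and '(c)' in the lowercased string.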
11 | ''' 12 | 13 | import os 14 | import os.path 15 | import sys 16 | import subprocess 17 | import copy 18 | import cPickle 19 | import multiprocessing 20 | 21 | 22 | def reportcopyright(unpackreports, scantempdir, topleveldir, processors, scanenv, batcursors, batcons, scandebug=False, unpacktempdir=None): 23 | for i in unpackreports: 24 | if not 'checksum' in unpackreports[i]: 25 | continue 26 | filehash = unpackreports[i]['checksum'] 27 | if not os.path.exists(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash)): 28 | continue 29 | if not 'identifier' in unpackreports[i]['tags']: 30 | continue 31 | 32 | # read pickle file 33 | leaf_file = open(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash), 'rb') 34 | leafreports = cPickle.load(leaf_file) 35 | leaf_file.close() 36 | 37 | writeback = False 38 | strs = leafreports['identifier']['strings'] 39 | copyrights = [] 40 | for line in strs: 41 | if 'copyright' in line.lower(): 42 | writeback = True 43 | copyrights.append(line) 44 | continue 45 | if '(c)' in line.lower(): 46 | writeback = True 47 | copyrights.append(line) 48 | if writeback: 49 | unpackreports[i]['tags'].append('copyright') 50 | leafreports['tags'].append('copyright') 51 | leafreports['copyrights'] = copyrights 52 | 53 | leaf_file = open(os.path.join(topleveldir, "filereports", "%s-filereport.pickle" % filehash), 'wb') 54 | leafreports = cPickle.dump(leafreports, leaf_file) 55 | leaf_file.close() 56 | -------------------------------------------------------------------------------- /src/bat/unpackrpm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2009-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This module contains only code specific to RPM unpacking. This is so it can be 9 | disabled on systems that don't have the Python RPM bindings installed. 10 | ''' 11 | 12 | import os 13 | import os.path 14 | import struct 15 | import subprocess 16 | import tempfile 17 | import rpm 18 | import extractor 19 | import fwunpack 20 | 21 | # RPM is basically a header, plus some compressed files, so we might get 22 | # duplicates at the moment. We can defeat this easily by setting the blacklist 23 | # upperbound to the start of compression + 1. This is ugly and should actually 24 | # be fixed. 
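# Rough background (see the format documentation referenced further down in
# this function): an RPM file starts with a 96 byte "lead", followed by a
# signature section, a header section and finally the payload: a cpio archive
# compressed with gzip, xz, bzip2 or (rarely) lzip. This is why the code below
# refuses to do any work if no compressor signatures were found at all.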
25 | def searchUnpackRPM(filename, tempdir=None, blacklist=[], offsets={}, scanenv={}, debug=False): 26 | hints = {} 27 | if 'rpm' not in offsets: 28 | return ([], blacklist, [], hints) 29 | if offsets['rpm'] == []: 30 | return ([], blacklist, [], hints) 31 | 32 | # sanity checks for payload compressors before even trying to process headers 33 | compressorfound = False 34 | compressors = ['gzip', 'xz', 'bz2', 'lzip'] 35 | for compressor in compressors: 36 | if compressor in offsets: 37 | compressorfound = True 38 | break 39 | 40 | if not compressorfound: 41 | return ([], blacklist, [], hints) 42 | 43 | offsetsfound = False 44 | for compressor in compressors: 45 | if offsets[compressor] != []: 46 | offsetsfound = True 47 | break 48 | 49 | if not offsetsfound: 50 | return ([], blacklist, [], hints) 51 | 52 | diroffsets = [] 53 | rpmcounter = 1 54 | for offset in offsets['rpm']: 55 | blacklistoffset = extractor.inblacklist(offset, blacklist) 56 | if blacklistoffset is not None: 57 | continue 58 | rpmfile = open(filename, 'rb') 59 | rpmfile.seek(offset+4) 60 | rpmversionbyte = rpmfile.read(1) 61 | rpmfile.close() 62 | rpmmajorversion = struct.unpack('<B', rpmversionbyte)[0] 63 | if rpmmajorversion > 3 or rpmmajorversion == 0: 64 | continue 65 | 66 | # now first check the header 67 | headervalid = False 68 | tset = rpm.TransactionSet() 69 | tset.setVSFlags(rpm._RPMVSF_NOSIGNATURES) 70 | sizeofheader = 0 71 | # search all compressors, sorted by prevalence 72 | for compressor in ['gzip', 'xz', 'bz2', 'lzip']: 73 | if not compressor in offsets: 74 | continue 75 | for compressoroffset in offsets[compressor]: 76 | if compressoroffset < offset: 77 | continue 78 | try: 79 | tmprpm = tempfile.mkstemp() 80 | rpmfile = open(filename, 'rb') 81 | rpmfile.seek(offset) 82 | rpmdata = rpmfile.read(compressoroffset - offset) 83 | rpmfile.close() 84 | os.write(tmprpm[0], rpmdata) 85 | os.fsync(tmprpm[0]) 86 | os.close(tmprpm[0]) 87 | fdno = os.open(tmprpm[1], os.O_RDONLY) 88 | header = tset.hdrFromFdno(fdno) 89 | os.close(fdno) 90 | os.unlink(tmprpm[1]) 91 | headervalid = True 92 | sizeofheader = compressoroffset - offset 93 | break 94 | except: 95 | if os.path.exists(tmprpm[1]): 96 | os.close(fdno) 97 | os.unlink(tmprpm[1]) 98 | if headervalid: 99 | break 100 | 101 | if not headervalid: 102 | ## no valid header was found so continue with the next RPM file 103 | continue 104 | 105 | # The RPM file format is heavily underdocumented, so scrape bits and pieces 106 | # of docs from various sources. 107 | # http://www.rpm.org/max-rpm/s1-rpm-file-format-rpm-file-format.html 108 | # https://docs.fedoraproject.org/ro/Fedora_Draft_Documentation/0.1/html/RPM_Guide/ch-package-structure.html 109 | 110 | # payload format always has to be cpio 111 | if header[rpm.RPMTAG_PAYLOADFORMAT] != 'cpio': 112 | continue 113 | 114 | # possibly good statistic to have 115 | #compressor = header[rpm.RPMTAG_PAYLOADCOMPRESSOR] 116 | 117 | # the size of the headers and payload, but not of the lead and any signatures 118 | bl = header[rpm.RPMTAG_SIGSIZE] 119 | filesize = os.stat(filename).st_size 120 | 121 | # after the header checks are done carve the possible RPM file from 122 | # the bigger archive (right now just removing all leading bytes) and 123 | # use rpm2cpio to unpack the RPM file.
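# What happens below is roughly what the following (hypothetical) shell
# pipeline would do by hand:
#
#   $ rpm2cpio carved.rpm | cpio -idm
#
# except that the cpio unpacking is done by fwunpack.unpackCpio().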
124 | tmpdir = fwunpack.dirsetup(tempdir, filename, "rpm", rpmcounter) 125 | tmpfile = tempfile.mkstemp(dir=tmpdir) 126 | os.fdopen(tmpfile[0]).close() 127 | 128 | fwunpack.unpackFile(filename, offset, tmpfile[1], tmpdir) 129 | 130 | # first use rpm2cpio to unpack the rpm data 131 | p = subprocess.Popen(['rpm2cpio', tmpfile[1]], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 132 | (stanout, stanerr) = p.communicate() 133 | if len(stanout) != 0: 134 | # cleanup first 135 | os.unlink(tmpfile[1]) 136 | if tempdir is None: 137 | os.rmdir(tmpdir) 138 | # then use unpackCpio() to unpack the RPM 139 | res = fwunpack.unpackCpio(stanout, tmpdir) 140 | else: 141 | os.unlink(tmpfile[1]) 142 | if tempdir is None: 143 | os.rmdir(tmpdir) 144 | res = None # nothing was unpacked 145 | if res is not None: 146 | rpmcounter = rpmcounter + 1 147 | try: 148 | # this header describes the size of headers + 149 | # compressed payload size. It might be a few bytes off 150 | # with the actual size of the file. 151 | bl = header[rpm.RPMTAG_SIGSIZE] 152 | filesize = os.stat(filename).st_size 153 | # sanity check. It should not happen with a properly 154 | # formatted RPM file, but you never know. 155 | if bl > filesize: 156 | bl = offset + sizeofheader + 1 # blacklist up to the start of compression + 1 157 | except: 158 | bl = offset + sizeofheader + 1 159 | diroffsets.append((res, offset, bl)) 160 | blacklist.append((offset, bl)) 161 | else: 162 | # cleanup 163 | os.rmdir(tmpdir) 164 | return (diroffsets, blacklist, [], hints) 165 | -------------------------------------------------------------------------------- /src/busybox-walk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2009-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This program can be used to walk a directory tree and report the names 9 | of the applets that symlink to BusyBox. While not accurate (symlinks could 10 | have been removed) it might come in handy as an extra tool. 11 | ''' 12 | 13 | import os 14 | import sys 15 | from optparse import OptionParser 16 | 17 | def busyboxWalk(busyboxdir): 18 | busybox_applets = [] 19 | 20 | osgen = os.walk(busyboxdir) 21 | 22 | try: 23 | while True: 24 | i = osgen.next() 25 | for p in i[2]: 26 | if os.path.basename(os.path.realpath(os.path.join(i[0], p))) == 'busybox': 27 | busybox_applets.append(p) 28 | except StopIteration: 29 | pass 30 | 31 | busybox_applets.sort() 32 | return busybox_applets 33 | 34 | def main(argv): 35 | parser = OptionParser() 36 | parser.add_option("-d", "--directory", dest="bd", help="directory", metavar="DIR") 37 | (options, args) = parser.parse_args() 38 | if options.bd == None: 39 | parser.error("Path to top level directory of unpacked firmware needed") 40 | applets = busyboxWalk(options.bd) 41 | if applets != []: 42 | print "The following applets were found as symlinks:" 43 | for a in applets: 44 | if a != 'busybox': 45 | print "* %s" % a 46 | 47 | if __name__ == "__main__": 48 | main(sys.argv) 49 | -------------------------------------------------------------------------------- /src/crawlers/README: -------------------------------------------------------------------------------- 1 | This directory contains several crawlers for initializing and updating a database.
2 | 3 | We need crawlers for: 4 | 5 | * ftp.gnu.org and mirrors 6 | * ftp.kernel.org and mirrors (for non-kernel tools) 7 | * sourceforge.net 8 | * savannah 9 | * apache (at least the C/C++ based projects) 10 | -------------------------------------------------------------------------------- /src/crawlers/crawling-php: -------------------------------------------------------------------------------- 1 | Crawling PHP's PEAR and PECL repositories 2 | 3 | PHP has an extension mechanism with which packages can be downloaded and 4 | installed, called 'pear'. Various 'channels' (or repositories) can be 5 | configured, like PEAR, PECL and more. 6 | 7 | The packages are downloaded, built (if needed) and installed on the system. 8 | The pear command also has an option to download packages. To download all 9 | packages there is a convenient 'download-all' option. 10 | 11 | For PEAR use: 12 | 13 | $ pear download-all 14 | 15 | For PECL use: 16 | 17 | $ pecl download-all 18 | 19 | By default only packages marked as 'stable' will be downloaded. To download 20 | other packages you can set the preferred_state, for example: 21 | 22 | $ pear config-set preferred_state alpha 23 | $ pear config-set preferred_state beta 24 | $ pear config-set preferred_state devel 25 | 26 | Please note: not all distributions build PHP with the pear command (for example 27 | Fedora does not) because it means that some packages will be installed outside 28 | the control of the system package manager which can lead to a sysadmin 29 | nightmare. You might need to build and install PHP as a normal user somewhere 30 | before being able to run the commands mentioned above. 31 | -------------------------------------------------------------------------------- /src/crawlers/gnu-config: -------------------------------------------------------------------------------- 1 | ## change this to a local mirror! 2 | [hostconfig] 3 | protocol = ftp 4 | url = ftp.nluug.nl/pub/gnu 5 | storedir = /tmp/gpl 6 | -------------------------------------------------------------------------------- /src/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /src/debian/control: -------------------------------------------------------------------------------- 1 | Source: bat 2 | Section: misc 3 | Priority: extra 4 | Maintainer: Armijn Hemel 5 | Build-Depends: debhelper (>= 7.0.50~), python (>= 2.7) 6 | Standards-Version: 3.9.8 7 | Homepage: http://www.binaryanalysis.org/ 8 | 9 | Package: bat 10 | Architecture: all 11 | Depends: python-support (>= 0.90), python (>= 2.7), python-magic, binutils, e2tools, squashfs-tools, xz-utils, zip, unrar, cabextract, unshield, p7zip, p7zip-full, cpio, bzip2, mtd-utils, lzip, lzop, arj, icoutils, gettext, rpm, python-rpm, bat-extratools (>= 27.0), poppler-utils, upx-ucl, libxml2-utils, netpbm, lrzip, ncompress, python-imaging, vorbis-tools, ctags, python-matplotlib, unzip, python-pydot, bsdiff, python-reportlab, fonts-liberation, clamav, john, python-psycopg2, openssl 12 | Description: Modular framework to assist auditing binary files 13 | The Binary Analysis Tool is a modular framework that assists with auditing 14 | the contents of compiled software. It makes it easier and cheaper to look 15 | inside technology, and this helps compliance and due diligence activities. 16 | . 17 | The tool is freely available to everyone. 
The community can use it and 18 | participate in further development, and work together to help reduce errors 19 | when shipping devices or products containing Free and Open Source Software. 20 | 21 | ## these are apparently always provided, so they don't need to be explicitely 22 | ## defined as a dependency. Personally I think this is stupid and all 23 | ## dependencies should be listed, but hey, anything to keep lintian happy! 24 | # Depends: e2fsprogs, coreutils, gzip, tar 25 | -------------------------------------------------------------------------------- /src/debian/copyright: -------------------------------------------------------------------------------- 1 | This work was packaged for Debian by: 2 | 3 | Armijn Hemel on Wed, 12 Mar 2014 19:20:43 +0100 4 | 5 | It was downloaded from: 6 | 7 | 8 | 9 | Upstream Author: 10 | 11 | Armijn Hemel 12 | 13 | Copyright: 14 | 15 | 16 | 17 | License: 18 | 19 | Licensed under the Apache License, Version 2.0 (the "License"); 20 | you may not use this file except in compliance with the License. 21 | You may obtain a copy of the License at 22 | 23 | http://www.apache.org/licenses/LICENSE-2.0 24 | 25 | Unless required by applicable law or agreed to in writing, software 26 | distributed under the License is distributed on an "AS IS" BASIS, 27 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 28 | See the License for the specific language governing permissions and 29 | limitations under the License. 30 | 31 | On Debian systems, the complete text of the Apache version 2.0 license 32 | can be found in "/usr/share/common-licenses/Apache-2.0". 33 | 34 | The Debian packaging is: 35 | 36 | Copyright (C) 2010-2015 Armijn Hemel 37 | -------------------------------------------------------------------------------- /src/debian/files: -------------------------------------------------------------------------------- 1 | bat_5.0-1_i386.deb unknown extra 2 | -------------------------------------------------------------------------------- /src/debian/pyversions: -------------------------------------------------------------------------------- 1 | 2.5- 2 | -------------------------------------------------------------------------------- /src/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | # Sample debian/rules that uses debhelper. 4 | # This file was originally written by Joey Hess and Craig Small. 5 | # As a special exception, when this file is copied by dh-make into a 6 | # dh-make output file, you may use that output file without restriction. 7 | # This special exception was added by Craig Small in version 0.37 of dh-make. 8 | 9 | # Uncomment this to turn on verbose mode. 10 | #export DH_VERBOSE=1 11 | 12 | %: 13 | dh $@ 14 | -------------------------------------------------------------------------------- /src/extractkernelstrings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ## Binary Analysis Tool 4 | ## Copyright 2010-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | ## Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | import sys, os, string, re 8 | from optparse import OptionParser 9 | import sqlite3 10 | from bat import extractor 11 | 12 | ## TODO: replace by generic code from ranking.py 13 | 14 | ## some strings we are interested in can't be extracted using xgettext. 15 | ## We use a few regular expressions for them to extract them. 
Since there 16 | ## macros being introduced (and removed) from the kernel sources regularly 17 | ## we should try and keep this list up to date. 18 | exprs = [] 19 | 20 | bugtrapexpr = re.compile("BUG_TRAP\s*\(([\w\s\.:<>\-+=~!@#$^%&*\[\]{}+?|/,'\(\)\\\]+)\);", re.MULTILINE) 21 | -------------------------------------------------------------------------------- /src/knowledgebaseadd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ## Binary Analysis Tool 4 | ## Copyright 2009-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | ## Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script can be used to add firmware data to an existing knowledgebase 9 | ''' 10 | 11 | import os, sys, sqlite3, hashlib 12 | from optparse import OptionParser 13 | 14 | def gethash(path): 15 | scanfile = open("%s" % (path,), 'r') 16 | h = hashlib.new('sha256') 17 | h.update(scanfile.read()) 18 | scanfile.close() 19 | return h.hexdigest() 20 | 21 | def main(argv): 22 | parser = OptionParser() 23 | parser.add_option("-b", "--binary", dest="binary", help="path to binary", metavar="FILE") 24 | parser.add_option("-d", "--database", dest="db", help="path to database", metavar="FILE") 25 | parser.add_option("-c", "--chipset", dest="chipset", help="name of chipset", metavar="CHIPSET") 26 | parser.add_option("-f", "--firmwareversion", dest="fwversion", help="firmware version", metavar="FWVERSION") 27 | parser.add_option("-m", "--manufacturer", dest="vendor", help="name of manufacturer", metavar="MANUFACTURER") 28 | parser.add_option("-n", "--name", dest="name", help="name of device", metavar="NAME") 29 | parser.add_option("-u", "--upstream", dest="upstream", help="upstream vendor (optional)", metavar="UPSTREAM") 30 | parser.add_option("-w", "--hardwareversion", dest="hwversion", help="hardware version", metavar="HWVERSION") 31 | (options, args) = parser.parse_args() 32 | 33 | if options.db == None: 34 | parser.error("Path to database file needed") 35 | try: 36 | conn = sqlite3.connect(options.db) 37 | except: 38 | print "Can't open database file" 39 | sys.exit(1) 40 | 41 | if options.chipset == None: 42 | parser.error("Need name of chipset") 43 | if options.fwversion == None: 44 | parser.error("Need firmware version") 45 | if options.hwversion == None: 46 | parser.error("Need hardware version") 47 | if options.vendor == None: 48 | parser.error("Need manufacturer name") 49 | if options.name == None: 50 | parser.error("Need device name") 51 | if options.upstream == None: 52 | options.upstream = '' 53 | if options.binary == None: 54 | parser.error("Need path to binary") 55 | else: 56 | try: 57 | os.stat(options.binary) 58 | except: 59 | print >>sys.stderr, "Can't open binary" 60 | sys.exit(1) 61 | 62 | c = conn.cursor() 63 | 64 | c.execute('''create table if not exists device (id integer primary key autoincrement, vendor text, name text, version text, chipset text, upstream text)''') 65 | c.execute('''create table if not exists binary (id integer primary key autoincrement, sha256 text, deviceid integer)''') 66 | c.execute('''create index if not exists sha256_index on binary (sha256)''') 67 | 68 | t = (options.vendor, options.name, options.hwversion, options.chipset, options.upstream) 69 | c.execute('''insert into device(vendor, name, version,chipset, upstream) values (?, ?, ?, ?, ?)''', t) 70 | conn.commit() 71 | lastrow = c.lastrowid 72 | 73 | fwhash = gethash(options.binary) 74 | c.execute('''insert into binary (sha256, 
deviceid) values (?, ?)''', (fwhash, lastrow)) 75 | conn.commit() 76 | c.close() 77 | 78 | if __name__ == "__main__": 79 | main(sys.argv) 80 | -------------------------------------------------------------------------------- /src/knowledgebaseaddchipset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ## Binary Analysis Tool 4 | ## Copyright 2009-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | ## Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script can be used to add chipset data to an existing knowledgebase 9 | ''' 10 | 11 | import os, sys, sqlite3 12 | from optparse import OptionParser 13 | 14 | def main(argv): 15 | parser = OptionParser() 16 | parser.add_option("-d", "--database", dest="db", help="path to database", metavar="FILE") 17 | parser.add_option("-c", "--chipset", dest="chipset", help="name of chipset", metavar="CHIPSET") 18 | parser.add_option("-a", "--architecture", dest="architecture", help="chipset architecture (MIPS, ARM, etc.)", metavar="ARCHITECTURE") 19 | parser.add_option("-m", "--manufacturer", dest="manufacturer", help="chipset manufacturer", metavar="MANUFACTURER") 20 | (options, args) = parser.parse_args() 21 | 22 | if options.db == None: 23 | parser.error("Path to database file needed") 24 | try: 25 | conn = sqlite3.connect(options.db) 26 | except: 27 | print "Can't open database file" 28 | sys.exit(1) 29 | 30 | if options.chipset == None: 31 | parser.error("Need name of chipset") 32 | if options.manufacturer == None: 33 | parser.error("Need name of manufacturer") 34 | if options.architecture == None: 35 | parser.error("Need name of architecture") 36 | 37 | c = conn.cursor() 38 | 39 | ## insert some test data 40 | ## chipset information from http://wiki.openwrt.org/toh/start 41 | c.execute('''insert into chipset values (?, ?, ?)''', (options.chipset, options.manufacturer, options.architecture)) 42 | #c.execute('''insert into chipset values ('AR7', 'Texas Instruments', 'MIPS')''') 43 | #c.execute('''insert into chipset values ('BCM6851', 'Broadcom', 'MIPS')''') 44 | #c.execute('''insert into chipset values ('BCM4712', 'Broadcom', 'MIPS')''') 45 | conn.commit() 46 | c.close() 47 | 48 | if __name__ == "__main__": 49 | main(sys.argv) 50 | -------------------------------------------------------------------------------- /src/knowledgebaseinit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ## Binary Analysis Tool 4 | ## Copyright 2009-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | ## Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script can be used to initialize an empty knowledgebase and create all tables. 
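
A minimal usage sketch (the database path is only an example):

    python knowledgebaseinit.py -d knowledgebase.sqlite3

This creates the 'chipset', 'device' and 'binary' tables, plus an index on
the SHA256 column of 'binary', if they do not already exist.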
9 | ''' 10 | 11 | import os, sys, sqlite3 12 | from optparse import OptionParser 13 | 14 | def main(argv): 15 | parser = OptionParser() 16 | parser.add_option("-d", "--database", dest="db", help="path to database", metavar="FILE") 17 | (options, args) = parser.parse_args() 18 | if options.db == None: 19 | parser.error("Path to database file needed") 20 | try: 21 | conn = sqlite3.connect(options.db) 22 | except: 23 | print "Can't open database file" 24 | sys.exit(1) 25 | 26 | c = conn.cursor() 27 | 28 | ## create some tables 29 | c.execute('''create table if not exists chipset (name text, vendor text, family text)''') 30 | c.execute('''create table if not exists device (id integer primary key autoincrement, vendor text, name text, version text, chipset text, upstream text)''') 31 | c.execute('''create table if not exists binary (id integer primary key autoincrement, sha256 text, deviceid integer)''') 32 | c.execute('''create index if not exists sha256_index on binary (sha256)''') 33 | 34 | conn.commit() 35 | c.close() 36 | 37 | if __name__ == "__main__": 38 | main(sys.argv) 39 | -------------------------------------------------------------------------------- /src/maintenance/batextensions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Binary Analysis Tool 5 | # Copyright 2009-2015 Armijn Hemel for Tjaldur Software Governance Solutions 6 | # Licensed under Apache 2.0, see LICENSE file for details 7 | 8 | # list of extensions, plus what language they should be mapped to 9 | # This is not necessarily correct, but right now it suffices. Ideally a parser 10 | # would be run on each file to see what kind of file it is. 11 | extensions = {'.c' : 'C', 12 | '.cc' : 'C', 13 | '.cpp' : 'C', 14 | '.cxx' : 'C', 15 | '.c++' : 'C', 16 | '.h' : 'C', 17 | '.hh' : 'C', 18 | '.hpp' : 'C', 19 | '.hxx' : 'C', 20 | '.l' : 'C', 21 | '.qml' : 'C', 22 | '.s' : 'C', 23 | '.txx' : 'C', 24 | '.y' : 'C', 25 | '.dts' : 'C', # specific to Linux kernel 26 | '.dtsi' : 'C', # specific to Linux kernel 27 | '.cs' : 'C#', 28 | '.groovy' : 'Java', 29 | '.java' : 'Java', 30 | '.jsp' : 'Java', 31 | '.scala' : 'Java', 32 | '.as' : 'ActionScript', 33 | '.js' : 'JavaScript', 34 | '.php' : 'PHP', 35 | '.py' : 'Python', 36 | '.rb' : 'Ruby', 37 | '.patch' : 'patch', 38 | '.diff' : 'patch', 39 | } 40 | -------------------------------------------------------------------------------- /src/maintenance/busybox-appletname-extractor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2009-2012 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | # 8 | # Helper script to extract configurations from busybox source code. 9 | # Results are dumped as a pickle file, which can later be used by the 10 | # BusyBox processing scripts to map applet names back to configuration 11 | # directives. This is useful when comparing with a supplied configuration 12 | # file to see if these match. 13 | # 14 | 15 | # For newer BusyBox versions you first need to generate applets.h 16 | # First unpack the archive, go to the root of the unpacked archive and run: 17 | # ./scripts/gen_build_files.sh . . 
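#
# A usage sketch (the version and paths below are only examples): once
# applets.h exists, run
#
# python busybox-appletname-extractor.py -a busybox-1.19.4/include/applets.h -n 1.19.4
#
# which pickles the applet-to-configuration mapping into a file called
# '1.19.4-config' in the current working directory.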
18 | 19 | import sys, os, re, pickle 20 | from optparse import OptionParser 21 | 22 | def extract_major_version(version): 23 | return version.rsplit('.', version.count('.')-1)[0] 24 | 25 | # configs format: 26 | # {symbolic link name: (appletname, config option)} 27 | # Example: 28 | # {'sha1sum': ('md5_sha1_sum', 'SHA1SUM')} 29 | # 30 | def extract_configuration(lines, version): 31 | configs = {} 32 | if version >= "1.1.1": 33 | if extract_major_version(version) >= "1.15": 34 | prefix = "IF_" 35 | else: 36 | prefix = "USE_" 37 | for line in lines: 38 | configname = re.match("%s([\w_]*)\(APPLET_\w+\(([\w\.\-_\[]+),\s*([\w\.\-_]*)" % (prefix,), line.strip()) 39 | if configname != None: 40 | configs[configname.groups()[1]] = (configname.groups()[2], configname.groups()[0]) 41 | else: 42 | configname = re.match("%s([\w_]*)\(APPLET\(([\w\.\-_\[]+)" % (prefix,), line.strip()) 43 | if configname != None: 44 | configs[configname.groups()[1]] = (configname.groups()[1], configname.groups()[0]) 45 | else: 46 | if version < "1.00": 47 | prefix = "BB" 48 | else: 49 | prefix = "CONFIG" 50 | for line in range(0,len(lines) -1): 51 | config = re.match("#ifdef %s\_([\_\w]+)" % (prefix,), lines[line].strip()) 52 | if config == None: 53 | config = re.match("#if ENABLE\_([\_\w]+)", lines[line].strip()) 54 | if config == None: 55 | config = re.match("#if BB\_APPLET\_([\_\w]+)", lines[line].strip()) 56 | if config == None: 57 | config = re.match("#if defined\(%s\_(FEATURE\_[\_\w]+)\)" % (prefix,), lines[line].strip()) 58 | if config == None: 59 | continue 60 | configname = re.match("APPLET\(([\w\.\-\_\[]+), ([\w\_]+),", lines[line+1].strip()) 61 | if configname == None: 62 | configname = re.match("APPLET_(?:NOUSAGE|ODDNAME)\(\"([\w\.\-\_\[]+)\", ([\w\_]+),", lines[line+1].strip()) 63 | if configname != None: 64 | # remove _main from the name of the applet's main function, 65 | # assuming it matches the name of the applet 66 | configs[configname.groups()[0]] = (configname.groups()[1][:-5], config.groups()[0]) 67 | else: 68 | # remove _main from the name of the applet's main function, 69 | # assuming it matches the name of the applet 70 | configs[configname.groups()[0]] = (configname.groups()[1][:-5], config.groups()[0]) 71 | return configs 72 | 73 | def main(argv): 74 | parser = OptionParser() 75 | parser.add_option("-a", "--applets", action="store", dest="applets", help="path to applets.h", metavar="FILE") 76 | parser.add_option("-n", "--busyboxversion", action="store", dest="busyboxversion", help="BusyBox version", metavar="VERSION") 77 | (options, args) = parser.parse_args() 78 | if options.applets == None: 79 | parser.error("Path to applets.h in BusyBox directory needed") 80 | if options.busyboxversion == None: 81 | parser.error("BusyBox version needed") 82 | 83 | busybox_applets = open(options.applets, 'rb') 84 | busybox_lines = busybox_applets.readlines() 85 | version = options.busyboxversion 86 | bb_configuration = extract_configuration(busybox_lines, version) 87 | if bb_configuration != {}: 88 | output = open('%s-config' % (version, ), 'w') 89 | pickle.dump(bb_configuration, output) 90 | output.close() 91 | 92 | if __name__ == "__main__": 93 | main(sys.argv) 94 | -------------------------------------------------------------------------------- /src/maintenance/clonedbinit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see
LICENSE file for details 6 | 7 | ''' 8 | This script can be used to initialize the clone database. The clone database is 9 | used to record information about which packages should be treated as the same 10 | package, or under which alternative names it is known, and so on. 11 | 12 | Cloning of packages happens frequently: 13 | 14 | * renaming: packages are renamed for some reason, like politics 15 | * bundling: one package is copied entirely into another package, for example 16 | as "third party software" 17 | * partial copying: parts of a package have been copied into another package. 18 | Examples are glue code for some Python packages, where a large amount of 19 | packages share just one file 20 | 21 | Apart from verbatim cloning there is also cloning that happens in a more subtle 22 | way. For example, code was copied, then slightly adapted. It might not be the 23 | same when looking at SHA256 checksums of the files, but it might still look 24 | the same when looking at strings or function names. 25 | ''' 26 | 27 | import os 28 | import sys 29 | import sqlite3 30 | from optparse import OptionParser 31 | 32 | def main(argv): 33 | parser = OptionParser() 34 | parser.add_option("-d", "--database", dest="db", help="path to clone database", metavar="FILE") 35 | (options, args) = parser.parse_args() 36 | if options.db == None: 37 | parser.error("Path to clone database file needed") 38 | try: 39 | conn = sqlite3.connect(options.db) 40 | except: 41 | print "Can't open clone database file" 42 | sys.exit(1) 43 | 44 | c = conn.cursor() 45 | 46 | # create table for renamed packages 47 | c.execute('''create table if not exists renames (originalname text, newname text)''') 48 | c.execute('''create index if not exists renames_index_originalname on renames (originalname)''') 49 | c.execute('''create index if not exists renames_index_newname on renames (newname)''') 50 | 51 | # insert some values as examples 52 | c.execute('''insert into renames values ('ethereal', 'wireshark')''') 53 | c.execute('''insert into renames values ('koffice', 'calligra')''') 54 | c.execute('''insert into renames values ('ucd-snmp', 'net-snmp')''') 55 | c.execute('''insert into renames values ('iproute', 'iproute2')''') 56 | c.execute('''insert into renames values ('gaim', 'pidgin')''') 57 | c.execute('''insert into renames values ('kdebase-runtime', 'kde-runtime')''') 58 | c.execute('''insert into renames values ('kdebase-workspace', 'kde-workspace')''') 59 | c.execute('''insert into renames values ('eglibc', 'glibc')''') 60 | c.execute('''insert into renames values ('org.apache.servicemix.bundles.ant', 'apache-ant')''') 61 | c.execute('''insert into renames values ('wengophone', 'qutecom')''') 62 | c.execute('''insert into renames values ('gaim-plugin_pack', 'purple-plugin_pack')''') 63 | 64 | conn.commit() 65 | c.close() 66 | conn.close() 67 | 68 | if __name__ == "__main__": 69 | main(sys.argv) 70 | -------------------------------------------------------------------------------- /src/maintenance/copybatarchives.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2014-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Script to copy BAT archive files efficiently. Takes three arguments: 9 | 10 | 1. directory with 'original' archives (used to create the BAT archives) 11 | 2. directory with BAT archives 12 | 3. 
target directory to copy the BAT archives to 13 | 14 | It is very important that 1. is the same directory as the one used to generate the BAT archives 15 | ''' 16 | 17 | import sys 18 | import os 19 | import re 20 | import subprocess 21 | import shutil 22 | import stat 23 | from optparse import OptionParser 24 | from multiprocessing import Pool 25 | 26 | def main(argv): 27 | parser = OptionParser() 28 | parser.add_option("-a", "--archivedir", action="store", dest="archivedir", help="path to directory with BAT archives", metavar="DIR") 29 | parser.add_option("-o", "--origdir", action="store", dest="origdir", help="directory with original archives", metavar="DIR") 30 | parser.add_option("-t", "--targetdir", action="store", dest="targetdir", help="target directory", metavar="DIR") 31 | (options, args) = parser.parse_args() 32 | 33 | if options.archivedir == None: 34 | parser.error("specify archivedir") 35 | else: 36 | try: 37 | archivelist = open(os.path.join(options.archivedir,"ARCHIVELIST")).readlines() 38 | except: 39 | parser.error("'ARCHIVELIST' not found in archive dir") 40 | if options.origdir == None: 41 | parser.error("specify origdir") 42 | else: 43 | try: 44 | filelist = open(os.path.join(options.origdir,"LIST")).readlines() 45 | except: 46 | parser.error("'LIST' not found in orig dir") 47 | 48 | archives = os.listdir(options.archivedir) 49 | archivenames = set() 50 | for a in archives: 51 | asplits = a.rsplit('.', 2) 52 | if len(asplits) != 3: 53 | continue 54 | if asplits[2] != 'bz2': 55 | continue 56 | if asplits[1] != 'tar': 57 | continue 58 | if not 'bat' in asplits[0]: 59 | continue 60 | archivenames.add(a) 61 | 62 | if options.targetdir == None: 63 | parser.error("specify targetdir") 64 | else: 65 | if not os.path.exists(options.targetdir): 66 | parser.error("targetdir does not exist") 67 | 68 | copyfromarchives = set() 69 | copyfromorig = set() 70 | archivetometa = {} 71 | 72 | for unpackfile in filelist: 73 | try: 74 | unpacks = unpackfile.strip().split() 75 | if len(unpacks) == 4: 76 | (package, version, filename, origin) = unpacks 77 | if '%s-%s-%s-bat.tar.bz2' % (package, version, origin) in archivenames: 78 | copyfromarchives.add('%s-%s-%s-bat.tar.bz2' % (package, version, origin)) 79 | archivetometa['%s-%s-%s-bat.tar.bz2' % (package, version, origin)] = (version, origin) 80 | else: 81 | copyfromorig.add(filename) 82 | except: 83 | pass 84 | 85 | print "copying %d archives" % len(copyfromarchives) 86 | for i in copyfromarchives: 87 | shutil.copy(os.path.join(options.archivedir, i), options.targetdir) 88 | print "copying %d original files" % len(copyfromorig) 89 | for i in copyfromorig: 90 | shutil.copy(os.path.join(options.origdir, i), options.targetdir) 91 | print "copying manifests" 92 | if os.path.exists(os.path.join(options.origdir, 'MANIFESTS')): 93 | os.mkdir(os.path.join(options.targetdir, 'MANIFESTS')) 94 | manifests = os.listdir(os.path.join(options.origdir, 'MANIFESTS')) 95 | for i in manifests: 96 | shutil.copy(os.path.join(options.origdir, 'MANIFESTS', i), os.path.join(options.targetdir, 'MANIFESTS')) 97 | if os.path.exists(os.path.join(options.archivedir, 'MANIFESTS')): 98 | manifests = os.listdir(os.path.join(options.archivedir, 'MANIFESTS')) 99 | for i in manifests: 100 | shutil.copy(os.path.join(options.archivedir, 'MANIFESTS', i), os.path.join(options.targetdir, 'MANIFESTS')) 101 | print "copying checksums and downloadurl" 102 | if os.path.exists(os.path.join(options.origdir, 'SHA256SUM')): 103 | shutil.copy(os.path.join(options.origdir, 'SHA256SUM'),
options.targetdir) 104 | if os.path.exists(os.path.join(options.origdir, 'DOWNLOADURL')): 105 | shutil.copy(os.path.join(options.origdir, 'DOWNLOADURL'), options.targetdir) 106 | #if os.path.exists(os.path.join(options.origdir, 'SHA256SUM')): 107 | #sha256sums = open(os.path.join(options.origdir, 'SHA256SUM')).readlines() 108 | #if os.path.exists(os.path.join(options.archivedir, 'SHA256SUM')): 109 | #sha256sums = open(os.path.join(options.archivedir, 'SHA256SUM')).readlines() 110 | 111 | print "writing LIST" 112 | newlistfile = open(os.path.join(options.targetdir, "LIST"), 'wb') 113 | # walk the original LIST file and write lines for the files for which there are no archives 114 | for f in filelist: 115 | unpacks = f.strip().split() 116 | filename = unpacks[2] 117 | if filename in copyfromorig: 118 | newlistfile.write(f) 119 | # then walk the list for archives 120 | for f in archivelist: 121 | archivename = f.strip() 122 | if archivename in copyfromarchives: 123 | (version, origin) = archivetometa[archivename] 124 | newlistfile.write("%s\t%s\t%s\t%s\tbatarchive\n" % (archivename[:-12], version, archivename, origin)) 125 | newlistfile.close() 126 | 127 | if __name__ == "__main__": 128 | main(sys.argv) 129 | -------------------------------------------------------------------------------- /src/maintenance/createdb.config: -------------------------------------------------------------------------------- 1 | [extractconfig] 2 | configtype = global 3 | 4 | ## first the database credentials 5 | postgresql_user = bat 6 | postgresql_password = bat 7 | postgresql_db = bat 8 | #postgresql_port = 5432 9 | #postgresql_host = 127.0.0.1 10 | # 11 | ## first the database credentials 12 | #auth_postgresql_user = bat 13 | #auth_postgresql_password = bat 14 | #auth_postgresql_db = bat_old 15 | #auth_postgresql_port = 5432 16 | #auth_postgresql_host = 127.0.0.1 17 | #authcopy = string:function:variable 18 | # 19 | 20 | ## 21 | scanlicense = yes 22 | scancopyright = yes 23 | scansecurity = yes 24 | cleanup = yes 25 | ## should the database be wiped? 
26 | wipe = no 27 | ## directory where to unpack sources 28 | unpackdir = /ramdisk 29 | extrahashes = md5:sha1:crc32:tlsh 30 | nomoschunks = 10 31 | urlcutoff = 1000 32 | maxstringcutoff = 1000 33 | minstringcutoff = 4 34 | ## below are for generatelistrpm.py 35 | patchesdir = /tmp/patches 36 | rpmdb = /tmp/rpmdb.sqlite3 37 | insecurerpm = yes 38 | ## cutoff is 200 MiB 39 | cutoff = 209715200 40 | 41 | ## configuration for CVE parser 42 | [cveconfig] 43 | ## directory where to store patches that are mentioned 44 | ## in a CVE report 45 | patchdir = /home/bat/cve/git 46 | 47 | ## now follows the per-package configuration 48 | 49 | [bash] 50 | configtype = package 51 | extensions = .def:C 52 | 53 | [chromium] 54 | configtype = package 55 | blacklist = icudt46l_dat.S:icudt42l_dat.S:icudtl_dat.S:icudt42l_dat.s 56 | 57 | [qt] 58 | configtype = package 59 | blacklist = icudt46l_dat.S:icudt42l_dat.S:icudtl_dat.S:icudt42l_dat.s 60 | 61 | [freecad] 62 | configtype = package 63 | blacklist = Arch_rc.py 64 | 65 | [linux] 66 | configtype = package 67 | alwaysscan = string:function 68 | -------------------------------------------------------------------------------- /src/maintenance/createfiledatabasedebian.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script mines data from Debian package databases (available on any Debian 9 | mirror as Contents-$ARCH.gz) and puts it in another database. 10 | ''' 11 | 12 | import os 13 | import os.path 14 | import sys 15 | import psycopg2 16 | import gzip 17 | import ConfigParser 18 | from optparse import OptionParser 19 | 20 | def main(argv): 21 | config = ConfigParser.ConfigParser() 22 | parser = OptionParser() 23 | parser.add_option("-c", "--config", action="store", dest="cfg", help="path to configuration file", metavar="FILE") 24 | parser.add_option("-f", "--file", action="store", dest="contentsfile", help="path to file containing contents of Debian packages", metavar="FILE") 25 | 26 | (options, args) = parser.parse_args() 27 | if options.contentsfile == None: 28 | parser.error("Need path to Debian packages file") 29 | 30 | if not os.path.exists(options.contentsfile): 31 | print >>sys.stderr, "Debian packages file does not exist" 32 | sys.stderr.flush() 33 | sys.exit(1) 34 | 35 | if options.cfg == None: 36 | parser.error("Need path to configuration file") 37 | 38 | try: 39 | configfile = open(options.cfg, 'r') 40 | except: 41 | parser.error("Configuration file not readable") 42 | 43 | config.readfp(configfile) 44 | configfile.close() 45 | 46 | section = 'extractconfig' 47 | 48 | try: 49 | postgresql_user = config.get(section, 'postgresql_user') 50 | postgresql_password = config.get(section, 'postgresql_password') 51 | postgresql_db = config.get(section, 'postgresql_db') 52 | 53 | # check to see if a host (IP-address) was supplied either 54 | # as host or hostaddr. hostaddr is not supported on older 55 | # versions of psycopg2, for example CentOS 6.6, so it is not 56 | # used at the moment. 57 | try: 58 | postgresql_host = config.get(section, 'postgresql_host') 59 | except: 60 | postgresql_host = None 61 | try: 62 | postgresql_hostaddr = config.get(section, 'postgresql_hostaddr') 63 | except: 64 | postgresql_hostaddr = None 65 | # check to see if a port was specified.
If not, default to 'None' 66 | try: 67 | postgresql_port = config.get(section, 'postgresql_port') 68 | except Exception, e: 69 | postgresql_port = None 70 | except: 71 | print >>sys.stderr, "Database connection not defined in configuration file. Exiting..." 72 | sys.stderr.flush() 73 | sys.exit(1) 74 | try: 75 | conn = psycopg2.connect(database=postgresql_db, user=postgresql_user, password=postgresql_password, host=postgresql_host, port=postgresql_port) 76 | 77 | cursor = conn.cursor() 78 | except: 79 | print >>sys.stderr, "Can't open database" 80 | sys.exit(1) 81 | 82 | contents = gzip.open(options.contentsfile) 83 | seenstart = False 84 | for i in contents: 85 | if not seenstart: 86 | if i.startswith('FILE'): 87 | seenstart = True 88 | continue 89 | else: 90 | continue 91 | packageversion = '' 92 | (filepath, categorypackage) = i.strip().rsplit(' ', 1) 93 | package = categorypackage.rsplit('/')[-1].strip() 94 | 95 | cursor.execute("insert into file values (%s,%s,%s,%s, 'Debian', %s)", (os.path.basename(filepath.strip()), os.path.dirname(filepath.strip()), package, packageversion, '')) 96 | 97 | contents.close() 98 | conn.commit() 99 | cursor.close() 100 | conn.close() 101 | 102 | if __name__ == "__main__": 103 | main(sys.argv) 104 | -------------------------------------------------------------------------------- /src/maintenance/createfiledatabasefedora.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script mines data from Fedora package databases (available on any Fedora 9 | mirror under os/repodata) and puts it in another database.
10 | 11 | The names of the files that are needed end in "filelists.sqlite.bz2" 12 | (file list database) and "primary.sqlite.bz2" (package database) 13 | 14 | Example: linux/releases/24/Everything/x86_64/os/repodata/ 15 | 16 | The files need to be decompressed first 17 | ''' 18 | 19 | import os 20 | import os.path 21 | import sys 22 | import sqlite3 23 | import psycopg2 24 | from optparse import OptionParser 25 | import ConfigParser 26 | 27 | # select version,name,pkgKey from packages; 28 | # store in {pkgKey: {'name': name, 'version': version}} 29 | # from other database: 30 | # select version,name,pkgKey from packages; 31 | # process all files (not directories) 32 | # store in database 33 | 34 | def main(argv): 35 | config = ConfigParser.ConfigParser() 36 | parser = OptionParser() 37 | parser.add_option("-c", "--config", action="store", dest="cfg", help="path to configuration file", metavar="FILE") 38 | parser.add_option("-f", "--filelistdatabase", action="store", dest="filelistdatabase", help="path to database containing file info (filelists.sqlite)", metavar="FILE") 39 | parser.add_option("-p", "--packagedatabase", action="store", dest="packagedatabase", help="path to database containing package info (primary.sqlite)", metavar="FILE") 40 | parser.add_option("-s", "--fedoraversion", action="store", dest="fedoraversion", help="Fedora version", metavar="VERSION") 41 | 42 | (options, args) = parser.parse_args() 43 | 44 | if options.cfg == None: 45 | parser.error("Need path to configuration file") 46 | 47 | try: 48 | configfile = open(options.cfg, 'r') 49 | except: 50 | parser.error("Configuration file not readable") 51 | config.readfp(configfile) 52 | configfile.close() 53 | 54 | section = 'extractconfig' 55 | 56 | try: 57 | postgresql_user = config.get(section, 'postgresql_user') 58 | postgresql_password = config.get(section, 'postgresql_password') 59 | postgresql_db = config.get(section, 'postgresql_db') 60 | 61 | # check to see if a host (IP-address) was supplied either 62 | # as host or hostaddr. hostaddr is not supported on older 63 | # versions of psycopg2, for example CentOS 6.6, so it is not 64 | # used at the moment. 65 | try: 66 | postgresql_host = config.get(section, 'postgresql_host') 67 | except: 68 | postgresql_host = None 69 | try: 70 | postgresql_hostaddr = config.get(section, 'postgresql_hostaddr') 71 | except: 72 | postgresql_hostaddr = None 73 | # check to see if a port was specified. If not, default to 'None' 74 | try: 75 | postgresql_port = config.get(section, 'postgresql_port') 76 | except Exception, e: 77 | postgresql_port = None 78 | except: 79 | print >>sys.stderr, "Database connection not defined in configuration file. Exiting..." 
80 | sys.stderr.flush() 81 | sys.exit(1) 82 | try: 83 | conn = psycopg2.connect(database=postgresql_db, user=postgresql_user, password=postgresql_password, host=postgresql_host, port=postgresql_port) 84 | 85 | cursor = conn.cursor() 86 | except: 87 | print >>sys.stderr, "Can't open database" 88 | sys.exit(1) 89 | 90 | if options.filelistdatabase == None or options.packagedatabase == None: 91 | parser.error("Provide paths to Fedora databases") 92 | if options.fedoraversion == None: 93 | parser.error("Provide version of Fedora") 94 | 95 | filelistconn = sqlite3.connect(options.filelistdatabase) 96 | filelistcursor = filelistconn.cursor() 97 | 98 | packageconn = sqlite3.connect(options.packagedatabase) 99 | packagecursor = packageconn.cursor() 100 | 101 | pkgnameversion = {} 102 | packagecursor.execute("select pkgKey, name, version from packages") 103 | res = packagecursor.fetchall() 104 | packageconn.commit() 105 | for i in res: 106 | pkgnameversion[i[0]] = {'name': i[1], 'version': i[2]} 107 | packagecursor.close() 108 | packageconn.close() 109 | 110 | for pkg in pkgnameversion.keys(): 111 | filelistcursor.execute("select pkgKey, dirname, filenames, filetypes from filelist where pkgKey=%d" % pkg) 112 | res = filelistcursor.fetchall() 113 | distroversion='' 114 | for r in res: 115 | (pkgKey, dirname, filenames, filetypes) = r 116 | files = filenames.split('/') 117 | # very crude filter to take care of '/' in filenames, which split will 118 | # turn into ['', ''] 119 | if '' in files: 120 | newfiles = [] 121 | empty = False 122 | for i in range(0,len(files)): 123 | if files[i] == '': 124 | if not empty: 125 | empty = True 126 | continue 127 | else: 128 | newfiles.append('/') 129 | empty = False 130 | else: 131 | newfiles.append(files[i]) 132 | empty = False 133 | files = newfiles 134 | for i in range(0,len(files)): 135 | if files[i] == '': 136 | continue 137 | if filetypes[i] == 'd': 138 | continue 139 | cursor.execute("insert into file values (%s,%s,%s,%s, 'Fedora', %s)", (files[i], dirname, pkgnameversion[pkg]['name'], pkgnameversion[pkg]['version'], options.fedoraversion)) 140 | #print dirname, files[i], pkgnameversion[pkg] 141 | filelistcursor.close() 142 | filelistconn.close() 143 | conn.commit() 144 | cursor.close() 145 | conn.close() 146 | 147 | if __name__ == "__main__": 148 | main(sys.argv) 149 | -------------------------------------------------------------------------------- /src/maintenance/dumplist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script can be used to regenerate a LIST file from a database. This 9 | can be useful in situations like a disk crash (where only the 'processed' table 10 | could be recovered), or in case of errors in the extraction scripts where parts 11 | of the database have to be regenerated. 12 | 13 | By default the script writes data for files from all origins, unless 'origin' 14 | is specified. 15 | 16 | This script needs the same configuration file as the database creation script.
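
A minimal invocation sketch (the file names and origin are only examples):

    python dumplist.py -c createdb.config -l LIST -o gnu

This writes one tab-separated 'package version filename origin' line to LIST
for every matching entry in the 'processed' table.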
17 | ''' 18 | 19 | import os 20 | import os.path 21 | import re 22 | import sys 23 | import fnmatch 24 | import ConfigParser 25 | from optparse import OptionParser 26 | 27 | import psycopg2 28 | 29 | def main(argv): 30 | config = ConfigParser.ConfigParser() 31 | parser = OptionParser() 32 | parser.add_option("-c", "--configuration", action="store", dest="cfg", help="path to configuration file", metavar="FILE") 33 | parser.add_option("-l", "--listfile", action="store", dest="listfile", help="path to LIST file (output)", metavar="FILE") 34 | parser.add_option("-o", "--origin", action="store", dest="origin", help="optional origin filter") 35 | 36 | (options, args) = parser.parse_args() 37 | if options.listfile == None: 38 | parser.error("Need path to LIST file") 39 | if options.cfg == None: 40 | parser.error("Need path to configuration file") 41 | 42 | try: 43 | configfile = open(options.cfg, 'r') 44 | except: 45 | parser.error("Configuration file not readable") 46 | config.readfp(configfile) 47 | configfile.close() 48 | 49 | section = 'extractconfig' 50 | 51 | try: 52 | postgresql_user = config.get(section, 'postgresql_user') 53 | postgresql_password = config.get(section, 'postgresql_password') 54 | postgresql_db = config.get(section, 'postgresql_db') 55 | 56 | # check to see if a host (IP-address) was supplied either 57 | # as host or hostaddr. hostaddr is not supported on older 58 | # versions of psycopg2, for example CentOS 6.6, so it is not 59 | # used at the moment. 60 | try: 61 | postgresql_host = config.get(section, 'postgresql_host') 62 | except: 63 | postgresql_host = None 64 | try: 65 | postgresql_hostaddr = config.get(section, 'postgresql_hostaddr') 66 | except: 67 | postgresql_hostaddr = None 68 | # check to see if a port was specified. If not, default to 'None' 69 | try: 70 | postgresql_port = config.get(section, 'postgresql_port') 71 | except Exception, e: 72 | postgresql_port = None 73 | except: 74 | print >>sys.stderr, "Database connection not defined in configuration file. Exiting..." 75 | sys.stderr.flush() 76 | sys.exit(1) 77 | try: 78 | conn = psycopg2.connect(database=postgresql_db, user=postgresql_user, password=postgresql_password, host=postgresql_host, port=postgresql_port) 79 | 80 | cursor = conn.cursor() 81 | except: 82 | print >>sys.stderr, "Can't open database" 83 | sys.exit(1) 84 | 85 | # TODO: add some sanity checks for 'origin' first 86 | if options.origin != None: 87 | cursor.execute("select package, version, filename, origin from processed where origin=%s", (options.origin,)) 88 | else: 89 | cursor.execute("select package, version, filename, origin from processed") 90 | res = cursor.fetchall() 91 | cursor.close() 92 | conn.close() 93 | 94 | if res != []: 95 | listfile = open(options.listfile, 'w') 96 | for i in res: 97 | (package, version, filename, origin) = i 98 | listfile.write("%s\t%s\t%s\t%s\n" % (package, version, filename, origin)) 99 | listfile.flush() 100 | listfile.close() 101 | 102 | if __name__ == "__main__": 103 | main(sys.argv) 104 | -------------------------------------------------------------------------------- /src/maintenance/findthirdparty.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2015-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script finds clones in packages that are very specifically indicated in 9 | the source code tree of a package as "third party" by looking if certain 10 | patterns occur in path names. 11 | ''' 12 | 13 | import sys 14 | import os 15 | import psycopg2 16 | import multiprocessing 17 | from optparse import OptionParser 18 | import ConfigParser 19 | 20 | def main(argv): 21 | config = ConfigParser.ConfigParser() 22 | parser = OptionParser() 23 | parser.add_option("-c", "--config", action="store", dest="cfg", help="path to configuration file", metavar="FILE") 24 | parser.add_option("-t", "--test", action="store_true", dest="dryrun", help="do a test run, only report", metavar="TEST") 25 | (options, args) = parser.parse_args() 26 | 27 | if options.cfg == None: 28 | parser.error("No configuration file found") 29 | 30 | if not os.path.exists(options.cfg): 31 | parser.error("Configuration file does not exist") 32 | try: 33 | configfile = open(options.cfg, 'r') 34 | except: 35 | parser.error("Configuration file not readable") 36 | config.readfp(configfile) 37 | configfile.close() 38 | 39 | if not options.dryrun: 40 | options.dryrun = False 41 | 42 | # search configuration to see if it is correct and/or not malformed 43 | # first search for a section called 'extractconfig' with configtype = global 44 | for section in config.sections(): 45 | if section == "extractconfig": 46 | try: 47 | postgresql_user = config.get(section, 'postgresql_user') 48 | postgresql_password = config.get(section, 'postgresql_password') 49 | postgresql_db = config.get(section, 'postgresql_db') 50 | 51 | # check to see if a host (IP-address) was supplied either 52 | # as host or hostaddr. hostaddr is not supported on older 53 | # versions of psycopg2, for example CentOS 6.6, so it is not 54 | # used at the moment. 55 | try: 56 | postgresql_host = config.get(section, 'postgresql_host') 57 | except: 58 | postgresql_host = None 59 | try: 60 | postgresql_hostaddr = config.get(section, 'postgresql_hostaddr') 61 | except: 62 | postgresql_hostaddr = None 63 | 64 | # check to see if a port was specified. If not, default to 'None' 65 | try: 66 | postgresql_port = config.get(section, 'postgresql_port') 67 | except Exception, e: 68 | postgresql_port = None 69 | except: 70 | print >>sys.stderr, "Database connection not defined in configuration file. Exiting..." 
71 | sys.stderr.flush() 72 | sys.exit(1) 73 | 74 | try: 75 | conn = psycopg2.connect(database=postgresql_db, user=postgresql_user, password=postgresql_password, host=postgresql_host, port=postgresql_port) 76 | cursor = conn.cursor() 77 | packagecursor = conn.cursor() 78 | except: 79 | print >>sys.stderr, "Database not running or misconfigured" 80 | sys.exit(1) 81 | 82 | packages = cursor.execute("select package, version, origin from processed") 83 | packages = cursor.fetchall() 84 | conn.commit() 85 | 86 | ignorepackages = ['linux', 'busybox'] 87 | 88 | packages = map(lambda x: x[:2], packages) 89 | 90 | packages.sort() 91 | 92 | thirdparty = set(['thirdparty', 'third_party', '3rdparty', '3rdpart']) 93 | 94 | seensha256 = set() 95 | for i in packages: 96 | packagecursor.execute("select distinct checksum,thirdparty from processed_file where package=%s and version=%s", i) 97 | while True: 98 | res = packagecursor.fetchmany(50000) 99 | conn.commit() 100 | if len(res) == 0: 101 | break 102 | for s in res: 103 | if s[0] in seensha256: 104 | continue 105 | if s[1] != None: 106 | continue 107 | checksum = s[0] 108 | cursor.execute("select distinct package,pathname,thirdparty from processed_file where checksum=%s", (checksum,)) 109 | packageres = cursor.fetchall() 110 | conn.commit() 111 | packageres = filter(lambda x: x[0] != i[0], packageres) 112 | for p in packageres: 113 | if p[0] in ignorepackages: 114 | continue 115 | if p[2] != None: 116 | continue 117 | # check if specific markers are in the path 118 | if i[0] in os.path.dirname(p[1]): 119 | marked = False 120 | for t in thirdparty: 121 | if t in os.path.dirname(p[1]): 122 | if options.dryrun: 123 | print i[0], i[1], checksum, p[:-1] 124 | else: 125 | cursor.execute("update processed_file set thirdparty=%s where package=%s and pathname=%s and checksum=%s", (True, p[0], p[1], checksum)) 126 | marked = True 127 | break 128 | if 'external' in os.path.dirname(p[1]) and not marked: 129 | if options.dryrun: 130 | print i[0], i[1], checksum, p[:-1] 131 | else: 132 | cursor.execute("update processed_file set thirdparty=%s where package=%s and pathname=%s and checksum=%s", (True, p[0], p[1], checksum)) 133 | else: 134 | if options.dryrun: 135 | pass 136 | #print i[0], i[1], checksum, p[:-1] 137 | else: 138 | pass 139 | conn.commit() 140 | seensha256.add(s[0]) 141 | conn.commit() 142 | packagecursor.close() 143 | cursor.close() 144 | conn.close() 145 | 146 | if __name__ == "__main__": 147 | main(sys.argv) 148 | -------------------------------------------------------------------------------- /src/maintenance/generatelist-fdroid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2011-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Helper script to generate the LIST files for the string extraction scripts. 9 | While this script is not foolproof, it will save lots of typing :-) 10 | ''' 11 | 12 | import sys 13 | import os 14 | import os.path 15 | import bz2 16 | import tarfile 17 | import gzip 18 | from optparse import OptionParser 19 | 20 | # translation table for renames. 
None currently for F-Droid 21 | packagerenames = {} 22 | 23 | def generatelist(filedir): 24 | files = os.walk(filedir) 25 | try: 26 | while True: 27 | i = files.next() 28 | for p in i[2]: 29 | if p == "LIST" or p == 'SHA256SUM': 30 | continue 31 | # first determine things like the extension 32 | res = p.rsplit('_src.tar.gz', 1) 33 | if len(res) != 2: 34 | continue 35 | (packageversion, extension) = res 36 | (package, version) = packageversion.rsplit('_', 1) 37 | # f-droid specific package renames go here 38 | if package in packagerenames: 39 | package = packagerenames[package] 40 | print "%s\t%s\t%s\tf-droid" % (package, version, p) 41 | 42 | except Exception, e: 43 | print >>sys.stderr, e 44 | sys.stderr.flush() 45 | 46 | def main(argv): 47 | parser = OptionParser() 48 | parser.add_option("-f", "--filedir", action="store", dest="filedir", help="path to directory containing files to unpack", metavar="DIR") 49 | (options, args) = parser.parse_args() 50 | if options.filedir == None: 51 | print >>sys.stderr, "Specify dir with files" 52 | sys.exit(1) 53 | generatelist(options.filedir) 54 | 55 | if __name__ == "__main__": 56 | main(sys.argv) 57 | -------------------------------------------------------------------------------- /src/maintenance/generatelist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2011-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Helper script to generate the LIST files for the string extraction scripts. 9 | While this script is not foolproof, it will save lots of typing :-) 10 | ''' 11 | 12 | import sys 13 | import os 14 | import os.path 15 | from optparse import OptionParser 16 | 17 | # it's either in the form of: 18 | # package-version.extension 19 | # package_version.extension 20 | # where extension is tar.gz, tar.bz2, tar.xz, tgz, zip, tbz2, etc. 
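#
# A sketch of the expected output (hypothetical archive names): scanning a
# directory containing foo-1.2.tar.gz and bar_3.4.tbz2 with origin 'gnu'
# prints, with the four fields separated by tabs:
#
# foo 1.2 foo-1.2.tar.gz gnu
# bar 3.4 bar_3.4.tbz2 gnu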
21 | def generatelist(filedir, origin): 22 | files = os.walk(filedir) 23 | try: 24 | while True: 25 | i = files.next() 26 | for p in i[2]: 27 | if p == "LIST": 28 | continue 29 | if p == "SHA256SUM": 30 | continue 31 | if p == "DOWNLOADURL": 32 | continue 33 | # first determine things like the extension 34 | res = p.rsplit('.', 1) 35 | if len(res) == 1: 36 | print >>sys.stderr, "can't split %s -- add manually" % (p,) 37 | continue 38 | (packageversion, extension) = res 39 | if extension in ["tgz", "tbz2"]: 40 | pass 41 | elif extension in ["jar", "zip"]: 42 | pass 43 | else: 44 | try: 45 | (packageversion, extension, compression) = p.rsplit('.', 2) 46 | except: 47 | continue 48 | if not (extension in ["tar"] and compression in ["gz", "bz2", "xz", "lz", "lzma", "Z"]): 49 | continue 50 | # exceptions go here 51 | if "wireless_tools" in packageversion: 52 | res = packageversion.rsplit(".", 1) 53 | # first try package-version 54 | else: 55 | res = packageversion.rsplit("-", 1) 56 | if len(res) == 1: 57 | # then try package_version 58 | res = packageversion.rsplit("_", 1) 59 | if len(res) == 1: 60 | print >>sys.stderr, "can't split %s -- add manually" % (p,) 61 | continue 62 | # perhaps there is a better split possible 63 | if res[1] in ['src', 'source', 'sources', 'Source', 'CLEAN', 'RHsemiCLEAN', 'RHCLEAN']: 64 | if '-' in res[0]: 65 | pass 66 | (package, version) = res 67 | print "%s\t%s\t%s\t%s" % (package, version, p, origin) 68 | except Exception: 69 | pass 70 | 71 | def main(argv): 72 | parser = OptionParser() 73 | parser.add_option("-f", "--filedir", action="store", dest="filedir", help="path to directory containing files to unpack", metavar="DIR") 74 | parser.add_option("-o", "--origin", action="store", dest="origin", help="origin of packages (default: unknown)", metavar="ORIGIN") 75 | (options, args) = parser.parse_args() 76 | if options.filedir is None: 77 | parser.error("Specify dir with files") 78 | if options.origin is None: 79 | origin = "unknown" 80 | else: 81 | origin = options.origin 82 | generatelist(options.filedir, origin) 83 | 84 | if __name__ == "__main__": 85 | main(sys.argv) 86 | -------------------------------------------------------------------------------- /src/maintenance/packagerename.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script mass renames files in the database. It uses a file with names and 9 | versions of packages, plus the new name and version the package should be 10 | given. Per package one line is used. Each line has four fields, separated by | 11 | 12 | oldname|oldversion|newname|newversion 13 | 14 | Optionally takes extra argument to dump data. This is useful to update the caches 15 | without having to regenerate the complete cache (which can take a looong time). 
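
A usage sketch (the file names and the rename entry are only examples): with
a rename file containing the single line

    gaim|2.0.0|pidgin|2.0.0

running

    python packagerename.py -d master.sqlite3 -r renames.txt -p dump.pickle

moves the matching rows in 'processed' and 'processed_file' from gaim 2.0.0
to pidgin 2.0.0 and dumps the affected strings, function names and variable
names to dump.pickle.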
16 | ''' 17 | 18 | import sys 19 | import os 20 | import sqlite3 21 | import cPickle 22 | from optparse import OptionParser 23 | 24 | def main(argv): 25 | parser = OptionParser() 26 | parser.add_option("-d", "--database", action="store", dest="db", help="path to database file", metavar="FILE") 27 | parser.add_option("-r", "--rename", action="store", dest="removal", help="path to file listing package/version that need to be renamed", metavar="FILE") 28 | parser.add_option("-p", "--dump", action="store", dest="pickle", help="path to dump file", metavar="FILE") 29 | (options, args) = parser.parse_args() 30 | 31 | if options.db == None: 32 | parser.error("No database found") 33 | 34 | if options.removal == None: 35 | parser.error("No rename file found") 36 | 37 | dump = False 38 | if options.pickle != None: 39 | dump = True 40 | #parser.error("No dump file found") 41 | 42 | # store in pickle: 43 | # * package 44 | # * function names 45 | # * strings 46 | # * variable names 47 | pickledumps = [] 48 | 49 | rename = open(options.removal).readlines() 50 | renamefiles = [] 51 | for i in rename: 52 | (oldpackage, oldversion, newpackage, newversion) = i.strip().split('|') 53 | renamefiles.append((oldpackage, oldversion, newpackage, newversion)) 54 | conn = sqlite3.connect(options.db) 55 | cursor = conn.cursor() 56 | for r in renamefiles: 57 | (oldpackage, oldversion, newpackage, newversion) = r 58 | renamesha256 = set() 59 | removesha256 = set() 60 | cursor.execute('select checksum from processed_file where package=? and version=?', ((oldpackage, oldversion))) 61 | sha256s = cursor.fetchall() 62 | # now check for each SHA256 if it already exists with the new version (and the 63 | # old entry only needs to be removed) or if it actually needs to be renamed. 64 | for sha256 in sha256s: 65 | cursor.execute('select distinct package, version from processed_file where checksum=?', sha256) 66 | res = cursor.fetchall() 67 | if (newpackage, newversion) in res: 68 | removesha256.add(sha256) 69 | continue 70 | else: 71 | renamesha256.add(sha256) 72 | if dump: 73 | # first dump all data 74 | programstrings = [] 75 | functionnames = [] 76 | varnames = [] 77 | allsha256 = set() 78 | #allsha256 = removesha256 + renamesha256 79 | allsha256.update(removesha256) 80 | allsha256.update(renamesha256) 81 | for s in allsha256: 82 | res = cursor.execute("select stringidentifier,language from extracted_string where checksum=?", (s[0],)) 83 | if res != None: 84 | programstrings += res 85 | res = cursor.execute("select functionname,language from extracted_function where checksum=?", (s[0],)) 86 | if res != None: 87 | functionnames += res 88 | res = cursor.execute("select name,language,type from extracted_name where checksum=?", (s[0],)) 89 | if res != None: 90 | varnames += res 91 | pickledumps.append({'package': oldpackage, 'programstrings': programstrings, 'functionnames': functionnames, 'varnames': varnames}) 92 | 93 | for s in renamesha256: 94 | cursor.execute("update processed_file set package=?, version=? where checksum=? and package=? and version=?", (r[2], r[3], s[0], r[0], r[1])) 95 | for s in removesha256: 96 | cursor.execute("delete from processed_file where checksum=? and package=? and version=?", (s[0], r[0], r[1])) 97 | conn.commit() 98 | cursor.execute("select * from processed where package=? and version=?", (r[2], r[3])) 99 | res = cursor.fetchall() 100 | # only when doesn't exist in processed yet 101 | if res == []: 102 | cursor.execute("update processed set package=?, version=? where package=? 
and version=?", (r[2], r[3], r[0], r[1])) 103 | else: 104 | cursor.execute("delete from processed where package=? and version=?", (r[0], r[1])) 105 | conn.commit() 106 | conn.close() 107 | 108 | if dump: 109 | dumpfile = open(options.pickle, 'wb') 110 | cPickle.dump(pickledumps, dumpfile) 111 | dumpfile.close() 112 | 113 | if __name__ == "__main__": 114 | main(sys.argv) 115 | -------------------------------------------------------------------------------- /src/maintenance/postgresql-index.sql: -------------------------------------------------------------------------------- 1 | create index processed_index on processed(package, version); 2 | create index processed_checksum on processed(checksum); 3 | create index processed_origin on processed(origin); 4 | create index processed_website on processed(website); 5 | create index processedfile_package_checksum_index on processed_file(checksum, package); 6 | create index processedfile_package_version_index on processed_file(package, version); 7 | create index processedfile_filename_index on processed_file(filename); 8 | create index stringidentifier_index on extracted_string(stringidentifier,language); 9 | create index extracted_hash_index on extracted_string(checksum); 10 | create index extracted_language_index on extracted_string(language); 11 | create index function_index on extracted_function(checksum); 12 | create index functionname_index on extracted_function(functionname); 13 | create index functionname_language on extracted_function(language); 14 | create index name_checksum_index on extracted_name(checksum); 15 | create index name_name_index on extracted_name(name); 16 | create index name_type_index on extracted_name(type); 17 | create index name_language_index on extracted_name(language); 18 | create index kernel_configuration_filename on kernel_configuration(filename); 19 | create index kernelmodule_alias_index on kernelmodule_alias(alias); 20 | create index kernelmodule_author_index on kernelmodule_author(author); 21 | create index kernelmodule_description_index on kernelmodule_description(description); 22 | create index kernelmodule_firmware_index on kernelmodule_firmware(firmware); 23 | create index kernelmodule_license_index on kernelmodule_license(license); 24 | create index kernelmodule_parameter_index on kernelmodule_parameter(paramname); 25 | create index kernelmodule_parameter_description_index on kernelmodule_parameter_description(description); 26 | create index kernelmodule_version_index on kernelmodule_version(version); 27 | create index kernelmodule_alias_checksum_index on kernelmodule_alias(checksum); 28 | create index kernelmodule_author_checksum_index on kernelmodule_author(checksum); 29 | create index kernelmodule_description_checksum_index on kernelmodule_description(checksum); 30 | create index kernelmodule_firmware_checksum_index on kernelmodule_firmware(checksum); 31 | create index kernelmodule_license_checksum_index on kernelmodule_license(checksum); 32 | create index kernelmodule_parameter_checksum_index on kernelmodule_parameter(checksum); 33 | create index kernelmodule_parameter_description_checksum_index on kernelmodule_parameter_description(checksum); 34 | create index kernelmodule_version_checksum_index on kernelmodule_version(checksum); 35 | create index batresult_checksum_index on batresult(checksum); 36 | create index batresult_filename_index on batresult(filename); 37 | create index blacklist_checksum_index on blacklist(checksum); 38 | create index rpm_checksum_index on rpm(checksum); 39 | create index 
rpm_rpmname_index on rpm(rpmname); 40 | create index archivealias_checksum_index on archivealias(checksum); 41 | create index misc_checksum_index on misc(checksum); 42 | create index misc_name_index on misc(name); 43 | create index hashconversion_sha256_index on hashconversion(sha256); 44 | create index hashconversion_md5_index on hashconversion(md5); 45 | create index hashconversion_sha1_index on hashconversion(sha1); 46 | create index hashconversion_crc32_index on hashconversion(crc32); 47 | create index hashconversion_tlsh_index on hashconversion(tlsh); 48 | create index license_index on licenses(checksum); 49 | create index copyright_index on extracted_copyright(checksum); 50 | create index copyright_type_index on extracted_copyright(copyright, type); 51 | create index security_cert_checksum_index on security_cert(checksum); 52 | create index security_cve_checksum_index on security_cve(checksum); 53 | create index security_password_hash_index on security_password(hash); 54 | create index renames_index_originalname on renames (originalname); 55 | create index renames_index_newname on renames (newname); 56 | create index file_index on file(filename, directory); 57 | 58 | create index linuxkernelfunctionname_index on linuxkernelfunctionnamecache(functionname); 59 | create index linuxkernelnamecache_index on linuxkernelnamecache(varname); 60 | create index functionname_c_index on functionnamecache_c(functionname); 61 | create index varnamecache_c_index on varnamecache_c(varname); 62 | create index functionname_java_index on functionnamecache_java(functionname); 63 | create index fieldname_java_cache on fieldcache_java(fieldname); 64 | create index classname_java_cache on classcache_java(classname); 65 | 66 | create index stringidentifier_actionscript_index on stringscache_actionscript(stringidentifier); 67 | create index scores_actionscript_index on scores_actionscript(stringidentifier); 68 | create index package_actionscript_index on avgstringscache_actionscript(package); 69 | 70 | create index stringidentifier_c_index on stringscache_c(stringidentifier); 71 | create index scores_c_index on scores_c(stringidentifier); 72 | create index avgpackage_c_index on avgstringscache_c(package); 73 | 74 | create index stringidentifier_csharp_index on stringscache_csharp(stringidentifier); 75 | create index scores_csharp_index on scores_csharp(stringidentifier); 76 | create index avgpackage_csharp_index on avgstringscache_csharp(package); 77 | 78 | create index stringidentifier_java_index on stringscache_java(stringidentifier); 79 | create index scores_java_index on scores_java(stringidentifier); 80 | create index avgpackage_java_index on avgstringscache_java(package); 81 | 82 | create index stringidentifier_javascript_index on stringscache_javascript(stringidentifier); 83 | create index scores_javascript_index on scores_javascript(stringidentifier); 84 | create index avgpackage_javascript_index on avgstringscache_javascript(package); 85 | 86 | create index stringidentifier_php_index on stringscache_php(stringidentifier); 87 | create index scores_php_index on scores_php(stringidentifier); 88 | create index avgpackage_php_index on avgstringscache_php(package); 89 | 90 | create index stringidentifier_python_index on stringscache_python(stringidentifier); 91 | create index scores_python_index on scores_python(stringidentifier); 92 | create index avgpackage_python_index on avgstringscache_python(package); 93 | 94 | create index stringidentifier_ruby_index on stringscache_ruby(stringidentifier); 95 | 
create index scores_ruby_index on scores_ruby(stringidentifier); 96 | create index avgpackage_ruby_index on avgstringscache_ruby(package); 97 | -------------------------------------------------------------------------------- /src/maintenance/postgresql-table-drop.sql: -------------------------------------------------------------------------------- 1 | drop table processed; 2 | drop table processed_file; 3 | drop table extracted_string; 4 | drop table extracted_function; 5 | drop table extracted_name; 6 | 7 | drop table kernel_configuration; 8 | drop table kernelmodule_alias; 9 | drop table kernelmodule_author; 10 | drop table kernelmodule_description; 11 | drop table kernelmodule_firmware; 12 | drop table kernelmodule_license; 13 | drop table kernelmodule_parameter; 14 | drop table kernelmodule_parameter_description; 15 | drop table kernelmodule_version; 16 | 17 | drop table batresult; 18 | drop table blacklist; 19 | drop table rpm; 20 | drop table archivealias; 21 | drop table misc; 22 | drop table hashconversion; 23 | drop table licenses; 24 | drop table extracted_copyright; 25 | drop table security_cert; 26 | drop table security_cve; 27 | drop table security_password; 28 | drop table renames; 29 | drop table file; 30 | drop table stringscache_actionscript; 31 | drop table scores_actionscript; 32 | drop table avgstringscache_actionscript; 33 | 34 | drop table stringscache_c; 35 | drop table scores_c; 36 | drop table avgstringscache_c; 37 | 38 | drop table stringscache_csharp; 39 | drop table scores_csharp; 40 | drop table avgstringscache_csharp; 41 | 42 | drop table stringscache_java; 43 | drop table scores_java; 44 | drop table avgstringscache_java; 45 | 46 | drop table stringscache_javascript; 47 | drop table scores_javascript; 48 | drop table avgstringscache_javascript; 49 | 50 | drop table stringscache_php; 51 | drop table scores_php; 52 | drop table avgstringscache_php; 53 | 54 | drop table stringscache_python; 55 | drop table scores_python; 56 | drop table avgstringscache_python; 57 | 58 | drop table stringscache_ruby; 59 | drop table scores_ruby; 60 | drop table avgstringscache_ruby; 61 | 62 | drop table varnamecache_c; 63 | drop table linuxkernelnamecache; 64 | drop table functionnamecache_c; 65 | drop table linuxkernelfunctionnamecache; 66 | drop table functionnamecache_java; 67 | drop table fieldcache_java; 68 | drop table classcache_java; 69 | -------------------------------------------------------------------------------- /src/maintenance/postgresql-table.sql: -------------------------------------------------------------------------------- 1 | create table if not exists processed (package text, version text, filename text, origin text, checksum text, downloadurl text, website text); 2 | create table if not exists processed_file (package text, version text, pathname text, checksum text, filename text, thirdparty boolean); 3 | create table if not exists extracted_string (stringidentifier text, checksum text, language text, linenumber int); 4 | create table if not exists extracted_function (checksum text, functionname text, language text, linenumber int); 5 | create table if not exists extracted_name (checksum text, name text, type text, language text, linenumber int); 6 | 7 | create table if not exists kernel_configuration(configstring text, filename text, version text); 8 | create table if not exists kernelmodule_alias(checksum text, modulename text, alias text); 9 | create table if not exists kernelmodule_author(checksum text, modulename text, author text); 10 | create table 
if not exists kernelmodule_description(checksum text, modulename text, description text); 11 | create table if not exists kernelmodule_firmware(checksum text, modulename text, firmware text); 12 | create table if not exists kernelmodule_license(checksum text, modulename text, license text); 13 | create table if not exists kernelmodule_parameter(checksum text, modulename text, paramname text, paramtype text); 14 | create table if not exists kernelmodule_parameter_description(checksum text, modulename text, paramname text, description text); 15 | create table if not exists kernelmodule_version(checksum text, modulename text, version text); 16 | 17 | create table if not exists batresult(checksum text, filename text, tlsh text, pathname text, parentname text, parentchecksum text); 18 | create table if not exists blacklist(checksum text, filename text, origin text); 19 | create table if not exists rpm(rpmname text, checksum text, downloadurl text); 20 | create table if not exists archivealias(checksum text, archivename text, origin text, downloadurl text, website text); 21 | create table if not exists misc(checksum text, name text); 22 | create table if not exists hashconversion (sha256 text, md5 text, sha1 text, crc32 text, tlsh text); 23 | create table if not exists licenses (checksum text, license text, scanner text, version text); 24 | create table if not exists extracted_copyright (checksum text, copyright text, type text, byteoffset int); 25 | create table if not exists security_cert(checksum text, securitybug text, linenumber int, function text, whitelist boolean); 26 | create table if not exists security_cve(checksum text, cve text); 27 | create table if not exists security_password(hash text, password text, origin text); 28 | create table if not exists renames (originalname text, newname text); 29 | create table if not exists file(filename text, directory text, package text, packageversion text, source text, distroversion text); 30 | create table if not exists stringscache_actionscript (stringidentifier text, package text, filename text); 31 | create table if not exists scores_actionscript (stringidentifier text, packages int, score real); 32 | create table if not exists avgstringscache_actionscript (package text, avgstrings real, primary key (package)); 33 | 34 | create table if not exists stringscache_c (stringidentifier text, package text, filename text); 35 | create table if not exists scores_c (stringidentifier text, packages int, score real); 36 | create table if not exists avgstringscache_c (package text, avgstrings real, primary key (package)); 37 | 38 | create table if not exists stringscache_csharp (stringidentifier text, package text, filename text); 39 | create table if not exists scores_csharp (stringidentifier text, packages int, score real); 40 | create table if not exists avgstringscache_csharp (package text, avgstrings real, primary key (package)); 41 | 42 | create table if not exists stringscache_java (stringidentifier text, package text, filename text); 43 | create table if not exists scores_java (stringidentifier text, packages int, score real); 44 | create table if not exists avgstringscache_java (package text, avgstrings real, primary key (package)); 45 | 46 | create table if not exists stringscache_javascript (stringidentifier text, package text, filename text); 47 | create table if not exists scores_javascript (stringidentifier text, packages int, score real); 48 | create table if not exists avgstringscache_javascript (package text, avgstrings real, primary key 
(package)); 49 | 50 | create table if not exists stringscache_php (stringidentifier text, package text, filename text); 51 | create table if not exists scores_php (stringidentifier text, packages int, score real); 52 | create table if not exists avgstringscache_php (package text, avgstrings real, primary key (package)); 53 | 54 | create table if not exists stringscache_python (stringidentifier text, package text, filename text); 55 | create table if not exists scores_python (stringidentifier text, packages int, score real); 56 | create table if not exists avgstringscache_python (package text, avgstrings real, primary key (package)); 57 | 58 | create table if not exists stringscache_ruby (stringidentifier text, package text, filename text); 59 | create table if not exists scores_ruby (stringidentifier text, packages int, score real); 60 | create table if not exists avgstringscache_ruby (package text, avgstrings real, primary key (package)); 61 | 62 | create table if not exists varnamecache_c (varname text, package text); 63 | create table if not exists linuxkernelnamecache (varname text, package text); 64 | create table if not exists functionnamecache_c (functionname text, package text); 65 | create table if not exists linuxkernelfunctionnamecache (functionname text, package text); 66 | create table if not exists functionnamecache_java (functionname text, package text); 67 | create table if not exists fieldcache_java (fieldname text, package text); 68 | create table if not exists classcache_java (classname text, package text); 69 | -------------------------------------------------------------------------------- /src/maintenance/rewritelist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This program can be used to generate a LIST file, like generatelist.py, but 9 | taking two LIST files as input. The first ('correctedlist') is a LIST file that has 10 | corrected input. The second one is a possibly uncorrected list. 11 | 12 | The main use case is when the database has to be regenerated (new license 13 | scanners, better string extraction, and so on), with possibly new input. 14 | Using dumplist.py the (supposedly) corrected list (for old packages) can be 15 | extracted from the database. With generatelist.py a new list can be generated 16 | for the packages. By comparing the two and reusing the corrected results a lot 17 | of effort can be saved. 
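To make the merge concrete, here is a sketch with hypothetical entries. If the
corrected list contains:

    zlib	1.2.8	zlib-1.2.8.tar.gz	zlib.net

and the new list contains the same filename with a different package name or
origin, the corrected entry wins and is written out; filenames that appear
only in the new list are passed through unchanged.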
18 | ''' 19 | 20 | import os 21 | import os.path 22 | import sys 23 | from optparse import OptionParser 24 | 25 | def main(argv): 26 | parser = OptionParser() 27 | parser.add_option("-c", "--correctedlist", action="store", dest="correctedlist", help="path to corrected list", metavar="FILE") 28 | parser.add_option("-n", "--newlist", action="store", dest="newlist", help="path to new list", metavar="FILE") 29 | (options, args) = parser.parse_args() 30 | 31 | if options.correctedlist == None: 32 | parser.error("Need corrected list") 33 | if options.newlist == None: 34 | parser.error("Need new list") 35 | 36 | if not os.path.exists(options.correctedlist): 37 | parser.error("Need corrected list") 38 | if not os.path.exists(options.newlist): 39 | parser.error("Need new list") 40 | 41 | # first suck in the corrected data, filename is key 42 | correctedfiles = {} 43 | correctedfile_list = open(options.correctedlist).readlines() 44 | for c in correctedfile_list: 45 | (package, version, filename, origin) = c.strip().split() 46 | # this should actually not happen 47 | if correctedfiles.has_key(filename): 48 | continue 49 | else: 50 | correctedfiles[filename] = (package, version, origin) 51 | 52 | # then suck in the new data, filename is key 53 | newfiles = {} 54 | newfile_list = open(options.newlist).readlines() 55 | for c in newfile_list: 56 | (package, version, filename, origin) = c.strip().split() 57 | # this should actually not happen 58 | if newfiles.has_key(filename): 59 | continue 60 | else: 61 | newfiles[filename] = (package, version, origin) 62 | listentries = [] 63 | for i in newfiles.keys(): 64 | if correctedfiles.has_key(i): 65 | # entries are not the same! 66 | if newfiles[i] != correctedfiles[i]: 67 | listentries.append("%s\t%s\t%s\t%s" % (correctedfiles[i][0], correctedfiles[i][1], i, correctedfiles[i][2])) 68 | else: 69 | listentries.append("%s\t%s\t%s\t%s" % (newfiles[i][0], newfiles[i][1], i, newfiles[i][2])) 70 | else: 71 | listentries.append("%s\t%s\t%s\t%s" % (newfiles[i][0], newfiles[i][1], i, newfiles[i][2])) 72 | listentries.sort() 73 | for i in listentries: 74 | print i 75 | 76 | if __name__ == "__main__": 77 | main(sys.argv) 78 | -------------------------------------------------------------------------------- /src/maintenance/scorecache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2014-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | import sys 8 | import os 9 | import os.path 10 | import sqlite3 11 | from optparse import OptionParser 12 | 13 | def main(argv): 14 | alpha = 5.0 15 | 16 | parser = OptionParser() 17 | parser.add_option("-d", "--database", action="store", dest="db", help="path to caching database", metavar="FILE") 18 | (options, args) = parser.parse_args() 19 | if options.db == None: 20 | parser.error("Path to caching database") 21 | if not os.path.exists(options.db): 22 | print >>sys.stderr, "Caching database %s does not exist" % options.db 23 | sys.exit(1) 24 | 25 | conn = sqlite3.connect(options.db) 26 | c = conn.cursor() 27 | 28 | c.execute("create table if not exists scores (stringidentifier text, packages int, score real)") 29 | c.execute("create index if not exists scoresindex on scores(stringidentifier)") 30 | conn.commit() 31 | c2 = conn.cursor() 32 | 33 | c.execute("select distinct stringidentifier from stringscache") 34 | programstrings = c.fetchmany(10000) 35 | 
while programstrings != []: 36 | for p in programstrings: 37 | pkgs = {} 38 | filenames = {} 39 | 40 | pfs = c2.execute("select package, filename from stringscache where stringidentifier=?", p).fetchall() 41 | packages = set(map(lambda x: x[0], pfs)) 42 | 43 | if len(packages) == 1: 44 | score = float(len(p[0])) 45 | else: 46 | for pf in pfs: 47 | (package, filename) = pf 48 | if not filenames.has_key(filename): 49 | filenames[filename] = [package] 50 | else: 51 | filenames[filename] = list(set(filenames[filename] + [package])) 52 | try: 53 | score = float(len(p[0])) / pow(alpha, (len(filenames) - 1)) # score decays exponentially with the number of files the string appears in 54 | except Exception, e: # pow() overflowed, so the score is effectively zero 55 | score = float(len(p[0])) / sys.maxint 56 | # cut off scores that are too small to store as a 'real' in for example postgresql 57 | if score < 1e-37: 58 | score = 0.0 59 | c2.execute("insert into scores(stringidentifier, packages, score) values (?,?,?)", (p[0], len(packages), float(score))) 60 | programstrings = c.fetchmany(10000) 61 | conn.commit() 62 | c2.close() 63 | print "vacuuming" 64 | c.execute("vacuum") 65 | conn.commit() 66 | c.close() 67 | conn.close() 68 | 69 | if __name__ == "__main__": 70 | main(sys.argv) 71 | -------------------------------------------------------------------------------- /src/maintenance/updatesha256sum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2014-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script updates the SHA256SUM file in a directory. This file contains sha256 checksums 9 | (and optionally other hashes) for each file in the directory and speeds up database creation. 10 | ''' 11 | 12 | import os 13 | import os.path 14 | import sys 15 | import hashlib 16 | import multiprocessing 17 | import zlib 18 | from optparse import OptionParser 19 | 20 | try: 21 | import tlsh 22 | tlshscan = True 23 | except Exception, e: 24 | tlshscan = False 25 | 26 | def computehash((filedir, filename, extrahashes)): 27 | filehashes = {} 28 | resolved_path = os.path.join(filedir, filename) 29 | scanfile = open(resolved_path, 'r') 30 | filedata = scanfile.read() 31 | scanfile.close() 32 | h = hashlib.new('sha256') 33 | h.update(filedata) 34 | filehashes['sha256'] = h.hexdigest() 35 | 36 | if 'crc32' in extrahashes: 37 | try: 38 | filehashes['crc32'] = zlib.crc32(filedata) & 0xffffffff 39 | except: 40 | return None 41 | 42 | if 'tlsh' in extrahashes: 43 | if os.stat(resolved_path).st_size >= 256: 44 | filehashes['tlsh'] = tlsh.hash(filedata) 45 | else: 46 | filehashes['tlsh'] = None 47 | 48 | # first remove 'crc32' from extrahashes 49 | extrahashesset = set(extrahashes) 50 | try: 51 | extrahashesset.remove('crc32') 52 | except KeyError: 53 | pass 54 | 55 | # then remove 'tlsh' from extrahashes 56 | try: 57 | extrahashesset.remove('tlsh') 58 | except KeyError: 59 | pass 60 | 61 | temphashes = {} 62 | for i in extrahashesset: 63 | temphashes[i] = hashlib.new(i) 64 | for i in extrahashesset: 65 | temphashes[i].update(filedata) 66 | for i in extrahashesset: 67 | filehashes[i] = temphashes[i].hexdigest() 68 | return (filename, filehashes) 69 | 70 | def main(argv): 71 | parser = OptionParser() 72 | parser.add_option("-f", "--filedir", action="store", dest="filedir", help="path to directory with files", metavar="DIR") 73 | (options, args) = parser.parse_args() 74 | if options.filedir == None: 75 | parser.error("No directory defined") 76 | if not os.path.exists(options.filedir): 77 | parser.error("No directory found") 78 | dirlist = 
os.listdir(options.filedir) 79 | dirlist = filter(lambda x: x != 'LIST' and x != 'SHA256SUM', dirlist) 80 | dirlist = filter(lambda x: os.path.isfile(os.path.join(options.filedir, x)), dirlist) 81 | 82 | # no files, so exit 83 | if len(dirlist) == 0: 84 | sys.exit(0) 85 | 86 | extrahashes = ['md5', 'sha1', 'crc32'] 87 | if tlshscan: 88 | extrahashes.append('tlsh') 89 | 90 | filetohash = {} 91 | if os.path.exists(os.path.join(options.filedir, "SHA256SUM")): 92 | sha256file = os.path.join(options.filedir, "SHA256SUM") 93 | sha256lines = open(sha256file, 'r').readlines() 94 | # first line should have the supported hashes 95 | 96 | checksumsused = sha256lines[0].strip().split() 97 | # first line is always a list of supported hashes. 98 | process = True 99 | if set(checksumsused).intersection(set(extrahashes)) != set(extrahashes): 100 | process = False 101 | if process: 102 | for i in sha256lines[1:]: 103 | entries = i.strip().split() 104 | filename = entries[0] 105 | if filename == 'SHA256SUM': 106 | continue 107 | if filename == 'LIST': 108 | continue 109 | if filename == 'DOWNLOADURL': 110 | continue 111 | # sha256 is always the first hash and second entry 112 | hashentry = entries[1] 113 | filetohash[filename] = {} 114 | filetohash[filename]['sha256'] = hashentry 115 | counter = 2 116 | for c in checksumsused[1:]: 117 | # only record results for hashes that are in 'extrahashes' 118 | if c in extrahashes: 119 | filetohash[filename][c] = entries[counter] 120 | counter += 1 121 | 122 | # determine which files need to be scanned 123 | diffset = set(dirlist).difference(set(filetohash)) 124 | if len(diffset) == 0: 125 | sys.exit(0) 126 | 127 | # find hashes in parallel 128 | shatasks = map(lambda x: (options.filedir, x, extrahashes), diffset) 129 | pool = multiprocessing.Pool() 130 | sharesults = filter(lambda x: x != None, pool.map(computehash, shatasks, 1)) 131 | pool.terminate() 132 | 133 | for i in sharesults: 134 | (filename, filehashes) = i 135 | filetohash[filename] = filehashes 136 | 137 | # write results 138 | filenameskeys = filetohash.keys() 139 | filenameskeys.sort() 140 | sha256file = open(os.path.join(options.filedir, "SHA256SUM"), 'w') 141 | # first write a line with the hashes that are supported 142 | if extrahashes == []: 143 | sha256file.write("sha256\n") 144 | else: 145 | hashesstring = "sha256" 146 | for h in extrahashes: 147 | hashesstring += "\t%s" % h 148 | sha256file.write("%s\n" % hashesstring) 149 | for i in filenameskeys: 150 | # first hashes, since file names could contain spaces 151 | hashesstring = filetohash[i]['sha256'] 152 | for h in extrahashes: 153 | hashesstring += "\t%s" % filetohash[i][h] 154 | sha256file.write("%s %s\n" % (i, hashesstring)) 155 | sha256file.close() 156 | 157 | if __name__ == "__main__": 158 | main(sys.argv) 159 | -------------------------------------------------------------------------------- /src/maintenance/verifyarchive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2014 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Script to test integrity of archives. 
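An archive is considered intact when its contents can be listed: depending on
the compression type a "tar tf" variant is run on the file (for example
"tar jtf" for bzip2, "tar ztf" for gzip) and a non-zero exit code from tar
marks the archive as corrupt. This is roughly equivalent to running (with a
hypothetical file name):

    tar jtf gcc-4.9.0.tar.bz2
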
TODO: properly handle ZIP archives 9 | ''' 10 | 11 | import sys, os, magic, multiprocessing, subprocess 12 | import tempfile, bz2, tarfile, gzip 13 | from optparse import OptionParser 14 | 15 | tarmagic = ['POSIX tar archive (GNU)' 16 | , 'tar archive' 17 | ] 18 | 19 | ms = magic.open(magic.MAGIC_NONE) 20 | ms.load() 21 | 22 | # unpack the directories to be scanned. 23 | def unpack((directory, filename)): 24 | try: 25 | os.stat(os.path.join(directory, filename)) 26 | except: 27 | print >>sys.stderr, "Can't find %s" % filename 28 | return None 29 | 30 | filemagic = ms.file(os.path.realpath(os.path.join(directory, filename))) 31 | 32 | # Assume if the files are bz2 or gzip compressed they are compressed tar files 33 | if 'bzip2 compressed data' in filemagic: 34 | # for some reason the tar.bz2 unpacking from python doesn't always work, like 35 | # aeneas-1.0.tar.bz2 from GNU, so use a subprocess instead of using the 36 | # Python tar functionality. 37 | p = subprocess.Popen(['tar', 'jtf', os.path.join(directory, filename)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 38 | (stanout, stanerr) = p.communicate() 39 | elif 'LZMA compressed data, streamed' in filemagic: 40 | p = subprocess.Popen(['tar', 'itf', os.path.join(directory, filename)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 41 | (stanout, stanerr) = p.communicate() 42 | elif 'XZ compressed data' in filemagic: 43 | p = subprocess.Popen(['tar', 'itf', os.path.join(directory, filename)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 44 | (stanout, stanerr) = p.communicate() 45 | elif 'gzip compressed data' in filemagic: 46 | p = subprocess.Popen(['tar', 'ztf', os.path.join(directory, filename)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 47 | (stanout, stanerr) = p.communicate() 48 | elif 'compress\'d data 16 bits' in filemagic: 49 | p = subprocess.Popen(['tar', 'ztf', os.path.join(directory, filename)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 50 | (stanout, stanerr) = p.communicate() 51 | elif 'Minix filesystem' in filemagic and filename.endswith('.gz'): 52 | # sometimes libmagic gets it wrong 53 | p = subprocess.Popen(['tar', 'ztf', os.path.join(directory, filename)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 54 | (stanout, stanerr) = p.communicate() 55 | else: 56 | return None 57 | if p.returncode != 0: 58 | return (filename, False) 59 | else: 60 | return (filename, True) 61 | ''' 62 | elif 'Zip archive data' in filemagic: 63 | try: 64 | tmpdir = tempfile.mkdtemp(dir=unpackdir) 65 | p = subprocess.Popen(['unzip', "-B", os.path.join(directory, filename), '-d', tmpdir], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 66 | (stanout, stanerr) = p.communicate() 67 | if p.returncode != 0 and p.returncode != 1: 68 | print >>sys.stderr, "unpacking ZIP failed for", filename, stanerr 69 | shutil.rmtree(tmpdir) 70 | else: 71 | return tmpdir 72 | except Exception, e: 73 | print >>sys.stderr, "unpacking ZIP failed", e 74 | ''' 75 | 76 | def main(argv): 77 | parser = OptionParser() 78 | parser.add_option("-f", "--filedir", action="store", dest="filedir", help="path to directory containing files to unpack", metavar="DIR") 79 | 80 | (options, args) = parser.parse_args() 81 | if options.filedir == None: 82 | parser.error("Specify dir with files") 83 | else: 84 | try: 85 | filelist = open(os.path.join(options.filedir, "LIST")).readlines() 86 | except: 87 | parser.error("'LIST' not found in file dir") 88 | 89 | # 
first process the LIST file 90 | pkgmeta = [] 91 | for unpackfile in filelist: 92 | try: 93 | unpacks = unpackfile.strip().split() 94 | if len(unpacks) == 3: 95 | origin = "unknown" 96 | (package, version, filename) = unpacks 97 | else: 98 | (package, version, filename, origin) = unpacks 99 | pkgmeta.append((options.filedir, filename)) 100 | except Exception, e: 101 | # oops, something went wrong 102 | print >>sys.stderr, e 103 | 104 | pool = multiprocessing.Pool() 105 | unpackresults = pool.map(unpack, pkgmeta, 1) 106 | pool.terminate() 107 | for i in unpackresults: 108 | if i != None: 109 | (filename, result) = i 110 | if not result: 111 | print "corrupt archive: %s" % filename 112 | 113 | if __name__ == "__main__": 114 | main(sys.argv) 115 | -------------------------------------------------------------------------------- /src/maintenance/verifydb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2016 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This script verifies that the tables in a database are in sync, which means: 9 | all of the files in the tables "extracted_string" and "extracted_function" can 10 | also be found in "processed_file" 11 | ''' 12 | 13 | import sys 14 | import os 15 | import os.path 16 | import re 17 | import fnmatch 18 | import psycopg2 19 | import ConfigParser 20 | from optparse import OptionParser 21 | 22 | def main(argv): 23 | config = ConfigParser.ConfigParser() 24 | parser = OptionParser() 25 | parser.add_option("-c", "--config", action="store", dest="cfg", help="path to configuration file", metavar="FILE") 26 | 27 | (options, args) = parser.parse_args() 28 | if options.cfg == None: 29 | parser.error("Need path to configuration file") 30 | 31 | try: 32 | configfile = open(options.cfg, 'r') 33 | except: 34 | parser.error("Configuration file not readable") 35 | config.readfp(configfile) 36 | configfile.close() 37 | 38 | section = 'extractconfig' 39 | 40 | try: 41 | postgresql_user = config.get(section, 'postgresql_user') 42 | postgresql_password = config.get(section, 'postgresql_password') 43 | postgresql_db = config.get(section, 'postgresql_db') 44 | 45 | # check to see if a host (IP-address) was supplied either 46 | # as host or hostaddr. hostaddr is not supported on older 47 | # versions of psycopg2, for example CentOS 6.6, so it is not 48 | # used at the moment. 49 | try: 50 | postgresql_host = config.get(section, 'postgresql_host') 51 | except: 52 | postgresql_host = None 53 | try: 54 | postgresql_hostaddr = config.get(section, 'postgresql_hostaddr') 55 | except: 56 | postgresql_hostaddr = None 57 | # check to see if a port was specified. If not, default to 'None' 58 | try: 59 | postgresql_port = config.get(section, 'postgresql_port') 60 | except Exception, e: 61 | postgresql_port = None 62 | except: 63 | print >>sys.stderr, "Database connection not defined in configuration file. Exiting..." 
64 | sys.stderr.flush() 65 | sys.exit(1) 66 | 67 | try: 68 | conn = psycopg2.connect(database=postgresql_db, user=postgresql_user, password=postgresql_password, host=postgresql_host, port=postgresql_port) 69 | 70 | cursor = conn.cursor() 71 | except: 72 | print >>sys.stderr, "Can't open database" 73 | sys.exit(1) 74 | 75 | print "checking processed" 76 | sys.stdout.flush() 77 | cursor.execute("select distinct checksum from processed") 78 | res = cursor.fetchall() 79 | conn.commit() 80 | for r in res: 81 | cursor.execute('select checksum from processed where checksum=%s', r) 82 | processed_results = cursor.fetchall() 83 | conn.commit() 84 | if len(processed_results) != 1: 85 | cursor.execute('select * from processed where checksum=%s', r) 86 | processed_results = cursor.fetchall() 87 | conn.commit() 88 | print "identical:", map(lambda x: "%s %s" % (x[0], x[1]), processed_results) 89 | sys.stdout.flush() 90 | 91 | # create a new cursor 92 | ncursor = conn.cursor() 93 | 94 | cursor.execute("select package,version from processed_file") 95 | res = cursor.fetchmany(40000) 96 | conn.commit() 97 | 98 | totals = 0 99 | print "checking processed_file" 100 | sys.stdout.flush() 101 | while res != []: 102 | totals += len(res) 103 | #print "processing", totals 104 | for r in res: 105 | (package,version) = r 106 | ncursor.execute('select checksum from processed where package=%s and version=%s LIMIT 1', r) 107 | pres = ncursor.fetchall() 108 | conn.commit() 109 | if pres == []: 110 | print "database not in sync", r 111 | sys.stdout.flush() 112 | res = cursor.fetchmany(40000) 113 | conn.commit() 114 | 115 | for i in ["extracted_string", "extracted_function"]: 116 | cursor.execute("select distinct(checksum) from %s" % i) 117 | res = cursor.fetchmany(40000) 118 | conn.commit() 119 | totals = 0 120 | while res != []: 121 | totals += len(res) 122 | print "processing %s" % i, totals 123 | sys.stdout.flush() 124 | for r in res: 125 | ncursor.execute('select checksum from processed_file where checksum=%s LIMIT 1', r) 126 | pres = ncursor.fetchall() 127 | conn.commit() 128 | if pres == []: 129 | print "database %s not in sync" % i, r[0] 130 | sys.stdout.flush() 131 | res = cursor.fetchmany(40000) 132 | conn.commit() 133 | 134 | if __name__ == "__main__": 135 | main(sys.argv) 136 | -------------------------------------------------------------------------------- /src/maintenance/verifylist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2012-2013 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Helper script to verify the LIST files generated by generatelist.py. This is useful to see if any typos were made. 
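Every line of a LIST file is expected to contain exactly four whitespace
separated fields (package, version, filename, origin), for example this
hypothetical entry:

    zlib 1.2.8 zlib-1.2.8.tar.gz zlib.net

Lines that deviate from this format, package names containing 'dfsg' and
package names that differ from the previous entry only in case are all
reported on standard error.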
9 | ''' 10 | 11 | import os 12 | import os.path 13 | import sys 14 | from optparse import OptionParser 15 | 16 | def main(argv): 17 | parser = OptionParser() 18 | parser.add_option("-l", "--list", action="store", dest="listfile", help="path to LIST", metavar="FILE") 19 | (options, args) = parser.parse_args() 20 | 21 | try: 22 | filelist = open(options.listfile).readlines() 23 | except: 24 | parser.error("'LIST' not found") 25 | 26 | prev_split = None 27 | for unpackfile in filelist: 28 | # simple format check 29 | try: 30 | unpacks = unpackfile.strip().split() 31 | if len(unpacks) != 4: 32 | print >>sys.stderr, "FORMAT ERROR", unpackfile.strip() 33 | sys.stderr.flush() 34 | continue 35 | except Exception, e: 36 | # oops, something went wrong 37 | print >>sys.stderr, e 38 | # see if dfsg is in the package name, since Debian tends to do this 39 | if 'dfsg' in unpacks[0]: 40 | print >>sys.stderr, "DFSG ERROR", unpackfile.strip() 41 | sys.stderr.flush() 42 | if prev_split == None: 43 | prev_split = unpacks 44 | continue 45 | # see if we have the same package with different case 46 | if unpacks[0] != prev_split[0] and unpacks[0].lower() == prev_split[0].lower(): 47 | print >>sys.stderr, "CASE ERROR", unpackfile.strip() 48 | sys.stderr.flush() 49 | prev_split = unpacks 50 | 51 | if __name__ == "__main__": 52 | main(sys.argv) 53 | -------------------------------------------------------------------------------- /src/patches/README: -------------------------------------------------------------------------------- 1 | This directory contains a few patches that need to be applied to programs before they can be reliably used by BAT 2 | 3 | * cramfs.patch : this patch enables the -x option and removes the unpacking of special inodes, such as device files. Creating these files sometimes requires root privileges. This means that BAT would have to run as root. Since these special files are not inspected anyway there is no need to unpack them. 4 | 5 | * code2html-0.9.1-add-qml.patch : this patch lets code2html also process QML files (more and more frequently used in Qt programs) 6 | 7 | * code2html-0.9.1-add-groovyscala.patch : this patch lets code2html also process Scala and Groovy files as Java 8 | 9 | 10 | * code2html-0.9.1-add-csharp.patch : this patch lets code2html process C# files. 
It is a direct copy of the Java config, with a few minor modifications 11 | -------------------------------------------------------------------------------- /src/patches/code2html-0.9.1-add-groovyscala.patch: -------------------------------------------------------------------------------- 1 | diff -ruN code2html-0.9.1/code2html code2html-0.9.1.new/code2html 2 | --- code2html-0.9.1/code2html 2002-01-12 22:17:02.000000000 +0100 3 | +++ code2html-0.9.1.new/code2html 2012-03-04 17:27:14.207492320 +0100 4 | @@ -2549,7 +2549,7 @@ 5 | # taken from nedit 6 | # modified by PP 7 | $LANGUAGE{'java'} = { 8 | - 'filename' => '\\.java$', 9 | + 'filename' => '\\.(java|groovy|scala)$', 10 | 'regex' => '', 11 | 'patterns' => [ 12 | { 13 | -------------------------------------------------------------------------------- /src/patches/code2html-0.9.1-add-qml.patch: -------------------------------------------------------------------------------- 1 | diff -ruN code2html-0.9.1/code2html code2html-0.9.1.new/code2html 2 | --- code2html-0.9.1/code2html 2002-01-12 22:17:02.000000000 +0100 3 | +++ code2html-0.9.1.new/code2html 2012-03-03 19:28:46.000000000 +0100 4 | @@ -2258,7 +2258,7 @@ 5 | # taken from nedit 6 | # modified by PP 7 | $LANGUAGE{'c++'} = { 8 | - 'filename' => '\\.(c(c|pp|xx)|h(h|pp|xx)|C(C|PP|XX)?|H(H|PP|XX)?|i)$', 9 | + 'filename' => '\\.(c(c|pp|xx)|h(h|pp|xx)|C(C|PP|XX)?|H(H|PP|XX)?|i|qml)$', 10 | 'regex' => '', 11 | 'patterns' => [ 12 | { 13 | -------------------------------------------------------------------------------- /src/patches/cramfs.patch: -------------------------------------------------------------------------------- 1 | diff -ru util-linux-ng-2.18-rc1/disk-utils/fsck.cramfs.c util-linux-ng-2.18-rc1.new/disk-utils/fsck.cramfs.c 2 | --- util-linux-ng-2.18-rc1/disk-utils/fsck.cramfs.c 2010-03-18 23:11:23.000000000 +0100 3 | +++ util-linux-ng-2.18-rc1.new/disk-utils/fsck.cramfs.c 2010-06-09 10:11:29.000000000 +0200 4 | @@ -34,7 +34,7 @@ 5 | */ 6 | 7 | /* compile-time options */ 8 | -//#define INCLUDE_FS_TESTS /* include cramfs checking and extraction */ 9 | +#define INCLUDE_FS_TESTS /* include cramfs checking and extraction */ 10 | 11 | #include 12 | #include 13 | @@ -640,13 +640,14 @@ 14 | if (opt_verbose) { 15 | print_node(type, i, path); 16 | } 17 | - 18 | +/* 19 | if (opt_extract) { 20 | if (mknod(path, i->mode, devtype) < 0) { 21 | die(FSCK_ERROR, 1, _("mknod failed: %s"), path); 22 | } 23 | change_file_status(path, i); 24 | } 25 | +*/ 26 | } 27 | 28 | static void expand_fs(char *path, struct cramfs_inode *inode) 29 | -------------------------------------------------------------------------------- /src/scripts/comparebinaries.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2013-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This program compares two binaries (firmwares, files, etc.) in various ways to 9 | see how close they are. 10 | 11 | There are a few scenarios where this program can be used: 12 | 13 | 1. comparing an old firmware (that is already known and which has been verified) 14 | to a new firmware (update) and see if there are any big differences. 15 | 2. comparing a firmware to a rebuild of a firmware as part of compliance 16 | engineering 17 | 3. comparing two binaries to see if a certain security bug might be present 18 | 19 | A few assumptions are made: 20 | 21 | 1. 
both firmwares were unpacked using the Binary Analysis Tool 22 | 2. files that are in the original firmware, but not in the new firmware, are 23 | not reported (example: removed binaries). This might change in a future version. 24 | 3. files that are in the new firmware but not in the original firmware are 25 | reported, since this would mean additions to the firmware, possibly with 26 | license conditions or security concerns. 27 | 4. files that appear in both firmwares but which are not identical are checked 28 | using bsdiff and, if available, tlsh. 29 | 30 | With just checksums it is easy to find the files that are different. Using bsdiff 31 | and tlsh it becomes easier to see how big the difference really is. 32 | 33 | Low values probably point at changes that are not interesting at all: 34 | * time stamps (BusyBox, Linux kernel, etc. record a time stamp in the binary) 35 | * slightly different compiler settings 36 | 37 | If the diffs get larger there are of course bigger changes. 38 | 39 | This approach will make it easier to make a baseline scan of a firmware, then 40 | find, prioritize and scan only the differences in an update of the firmware. 41 | ''' 42 | 43 | import sys 44 | import os 45 | import os.path 46 | import hashlib 47 | import subprocess 48 | import tempfile 49 | import magic 50 | import multiprocessing 51 | from optparse import OptionParser 52 | try: 53 | import tlsh 54 | tlshscanning = True 55 | except: 56 | tlshscanning = False 57 | 58 | # copied from bruteforce.py: compute the SHA256 checksum of a file, reading it in blocks 59 | def gethash(path, filename): 60 | scanfile = open("%s/%s" % (path, filename), 'r') 61 | h = hashlib.new('sha256') 62 | scanfile.seek(0) 63 | hashdata = scanfile.read(10000000) 64 | while hashdata != '': 65 | h.update(hashdata) 66 | hashdata = scanfile.read(10000000) 67 | scanfile.close() 68 | return h.hexdigest() 69 | 70 | # method to compare binaries. 
Returns the amount of bytes that differ 71 | # according to bsdiff, or 0 if the files are identical 72 | def comparebinaries(path1, path2): 73 | basepath1 = os.path.basename(path1) 74 | dirpath1 = os.path.dirname(path1) 75 | basepath2 = os.path.basename(path2) 76 | dirpath2 = os.path.dirname(path2) 77 | # binaries are identical 78 | if gethash(dirpath1, basepath1) == gethash(dirpath2, basepath2): 79 | return 0 80 | difftmp = tempfile.mkstemp() 81 | os.fdopen(difftmp[0]).close() 82 | p = subprocess.Popen(["bsdiff", path1, path2, difftmp[1]], stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 83 | # cleanup 84 | (stanout, stanerr) = p.communicate() 85 | diffsize = os.stat(difftmp[1]).st_size 86 | os.unlink(difftmp[1]) 87 | return diffsize 88 | 89 | def main(argv): 90 | parser = OptionParser() 91 | parser.add_option("-n", "--new", action="store", dest="newdir", help="path to BAT results of new binary", metavar="DIR") 92 | parser.add_option("-o", "--original", action="store", dest="olddir", help="path to BAT results of original binary", metavar="DIR") 93 | (options, args) = parser.parse_args() 94 | if options.olddir == None or options.newdir == None: 95 | parser.error("Supply paths to both directories") 96 | 97 | if not os.path.exists(options.olddir): 98 | parser.error("Directory \"%s\" does not exist" % (options.olddir,)) 99 | 100 | if not os.path.exists(options.newdir): 101 | parser.error("Directory \"%s\" does not exist" % (options.newdir,)) 102 | 103 | ms = magic.open(magic.MAGIC_NONE) 104 | ms.load() 105 | 106 | # The goal is to check the files from the new binary and 107 | # compare them with files from the old binary 108 | # First build a list of files in the original binary 109 | # Then do the same for the new binary and check: 110 | # * does a file with the same name exist in the original binary 111 | # * do the files differ 112 | # and report about it 113 | checkfiles = {} 114 | osgen = os.walk(options.olddir) 115 | try: 116 | while True: 117 | i = osgen.next() 118 | for p in i[2]: 119 | if os.path.islink(os.path.join(i[0], p)): 120 | continue 121 | if not os.path.isfile(os.path.join(i[0], p)): 122 | continue 123 | if not checkfiles.has_key(p): 124 | checkfiles[p] = [os.path.join(i[0], p)] 125 | else: 126 | checkfiles[p].append(os.path.join(i[0],p)) 127 | except StopIteration: 128 | pass 129 | notfoundnewdir = [] 130 | notfoundorigdir = [] 131 | # now loop over the new binary 132 | osgen = os.walk(options.newdir) 133 | try: 134 | while True: 135 | i = osgen.next() 136 | for p in i[2]: 137 | if os.path.islink(os.path.join(i[0], p)): 138 | continue 139 | if not os.path.isfile(os.path.join(i[0], p)): 140 | continue 141 | # name of this file can't be found in old scan tree, so report 142 | if not checkfiles.has_key(p): 143 | notfoundnewdir.append(p) 144 | else: 145 | for j in checkfiles[p]: 146 | diff = comparebinaries(j, os.path.join(i[0], p)) 147 | # bsdiff between two identical files is 143 bytes 148 | if diff <= 143 : 149 | continue 150 | else: 151 | print "* %s and %s differ %d bytes according to bsdiff" % ("%s/%s" % (i[0], p), j, diff) 152 | except StopIteration: 153 | pass 154 | 155 | if notfoundnewdir != []: 156 | print "\nThe following files from the new binary were not found in the original binary:" 157 | for i in notfoundnewdir: 158 | print "* %s" % i 159 | 160 | # TODO: check for files in the original directory as well, although 161 | # removal of files might not be as interesting 162 | if notfoundorigdir != []: 163 | print "\nThe following files from the 
original binary were not found in the new binary:" 164 | for i in notfoundorigdir: 165 | print "* %s" % i 166 | 167 | if __name__ == "__main__": 168 | main(sys.argv) 169 | -------------------------------------------------------------------------------- /src/scripts/findxor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | Find XOR key using some very superdumb methods. 9 | 10 | The idea is to exploit the idea that padding is used in firmwares. Usually padding 11 | consists of NUL bytes. When XORing the key with NUL bytes the result will be the key. 12 | Often it is very easy to see the key in plain sight using for example the command 13 | "hexdump -C". 14 | 15 | In this script it is assumed (for now) that the keylength is 16 and that there is just 16 | one single key used. Manual inspection is definitely needed. 17 | ''' 18 | 19 | import collections 20 | import os 21 | import sys 22 | 23 | from optparse import OptionParser 24 | 25 | def findpadding(firmware): 26 | counter = collections.Counter() 27 | fwfile = open(firmware) 28 | firmwarebytes = fwfile.read() 29 | fwfile.close() 30 | fwlen = len(firmwarebytes) 31 | blocks = fwlen/16 32 | byteblocks = [] 33 | for i in xrange(0, blocks): 34 | byteblocks.append(firmwarebytes[i*16:i*16+16]) 35 | counter.update(byteblocks) 36 | rank = 1 37 | reportamount = 10 38 | print "MOST COMMON, TOP %d" % reportamount 39 | for i in counter.most_common(reportamount): 40 | print rank, i[1], map(lambda x: hex(ord(x)), i[0]) 41 | rank += 1 42 | 43 | def main(argv): 44 | parser = OptionParser() 45 | parser.add_option("-f", "--firmware", action="store", dest="firmware", help="path to firmware", metavar="FILE") 46 | (options, args) = parser.parse_args() 47 | if options.firmware == None: 48 | parser.exit("Path to firmware not supplied, exiting") 49 | if os.path.isdir(options.firmware): 50 | print >>sys.stderr, "%s is not a file" % options.firmware 51 | sys.exit(1) 52 | 53 | findpadding(options.firmware) 54 | 55 | if __name__ == "__main__": 56 | main(sys.argv) 57 | -------------------------------------------------------------------------------- /src/scripts/sourcewalk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Binary Analysis Tool 4 | # Copyright 2013-2015 Armijn Hemel for Tjaldur Software Governance Solutions 5 | # Licensed under Apache 2.0, see LICENSE file for details 6 | 7 | ''' 8 | This program can quickly determine whether or not a file is in known upstream 9 | sources. It uses a pregenerated database containing names and checksums of 10 | files (for example the Linux kernel) and reports whether or not it can be found 11 | in the database. 12 | 13 | The purpose of this script is to find files that differ from upstream files and 14 | reduce the search space. 15 | 16 | This script will *NOT* catch: 17 | 18 | * binary files 19 | * patch/diff files 20 | * anything that does not have an extension from the list 21 | * configuration files 22 | ''' 23 | 24 | import os 25 | import os.path 26 | import sys 27 | import sqlite3 28 | import hashlib 29 | from optparse import OptionParser 30 | 31 | # list of extensions, plus what language they should be mapped to 32 | # This is not necessarily correct, but for now it is good enough. 
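# These groupings are similar to the code2html patches in src/patches:
# Groovy and Scala sources are treated as Java, and QML files are grouped
# with the C family of languages.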
33 | extensions = {'.c' : 'C', 34 | '.cc' : 'C', 35 | '.cpp' : 'C', 36 | '.cxx' : 'C', 37 | '.c++' : 'C', 38 | '.h' : 'C', 39 | '.hh' : 'C', 40 | '.hpp' : 'C', 41 | '.hxx' : 'C', 42 | '.l' : 'C', 43 | '.qml' : 'C', 44 | '.s' : 'C', 45 | '.txx' : 'C', 46 | '.y' : 'C', 47 | '.cs' : 'C#', 48 | '.groovy' : 'Java', 49 | '.java' : 'Java', 50 | '.jsp' : 'Java', 51 | '.scala' : 'Java', 52 | '.as' : 'ActionScript', 53 | '.js' : 'JavaScript', 54 | } 55 | 56 | def sourceWalk(scandir, dbpath): 57 | conn = sqlite3.connect(dbpath, check_same_thread = False) 58 | 59 | cursor = conn.cursor() 60 | osgen = os.walk(scandir) 61 | lenscandir = len(scandir) 62 | notfound = 0 63 | total = 0 64 | 65 | try: 66 | while True: 67 | i = osgen.next() 68 | for p in i[2]: 69 | if os.stat("%s/%s" % (i[0], p)).st_size == 0: 70 | continue 71 | p_nocase = p.lower() 72 | for extension in extensions.keys(): 73 | if (p_nocase.endswith(extension)): 74 | total = total + 1 75 | scanfile = open("%s/%s" % (i[0], p), 'r') 76 | h = hashlib.new('sha256') 77 | h.update(scanfile.read()) 78 | scanfile.close() 79 | filehash = h.hexdigest() 80 | cursor.execute('''select checksum from processed_file where checksum=? limit 1''', (filehash,)) 81 | res = cursor.fetchall() 82 | # there is at least one hit, so ignore 83 | if len(res) != 0: 84 | continue 85 | # no hits, so this is an interesting file 86 | else: 87 | print "%s" % os.path.join(scandir, i[0][lenscandir:],p) 88 | notfound = notfound + 1 89 | pass 90 | except StopIteration: 91 | pass 92 | print "Total files: %d" % total 93 | print "Files not found in database: %d" % notfound 94 | 95 | def main(argv): 96 | parser = OptionParser() 97 | parser.add_option("-d", "--database", action="store", dest="db", help="path to database", metavar="FILE") 98 | parser.add_option("-f", "--filedir", action="store", dest="filedir", help="path to top level directory containing source tree", metavar="DIR") 99 | (options, args) = parser.parse_args() 100 | if options.filedir == None: 101 | parser.error("Specify dir with files") 102 | if options.db == None: 103 | parser.error("Specify path to database") 104 | 105 | sourceWalk(options.filedir, options.db) 106 | 107 | if __name__ == "__main__": 108 | main(sys.argv) 109 | -------------------------------------------------------------------------------- /src/setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_rpm] 2 | release = 1 3 | packager = Armijn Hemel 4 | group = Development/Tools 5 | doc_files = LICENSE 6 | requires = python-magic, binutils, e2fsprogs, e2tools, squashfs-tools, coreutils, xz, xz-lzma-compat, zip, unzip, unrar, cabextract, unshield, p7zip, p7zip-plugins, cpio, tar, bzip2, mtd-utils, lzip, lzop, arj, icoutils, rpm, rpm-python, gettext, bat-extratools >= 27.0, ucl, upx, poppler-utils, netpbm-progs, libxml2, lrzip, ncompress, python-imaging, vorbis-tools, ctags, python-matplotlib, file, pydot, bsdiff, python-reportlab, liberation-sans-fonts, clamav, john, python-psycopg2, openssl 7 | -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | import glob 5 | import os.path 6 | 7 | setup(name='bat', 8 | version='37.0', 9 | description='Binary Analysis Tool', 10 | author='Binary Analysis Project', 11 | author_email='info@binaryanalysis.org', 12 | url='http://www.binaryanalysis.org/', 13 | packages=['bat'], 14 | 
license="Apache 2.0", 15 | scripts=['maintenance/busybox-appletname-extractor.py', 'maintenance/clonedbinit.py', 'bat-scan', 'busybox-compare-configs.py'], 16 | data_files=[ ('/etc/bat', ['bat-scan.config']), 17 | ], 18 | long_description="""The Binary Analysis Tool is a modular framework that assists with auditing 19 | the contents of compiled software. It makes it easier and cheaper to look 20 | inside technology, and this helps compliance and due diligence activities. 21 | 22 | The tool is freely available to everyone. The community can use it and 23 | participate in further development, and work together to help reduce errors 24 | when shipping devices or products containing Free and Open Source Software.""" 25 | ) 26 | --------------------------------------------------------------------------------