├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── create_prazi_graph.sh ├── doc └── pdn_cdn.png ├── py-src ├── infer_dependency_network_from_callgraphs.py ├── log.py ├── merge_unified_callgraphs.py ├── prepare_unified_callgraphs.py └── prune_cg.py └── src ├── bin ├── prazi.rs └── ufi.rs └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | # Do not include the user config file 13 | conf.ini -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: rust 3 | 4 | rust: 5 | - stable 6 | - nightly 7 | 8 | script: | 9 | cargo build --bin ufi --verbose && 10 | cargo build --bin rustprazi --verbose 11 | 12 | matrix: 13 | allow_failures: 14 | - rust: nightly 15 | include: 16 | before_script: 17 | - rustup component add rustfmt-preview 18 | script: 19 | - cargo fmt --all -- --check 20 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustprazi" 3 | version = "0.1.0" 4 | authors = ["Joseph Hejderup ", "Mortiz Beller ", "Georgios Gousios "] 5 | 6 | 7 | [[bin]] 8 | name = "rustprazi" 9 | path = "src/bin/prazi.rs" 10 | 11 | [[bin]] 12 | name = "ufi" 13 | path = "src/bin/ufi.rs" 14 | 15 | [dependencies] 16 | cargo = "0.31.0" 17 | clap = "2.32.0" 18 | crates-index = "0.12.0" 19 | filebuffer = "0.4.0" 20 | flate2 = "1.0.4" 21 | futures = "0.1.25" 22 | glob = "0.2.11" 23 | lazy_static = "1.2.0" 24 | quote = "0.4.2" 25 | rayon = "1.0" 26 | regex = "1.0.6" 27 | reqwest = "0.9.5" 28 | rust-ini = "0.13" 29 | serde = "1.0.80" 30 | serde_derive = "1.0.80" 31 | serde_json = "1.0" 32 | syn = {git = "https://github.com/jhejderup/syn", features = ["parsing", "printing", "clone-impls", "full", "extra-traits", "visit", "visit-mut"]} 33 | tar = "0.4.20" 34 | tokio-core = "0.1.17" 35 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 - onwards Joseph Hejderup, Moritz Beller, Georgios Gousios 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 - onwards Joseph Hejderup, Moritz Beller, Georgios Gousios 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RustPräzi ([rʌstpʁɛˈt͡siːz]) 2 | 3 | [![Build Status](https://travis-ci.org/praezi/rust.svg?branch=master)](https://travis-ci.org/praezi/rust) 4 | [![LOC](https://tokei.rs/b1/github/praezi/rust)](https://github.com/praezi/rust) 5 | [![Join the chat at https://gitter.im/praezi/rust](https://badges.gitter.im/praezi/rust.svg)](https://gitter.im/praezi/rust?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 6 | 7 | Constructing call-based dependency networks of [crates.io](https://crates.io) as conceptually described in 8 | 9 | >[Hejderup J, Beller M, Gousios G. Präzi: From Package-based to Precise Call-based Dependency Network Analyses. 2018.](https://pure.tudelft.nl/portal/files/46926997/main2.pdf) 10 | 11 | ## TL;DR: What does RustPräzi do? 12 | 13 | ### Description 14 | 15 | With RustPräzi, we go from coarse-grained package-based dependency networks (such as what GitHub uses for its [vulnerable package detection](https://help.github.com/articles/about-security-alerts-for-vulnerable-dependencies/)) to more fine-grained call-based dependency networks. These allow us to track, for example, whether a vulnerable function of a library is actually being used and whether a security warning really needs to be raised, making such analyses up to 3x more precise than package-based dependency networks.
16 | 17 | ![Package-based (PDN, above) versus Call-based Dependency Networks (CDN, below)](doc/pdn_cdn.png "Package-based (PDN, above) versus Call-based Dependency Networks (CDN, below)") 18 | 19 | ### Use cases 20 | 21 | RustPräzi opens the door to many new or more precise analyses: 22 | 23 | * Fine-grained security vulnerability propagation checking 24 | * Precise license compliance checking 25 | * Change impact and deprecation analysis ("Which clients break if I, as a library maintainer, remove this deprecated method?") 26 | * Health analyses of an entire ecosystem ("What are the most central functions?", "Where should we focus our testing efforts?", ...) 27 | * ... and more! 28 | 29 | ## Getting started 30 | 31 | ### Installation Prerequisites 32 | 33 | - The Rust toolchain with `rustup` (download at the [official website](https://www.rust-lang.org/en-US/install.html)) 34 | - Python 2.7 or 3.7 35 | - GNU Parallel 36 | - A pre-built binary of LLVM 4.0 (download at the [official website](http://releases.llvm.org/download.html#4.0.0)). In `conf.ini` (root of the repository), specify the path to the extracted LLVM folder. 37 | - Recommended OS: Ubuntu 16.04.3 LTS 38 | 39 | ### System Setup 40 | - :warning: Building crates can be dangerous, as for some crates this includes running tests. Hence, it is advised to build in a sandboxed environment. 41 | - 💻 We recommend running it on a very powerful system. Compiling 80k+ crates is no easy feat. 42 | 43 | 44 | ### 1. Create a `conf.ini` file at the root of the project with the following content 45 | 46 | ```ini 47 | encoding=utf-8 48 | 49 | [llvm] 50 | # specify the path to the untarred LLVM binary folder. 51 | path=/path_where/clang+llvm-4.0.0-[your_platform] 52 | 53 | [compiler] 54 | stable=1.23.0 55 | nightly=1.24.0 56 | 57 | [storage] 58 | # all data will be stored in this folder 59 | path=/where/you/want/to/store/prazi/data 60 | ``` 61 | 62 | Since bitcode generation changed in newer versions of Rust, we advise sticking to the compiler versions specified above. 63 | 64 | 65 | 66 | ### 2. Constructing call graphs of crates 67 | 68 | 1. Compile the tool 69 | 70 | ``` bash 71 | cargo build --bin rustprazi --release 72 | ``` 73 | 2. Download crates: the downloader fetches the latest [index](https://github.com/rust-lang/crates.io-index) data, builds a list of releases, and then downloads/untars them 74 | 75 | ``` 76 | ./target/release/rustprazi download 77 | ``` 78 | 3. Rewrite manifests: the manifest rewriter fixes invalid `Cargo.toml` files (e.g., ones specifying a non-existent local dependency) by emulating a dry run of `cargo publish` 79 | 80 | ``` bash 81 | ./target/release/rustprazi rewrite 82 | ``` 83 | 84 | 4. Build crates: this first attempts to build all downloaded crates using the stable compiler version specified in `conf.ini`. To use a nightly version for failing builds, add the `--nightly` flag 85 | 86 | ``` bash 87 | ./target/release/rustprazi build-crates 88 | ``` 89 | 90 | 5. Build LLVM call graphs 91 | 92 | ``` bash 93 | ./target/release/rustprazi build-callgraphs 94 | ``` 95 | 96 | ### 3. Construct RustPräzi 97 | 98 | 1. Install `rustfilt` for demangling Rust symbols 99 | 100 | ```bash 101 | cargo install rustfilt 102 | ``` 103 | 2. 
Run the graph generator script 104 | 105 | ``` 106 | ./create_prazi_graph.sh 2> err.log 1> out.log 107 | ``` 108 | Two graphs are generated: 109 | - `../cdn/graphs/callgraph.ufi.merged.graph` -- the call-based dependency network (CDN) 110 | - `../cdn/graphs/crate.dependency.callgraph.graph` -- the package-based dependency network derived from the CDN 111 | 112 | ### 4. Graph analysis with RustPräzi 113 | 114 | 115 | 116 | 117 |
118 | 119 | 120 | Loading Präzi with NetworkX 121 | 122 | 123 | ``` python 124 | import networkx as nx 125 | import re 126 | 127 | regex = r"^(.*?) \[label:" 128 | 129 | def load_prazi(file): 130 | PRAZI = nx.DiGraph() 131 | with open(file) as f: #callgraph.ufi.merged.graph 132 | for line in f: 133 | if "->" not in line: 134 | g = re.match(regex, line) 135 | if g: 136 | PRAZI.add_node(g.group(1).strip('"')) 137 | else: 138 | print "error, could not extract node: %s" % line 139 | else: 140 | g = re.match('\W*"(.*)" -> "(.*)";', line) 141 | if g: 142 | PRAZI.add_edge(g.group(1), g.group(2)) 143 | else: 144 | print "error, could not extract edge: %s" % line 145 | return PRAZI 146 | 147 | def load_prazi_dep(file): 148 | PRAZI_DEP = nx.DiGraph() 149 | with open(file) as f: #crate.dependency.callgraph.graph 150 | for line in f: 151 | if "io :: crates :: " in line: 152 | if "->" not in line: 153 | PRAZI_DEP.add_node(line[:-2]) 154 | else: 155 | g = re.match('\W*"(.*)" -> "(.*)";', line) 156 | if g and ("io :: crates" in g.group(1) and "io :: crates" in g.group(2)): 157 | PRAZI_DEP.add_edge(g.group(1), g.group(2)) 158 | else: 159 | print "skip edge: %s" % line 160 | else: 161 | continue 162 | return PRAZI_DEP 163 | 164 | ``` 165 |
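As a minimal usage sketch of the loaders above (assuming the two graph files produced by `create_prazi_graph.sh`; adjust the file paths to your `[storage]` location, and note that the PageRank step is just one example of a centrality analysis):

```python
import networkx as nx

# Load the call-based dependency network (CDN) and the derived package-based one (PDN)
cdn = load_prazi("callgraph.ufi.merged.graph")
pdn = load_prazi_dep("crate.dependency.callgraph.graph")

print("CDN: %d functions, %d call edges" % (cdn.number_of_nodes(), cdn.number_of_edges()))
print("PDN: %d crates, %d dependency edges" % (pdn.number_of_nodes(), pdn.number_of_edges()))

# Example analysis: the ten most central functions in the CDN by PageRank
ranks = nx.pagerank(cdn)
for fn, score in sorted(ranks.items(), key=lambda kv: kv[1], reverse=True)[:10]:
    print("%.6f  %s" % (score, fn))
```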
166 | 167 | # License 168 | 169 | This project is licensed under either of 170 | 171 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or 172 | http://www.apache.org/licenses/LICENSE-2.0) 173 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or 174 | http://opensource.org/licenses/MIT) 175 | 176 | at your option. 177 | 178 | ### Contribution 179 | 180 | Unless you explicitly state otherwise, any contribution intentionally submitted 181 | for inclusion in RustPräzi by you, as defined in the Apache-2.0 license, shall be 182 | dual licensed as above, without any additional terms or conditions. 183 | -------------------------------------------------------------------------------- /create_prazi_graph.sh: -------------------------------------------------------------------------------- 1 | CFG_FILE=conf.ini 2 | read_storage_config=($(awk -F '=' -v input="storage" '$1 ~ input{flag=1; next} $1 ~ /\[object/{flag=0; next} flag && NF {split($0,arr,"="); print arr[2] }' $CFG_FILE )) 3 | export PRAZI_DIR="${read_storage_config[0]}/crates/reg" 4 | export UFI_DIR="${read_storage_config[0]}/cdn/graphs" 5 | export BASH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 6 | export PYSRC_DIR="$BASH_DIR/py-src" 7 | export UFI_BIN="$BASH_DIR/target/release/ufi" 8 | 9 | cargo build --bin ufi --release 10 | mkdir -p $UFI_DIR 11 | cd $PRAZI_DIR 12 | export LD_LIBRARY_PATH=$(rustc --print sysroot)/lib:$LD_LIBRARY_PATH 13 | ls -d */* | parallel 'if [ -f {}/callgraph.dot ]; 14 | then rm {}/callgraph.unmangled.graph; 15 | rustfilt -i {}/callgraph.dot -o {}/callgraph.unmangled.graph; 16 | python $PYSRC_DIR/prune_cg.py $PRAZI_DIR/{} callgraph.unmangled.graph; 17 | $UFI_BIN $PRAZI_DIR/{}; 18 | python $PYSRC_DIR/prepare_unified_callgraphs.py $PRAZI_DIR/{}/callgraph.ufi.graph > $PRAZI_DIR/{}/callgraph.ufi.prepared.graph; fi' 19 | rm $UFI_DIR/callgraph.ufi.notmerged.graph 20 | find . 
-name "callgraph.ufi.prepared.graph" -print0 | parallel -j1 -0 "cat {} >> $UFI_DIR/callgraph.ufi.notmerged.graph" 21 | python $PYSRC_DIR/merge_unified_callgraphs.py $UFI_DIR/callgraph.ufi.notmerged.graph 1> $UFI_DIR/callgraph.ufi.merged.graph 2> $UFI_DIR/callgraph.ufi.merged.graph.log 22 | python $PYSRC_DIR/infer_dependency_network_from_callgraphs.py $UFI_DIR/callgraph.ufi.merged.graph 1> $UFI_DIR/crate.dependency.callgraph.graph 2> $UFI_DIR/crate.dependency.callgraph.graph.log -------------------------------------------------------------------------------- /doc/pdn_cdn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/praezi/rust/ec5aa0ed293a84c190e968e2f0c440a8796d087f/doc/pdn_cdn.png -------------------------------------------------------------------------------- /py-src/infer_dependency_network_from_callgraphs.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | #!/usr/bin/env python 3 | # Reads-in a merged call graph and outputs a merged dependency graph 4 | # 5 | # (c) 2018 - onwards Moritz Beller 6 | # 7 | # MIT/APACHE licensed -- check LICENSE files in top dir 8 | 9 | import sys 10 | import re 11 | import os.path 12 | import json 13 | 14 | crates = set() 15 | ufid_to_crate = {} 16 | crate_dependencies = set() 17 | 18 | 19 | def escape_label(label): 20 | return '"' + label + '"' 21 | 22 | 23 | class Crate: 24 | def __init__(self): 25 | self.occurrences = 0 26 | self.source = [] 27 | self.ufi = "" 28 | 29 | def __str__(self): 30 | classMembers = vars(self) 31 | return ( 32 | escape_label(self.label) 33 | + " [" 34 | + ", ".join( 35 | '%s: "%s"' % (item, str(classMembers[item])) 36 | for item in sorted(classMembers) 37 | ) 38 | + "];" 39 | ) 40 | 41 | 42 | class CrateDependency: 43 | def __init__(self): 44 | self.source 45 | self.target 46 | self.occurrences = 0 47 | 48 | def __str__(self): 49 | classMembers = vars(self) 50 | return ( 51 | escape_label(self.label) 52 | + " [" 53 | + ", ".join( 54 | '%s: "%s"' % (item, str(classMembers[item])) 55 | for item in sorted(classMembers) 56 | ) 57 | + "];" 58 | ) 59 | 60 | 61 | file = sys.argv[1] 62 | if not os.path.exists(file): 63 | print >> sys.stderr, file + " does not exist!" 64 | exit(1) 65 | 66 | with open(file) as f: 67 | for line in f: 68 | m = re.match('\W*"(.*?)" \[(.*)\];', line) 69 | if m: 70 | node_name = m.group(1) 71 | 72 | deps = re.findall("(io :: crates :: (.+?) 
:: (.+?)) ", node_name) 73 | if deps is not None: 74 | deps = set(deps) 75 | if deps.__len__() > 0: 76 | for item in deps: 77 | source_crate = item[0] 78 | crates.add(source_crate) 79 | 80 | if deps.__len__() > 1: 81 | # If there is more than one source crate, we try and find it by its internal (defining) crate 82 | attributes = m.group(2) 83 | namespaces = re.match(r".*type: \"(.*)\"", attributes) 84 | namespace_list = json.loads(namespaces.group(1)) 85 | internal_crate = set( 86 | [ 87 | ns["path"] 88 | for ns in namespace_list 89 | if ns["symbol"] == "InternalCrate" 90 | ] 91 | ) 92 | 93 | if len(internal_crate) == 1: 94 | print >> sys.stderr, "Found multi source for '" + line + "'" 95 | source_crate = internal_crate.pop() 96 | ufid_to_crate[node_name] = source_crate 97 | # Certain function symbols depend on structs and traits from other packages 98 | external_crates = set( 99 | [ 100 | escape_label(source_crate) 101 | + " -> " 102 | + escape_label(ns["path"]) 103 | for ns in namespace_list 104 | if ns["symbol"] == "ExternalCrate" 105 | ] 106 | ) 107 | for dep in external_crates: 108 | if dep not in crate_dependencies: 109 | crate_dependencies.add(dep) 110 | else: 111 | # We could not find a source crate, hence we cannot embed this node 112 | print >> sys.stderr, "Could not find source for '" + line + "'" 113 | continue 114 | else: 115 | ufid_to_crate[node_name] = source_crate 116 | 117 | else: 118 | g = re.match('\W*"(.*)" -> "(.*)";', line) 119 | if g: 120 | if g.group(1) in ufid_to_crate and g.group(2) in ufid_to_crate: 121 | source = ufid_to_crate[g.group(1)] 122 | target = ufid_to_crate[g.group(2)] 123 | 124 | if source == target: 125 | # It is trivially true that a package depends on itself, skip such dependencies 126 | continue 127 | 128 | dep = escape_label(source) + " -> " + escape_label(target) 129 | if dep not in crate_dependencies: 130 | crate_dependencies.add(dep) 131 | else: 132 | print >> sys.stderr, "Could not find both dependencies '" + g.group( 133 | 1 134 | ) + "' and '" + g.group(2) + "'." 
135 | 136 | else: 137 | print >> sys.stderr, "Could not match line '" + line + "'" 138 | 139 | 140 | for node in crates: 141 | print node + ";" 142 | 143 | for edge in crate_dependencies: 144 | print edge + ";" 145 | -------------------------------------------------------------------------------- /py-src/log.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | #!/usr/bin/env python 3 | # Logging configuration 4 | # 5 | # (c) 2017 - onwards Georgios Gousios 6 | # 7 | # MIT/APACHE licensed -- check LICENSE files in top dir 8 | 9 | import sys 10 | import logging 11 | 12 | logging.basicConfig( 13 | format="%(asctime)s [%(process)d]%(filename)s:%(lineno)d(%(funcName)s) --- %(message)s", 14 | level=logging.DEBUG, 15 | stream=sys.stderr, 16 | ) 17 | 18 | from logging import debug, error, info 19 | -------------------------------------------------------------------------------- /py-src/merge_unified_callgraphs.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | #!/usr/bin/env python 3 | # Reads-in a non-merged call graph file in Präzi syntax and outputs a merged call graph, with statistical information 4 | # annotated 5 | # 6 | # (c) 2018 - onwards Moritz Beller 7 | # 8 | # MIT/APACHE licensed -- check LICENSE files in top dir 9 | 10 | import sys 11 | import re 12 | import os.path 13 | 14 | nodes = {} 15 | edges = set() 16 | 17 | 18 | def escape_label(label): 19 | return '"' + label + '"' 20 | 21 | 22 | class CGFunction: 23 | """A simple data holder class for function nodes""" 24 | 25 | def __init__(self): 26 | # Number of nodes that are folded in this node, that had an internal crate type 27 | self.nodes_with_internal_crate = 0 28 | # Number of nodes that are folded in this node, that did not have an internal crate type 29 | self.nodes_without_internal_crate = 0 30 | # Number of nodes that are folded in this node, that are not null 31 | self.nodes_not_null = 0 32 | # Number of nodes that are folded in this node, that are null 33 | self.nodes_null = 0 34 | # Number of nodes that are folded in this node, that were flagged as external 35 | self.nodes_external = 0 36 | # Total number of nodes that are folded in this node (must be at least 1). Any number greater than 1 means nodes 37 | # have been merged 38 | self.nodes = 0 39 | self.label = "" 40 | self.type = "" 41 | 42 | def __str__(self): 43 | classMembers = vars(self) 44 | return ( 45 | escape_label(self.label) 46 | + " [" 47 | + ", ".join( 48 | '%s: "%s"' % (item, str(classMembers[item])) 49 | for item in sorted(classMembers) 50 | ) 51 | + "];" 52 | ) 53 | 54 | 55 | file = sys.argv[1] 56 | if not os.path.exists(file): 57 | print >> sys.stderr, file + " does not exist!" 
58 | exit(1) 59 | 60 | with open(file) as f: 61 | for line in f: 62 | # example for a Node in Präzi-syntax: "{ core :: num :: < impl usize > :: wrapping_mul }" [shape=record,label="{ core :: num :: < impl usize > :: wrapping_mul }",ext="{False}",null="{False}",type="{[{"path":"core :: num :: < impl usize > :: wrapping_mul","symbol":"RustCrate"},{"path":"usize","symbol":"RustPrimitiveType"}]}"]; 63 | m = re.match('\W*"(.*?)" \[(.*)\];', line) 64 | if m: 65 | node_name = m.group(1) 66 | attributes = m.group(2) 67 | attr_m = re.match( 68 | '.*ext="{(\w+?)}",null="{(\w+?)}",type="{(.+)}"', attributes 69 | ) 70 | 71 | if node_name not in nodes: 72 | nodes[node_name] = CGFunction() 73 | nodes[node_name].label = node_name 74 | 75 | nodes[node_name].nodes += 1 76 | 77 | if attr_m: 78 | if "InternalCrate" in attr_m.group(3): 79 | nodes[node_name].nodes_with_internal_crate += 1 80 | nodes[node_name].type = attr_m.group(3) 81 | else: 82 | nodes[node_name].nodes_without_internal_crate += 1 83 | 84 | if nodes[node_name].type == "": 85 | nodes[node_name].type = attr_m.group(3) 86 | 87 | if attr_m.group(1) == "True": 88 | nodes[node_name].nodes_external += 1 89 | if attr_m.group(2) == "False": 90 | nodes[node_name].nodes_not_null += 1 91 | else: 92 | nodes[node_name].nodes_null += 1 93 | 94 | else: 95 | g = re.match('\W*"(.*)" -> "(.*)";', line) 96 | if g: 97 | edges.add(g.group(0)) 98 | else: 99 | print >> sys.stderr, "Could not match line '" + line + "'" 100 | 101 | total = CGFunction() 102 | 103 | nodes_with_internal_crate = 0 104 | nodes_without_internal_crate = 0 105 | nodes_merged_with_definition_expanded = 0 106 | 107 | for node in nodes: 108 | node = nodes[node] 109 | total.nodes += node.nodes 110 | total.nodes_not_null += node.nodes_not_null 111 | total.nodes_null += node.nodes_null 112 | total.nodes_external += node.nodes_external 113 | if node.nodes_with_internal_crate > 0 and nodes_without_internal_crate > 0: 114 | nodes_merged_with_definition_expanded += 1 115 | elif node.nodes_with_internal_crate > 0: 116 | total.nodes_with_internal_crate += 1 117 | nodes_with_internal_crate += node.nodes_with_internal_crate 118 | elif node.nodes_without_internal_crate > 0: 119 | total.nodes_without_internal_crate += 1 120 | nodes_without_internal_crate += node.nodes_without_internal_crate 121 | print node 122 | 123 | for edge in edges: 124 | print edge 125 | 126 | print >> sys.stderr, "I reduced to " + str(len(nodes)) + " nodes, starting from " + str( 127 | total.nodes 128 | ) + "." 129 | print >> sys.stderr, "Of these, " + str( 130 | total.nodes_not_null 131 | ) + " had a merged non-null node and " + str(total.nodes_null) + " did not." 132 | print >> sys.stderr, str( 133 | total.nodes_with_internal_crate 134 | ) + " internalcrate-type merged nodes and " + str( 135 | total.nodes_without_internal_crate 136 | ) + " without." 137 | print >> sys.stderr, "In total, " + str( 138 | nodes_with_internal_crate 139 | ) + " internalcrate-type nodes were merged and " + str( 140 | total.nodes_without_internal_crate 141 | ) + " without." 142 | print >> sys.stderr, "nodes_merged_with_definition_expanded: " + str( 143 | nodes_merged_with_definition_expanded 144 | ) 145 | print >> sys.stderr, "Moreover, " + str( 146 | total.nodes_external 147 | ) + " had an external merged node!" 
148 | -------------------------------------------------------------------------------- /py-src/prepare_unified_callgraphs.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | #!/usr/bin/env python 3 | # Reads-in an LLVM opt-generated, Präzi-annotated "dot file" and outputs it such that it can easily be merged 4 | # 5 | # (c) 2018 - onwards Moritz Beller 6 | # 7 | # MIT/APACHE licensed -- check LICENSE files in top dir 8 | import sys 9 | import re 10 | import pprint 11 | import os.path 12 | 13 | node_to_label = {} 14 | lines = [] 15 | 16 | 17 | def escape_label(label): 18 | return '"' + label + '"' 19 | 20 | 21 | file = sys.argv[1] 22 | if not os.path.exists(file): 23 | print >> sys.stderr, file + " does not exist!" 24 | exit(1) 25 | 26 | with open(file) as f: 27 | for line in f: 28 | # example for a Node in Präzi-syntax: Node0x580c640 [shape=record,label="{ core :: num :: < impl usize > :: wrapping_mul }",ext="{False}",null="{False}",type="{[{"path":"core :: num :: < impl usize > :: wrapping_mul","symbol":"RustCrate"},{"path":"usize","symbol":"RustPrimitiveType"}]}"]; 29 | m = re.match('\W*(Node0x.*) \[(.*),label="\{ (.*?) \}"(.*)];', line) 30 | if m: 31 | node_to_label[m.group(1)] = m.group(3) 32 | print escape_label(m.group(3)) + " [" + m.group(2) + m.group(4) + "];" 33 | else: 34 | no_wspace = re.match('\W*(Node0x.*) \[(.*),label="\{(.*?)\}"(.*)];', line) 35 | if no_wspace: 36 | node_to_label[no_wspace.group(1)] = no_wspace.group(3) 37 | print escape_label(no_wspace.group(3)) + " [" + no_wspace.group( 38 | 2 39 | ) + no_wspace.group(4) + "];" 40 | else: 41 | lines.append(line.rstrip("\n")) 42 | 43 | for line in lines: 44 | g = re.match("\W*(Node0x.*) -> (Node0x.*);", line) 45 | if g: 46 | label1 = node_to_label[g.group(1)] 47 | label2 = node_to_label[g.group(2)] 48 | print escape_label(label1) + " -> " + escape_label(label2) + ";" 49 | -------------------------------------------------------------------------------- /py-src/prune_cg.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | #!/usr/bin/env python 3 | # Removing external and null nodes from an LLVM CG 4 | # 5 | # (c) 2017 - onwards Joseph Hejderup 6 | # 7 | # MIT/APACHE licensed -- check LICENSE files in top dir 8 | import re 9 | from log import * 10 | 11 | ### 12 | ### Regex definitions 13 | ### 14 | re_node_pattern = re.compile(r"\W*(Node0x.*) \[.*,label=\"\{(.*)\}\"\];") 15 | re_edge_pattern = re.compile(r"\W*(Node0x.*) -> (Node0x.*);") 16 | 17 | 18 | def parse(dotfile): 19 | edges = [] # [...(Node0xSSS, Node0xDDD),(Node0xAAA, Node0xCCC),..] 20 | nodes = {} # Node0x -> core::print... 
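    # Context for the pruning done by this script: the dot files emitted by LLVM's
    # `opt -dot-callgraph` typically contain two synthetic nodes besides the real
    # functions -- an "external node" acting as a catch-all caller of externally
    # reachable functions, and a "null node", an edge target with no node definition
    # that stands in for indirect/unknown callees. The former is recorded below in
    # external_node, find_null_node() detects the latter, and process() strips both
    # from the pruned graph it writes out.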
21 | external_node = None 22 | 23 | with open(dotfile, "r") as dot: 24 | for line in dot: 25 | try_parse_node = re.search(re_node_pattern, line) 26 | if try_parse_node: 27 | if not nodes.has_key(try_parse_node.group(1)): 28 | nodes[try_parse_node.group(1)] = try_parse_node.group(2) 29 | if try_parse_node.group(2) == "external node": 30 | external_node = try_parse_node.group(1) 31 | else: 32 | info( 33 | "%s already exist in the lookup table, here is the diff in value %s - %s", 34 | try_parse_node.group(1), 35 | nodes[try_parse_node.group(1)], 36 | try_parse_node.group(2), 37 | ) 38 | else: 39 | try_parse_edge = re.search(re_edge_pattern, line) 40 | if try_parse_edge: 41 | edges.append((try_parse_edge.group(1), try_parse_edge.group(2))) 42 | if not nodes: 43 | info("%s: no nodes", dotfile) 44 | sys.exit() 45 | return edges, nodes, external_node 46 | 47 | 48 | def checkEqual(lst): 49 | return not lst or [lst[0]] * len(lst) == lst 50 | 51 | 52 | def find_null_node(nodes, edges): 53 | """ 54 | The null node does not have a node definition and has a node id that does not exist in the node lookup table 55 | """ 56 | null_nodes = [edge[1] for edge in edges if not nodes.has_key(edge[1])] 57 | 58 | if not null_nodes: 59 | info("there are no null nodes!") 60 | return None 61 | if not checkEqual(null_nodes): 62 | error("there are more null nodes, should only be one!") 63 | sys.exit() 64 | return null_nodes[0] 65 | 66 | 67 | def process(dotfolder, filename): 68 | edges, nodes, external_node = parse(dotfolder + "/" + filename) 69 | null_node = find_null_node(nodes, edges) 70 | pruned_edges = [] 71 | null_nodes = [] 72 | external_nodes = [] 73 | 74 | # Node0xAAA -> null node, external node -> Node0xAAA 75 | for edge in edges: 76 | if (edge[1] != null_node) and (edge[0] != external_node): 77 | pruned_edges.append(edge) 78 | if edge[1] == null_node: 79 | null_nodes.append(edge[0]) 80 | if edge[0] == external_node: 81 | external_nodes.append(edge[1]) 82 | f = open(dotfolder + "/callgraph.unmangled.pruned.graph", "w") 83 | f.write('digraph "Call graph" {\n') 84 | for key, value in nodes.iteritems(): 85 | if not value == "external node": 86 | f.write( 87 | "\t" 88 | + key 89 | + ' [shape=record,label="{' 90 | + value 91 | + '}",ext="{' 92 | + str(key in external_nodes) 93 | + '}",null="{' 94 | + str(key in null_nodes) 95 | + '}"];\n' 96 | ) 97 | for edge in pruned_edges: 98 | f.write("\t" + edge[0] + " -> " + edge[1] + ";\n") 99 | f.write("}\n") 100 | f.close() 101 | 102 | 103 | process(sys.argv[1], sys.argv[2]) 104 | -------------------------------------------------------------------------------- /src/bin/prazi.rs: -------------------------------------------------------------------------------- 1 | // Download package sources from crates.io, validates build manifests and construct LLVM call graphs 2 | // 3 | // (c) 2018 - onwards Joseph Hejderup 4 | // 5 | // MIT/APACHE licensed -- check LICENSE files in top dir 6 | extern crate clap; 7 | extern crate crates_index; 8 | extern crate flate2; 9 | extern crate futures; 10 | extern crate reqwest; 11 | extern crate serde_json; 12 | extern crate tar; 13 | extern crate tokio_core; 14 | #[macro_use] 15 | extern crate lazy_static; 16 | extern crate glob; 17 | extern crate ini; 18 | extern crate rayon; 19 | 20 | use clap::{App, Arg, SubCommand}; 21 | use crates_index::Index; 22 | use flate2::read::GzDecoder; 23 | use futures::{stream, Future, Stream}; 24 | use glob::glob; 25 | use ini::Ini; 26 | use rayon::prelude::*; 27 | use reqwest::r#async::{Client, Decoder}; 28 | 
use serde_json::{Error, Value}; 29 | use tar::Archive; 30 | 31 | use std::fs; 32 | use std::fs::File; 33 | use std::path::Path; 34 | use std::path::PathBuf; 35 | use std::process::Command; 36 | 37 | static CRATES_ROOT: &str = "https://crates-io.s3-us-west-1.amazonaws.com/crates"; 38 | 39 | lazy_static! { 40 | static ref CONFIG: Ini = { 41 | let dir = env!("CARGO_MANIFEST_DIR"); 42 | let conf = Ini::load_from_file(format!("{0}/{1}", dir, "conf.ini")).unwrap(); 43 | conf 44 | }; 45 | static ref PRAZI_DIR: String = { 46 | CONFIG 47 | .section(Some("storage")) 48 | .unwrap() 49 | .get("path") 50 | .unwrap() 51 | .to_string() 52 | }; 53 | } 54 | 55 | #[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Clone)] 56 | pub struct PraziCrate { 57 | pub name: String, 58 | pub version: String, 59 | } 60 | 61 | impl PraziCrate { 62 | pub fn url_src(&self) -> String { 63 | format!( 64 | "{0}/{1}/{1}-{2}.crate", 65 | CRATES_ROOT, self.name, self.version 66 | ) 67 | } 68 | 69 | pub fn dir(&self) -> String { 70 | format!( 71 | "{0}/crates/reg/{1}/{2}", 72 | &**PRAZI_DIR, self.name, self.version 73 | ) 74 | } 75 | 76 | pub fn dir_src(&self) -> String { 77 | format!("{0}/crates/reg/{1}", &**PRAZI_DIR, self.name) 78 | } 79 | 80 | pub fn has_bitcode(&self) -> bool { 81 | let res = glob(format!("{}/target/debug/deps/*.bc", self.dir()).as_str()) 82 | .expect("Failed to read glob pattern") 83 | .map(|v| v.is_ok()) 84 | .collect::>(); 85 | res.len() == 1 86 | } 87 | 88 | pub fn bitcode_path(&self) -> PathBuf { 89 | let res = glob(format!("{}/target/debug/deps/*.bc", self.dir()).as_str()) 90 | .expect("Failed to read glob pattern") 91 | .filter(|v| v.is_ok()) 92 | .map(|v| v.unwrap()) 93 | .collect::>(); 94 | res[0].to_path_buf() 95 | } 96 | } 97 | 98 | pub(crate) struct Registry { 99 | pub list: Vec, 100 | } 101 | 102 | type Result = std::result::Result>; 103 | 104 | const N: usize = 5; 105 | 106 | impl Registry { 107 | fn read(&mut self) { 108 | let index = Index::new(format!("{}/_index", &**PRAZI_DIR)); 109 | if !index.exists() { 110 | index 111 | .retrieve() 112 | .expect("Could not retrieve crates.io index"); 113 | } 114 | for krate in index.crates() { 115 | for version in krate.versions().iter().rev() { 116 | //we also consider yanked versions 117 | self.list.push(PraziCrate { 118 | name: krate.name().to_string(), 119 | version: version.version().to_string(), 120 | }); 121 | } 122 | } 123 | } 124 | 125 | fn update(&mut self) { 126 | let index = Index::new(format!("{}/_index", &**PRAZI_DIR)); 127 | index.retrieve_or_update().expect("should not fail"); 128 | for krate in index.crates() { 129 | for version in krate.versions().iter().rev() { 130 | //we also consider yanked versions 131 | self.list.push(PraziCrate { 132 | name: krate.name().to_string(), 133 | version: version.version().to_string(), 134 | }); 135 | } 136 | } 137 | } 138 | 139 | fn download_src(&self) -> Result<()> { 140 | let mut core = tokio_core::reactor::Core::new()?; 141 | let client = Client::new(); 142 | let responses = stream::iter_ok(self.list.iter().cloned()) 143 | .map(|krate| { 144 | client 145 | .get(&krate.url_src()) 146 | .send() 147 | .and_then(|mut res| { 148 | std::mem::replace(res.body_mut(), Decoder::empty()).concat2() 149 | }).map(move |body| { 150 | let mut archive = Archive::new(GzDecoder::new(body.as_ref())); 151 | let tar_dir = krate.dir_src(); 152 | let dst_dir = krate.dir(); 153 | archive.unpack(&tar_dir).unwrap(); 154 | fs::rename( 155 | format!("/{0}/{1}-{2}", &tar_dir, krate.name, krate.version), 156 | &dst_dir, 157 | 
).unwrap(); 158 | println!("Untared: {:?}", &krate.url_src()); 159 | }) 160 | }).buffer_unordered(N); 161 | let work = responses.for_each(|_| Ok(())); 162 | core.run(work)?; 163 | Ok(()) 164 | } 165 | 166 | fn validate_manifests(&self) { 167 | self.list.par_iter().for_each(|krate| { 168 | let dir = krate.dir(); 169 | if Path::new(&dir).exists() { 170 | let output = Command::new("cargo") 171 | .arg("read-manifest") 172 | .current_dir(dir) 173 | .output() 174 | .expect("failed to execute read-manifest"); 175 | 176 | if output.status.success() { 177 | // println!("Valid manifest"); 178 | let data = String::from_utf8_lossy(&output.stdout); 179 | let v: Value = serde_json::from_str(&data).unwrap(); 180 | let targets = v["targets"].as_array().unwrap(); 181 | for target in targets.iter() { 182 | for t in target["crate_types"].as_array().unwrap().iter() { 183 | // println!("crate_type: {}", t); 184 | } 185 | } 186 | } else { 187 | println!("Not valid manifest"); 188 | println!("stderr: {}", String::from_utf8_lossy(&output.stderr)); 189 | } 190 | } 191 | }); 192 | } 193 | 194 | fn rewrite_manifests(&self) { 195 | self.list.par_iter().for_each(|krate| { 196 | let dir = krate.dir(); 197 | if Path::new(&dir).exists() && !Path::new(&format!("{}/Cargo.toml.orig", &dir)).exists() 198 | { 199 | let output = Command::new("cargo") 200 | .arg("publish") 201 | .args(&["--no-verify", "--dry-run", "--allow-dirty"]) 202 | .current_dir(&dir) 203 | .output() 204 | .expect("failed to execute dry-run publish"); 205 | 206 | if output.status.success() { 207 | let new_file = format!( 208 | "{0}/target/package/{1}-{2}.crate", 209 | &dir, krate.name, krate.version 210 | ); 211 | if Path::new(&new_file).exists() { 212 | let data = File::open(&new_file).unwrap(); 213 | let decompressed = GzDecoder::new(data); 214 | let mut archive = Archive::new(decompressed); 215 | let tar_dir = krate.dir_src(); 216 | let dst_dir = krate.dir(); 217 | archive.unpack(&tar_dir).unwrap(); 218 | fs::remove_dir_all(&dst_dir).unwrap(); 219 | fs::rename( 220 | format!("/{0}/{1}-{2}", &tar_dir, krate.name, krate.version), 221 | &dst_dir, 222 | ).unwrap(); 223 | println!("Repackaged: {:?}", &krate.url_src()); 224 | } 225 | } else { 226 | println!("Package not publishable with the running Cargo version"); 227 | } 228 | } 229 | }); 230 | } 231 | 232 | fn compile(&self, nightly: bool) { 233 | let mut rustup_args = vec!["run"]; 234 | let version = if nightly { 235 | rustup_args.push("nightly"); 236 | println!("running nightly compiler"); 237 | CONFIG 238 | .section(Some("compiler")) 239 | .unwrap() 240 | .get("nightly") 241 | .unwrap() 242 | } else { 243 | println!("running stable compiler"); 244 | CONFIG 245 | .section(Some("compiler")) 246 | .unwrap() 247 | .get("stable") 248 | .unwrap() 249 | }; 250 | 251 | self.list.par_iter().for_each(|krate| { 252 | let dir = krate.dir(); 253 | if Path::new(&dir).exists() { 254 | let output = Command::new("rustup") 255 | .args(&rustup_args) 256 | .arg(version) 257 | .args(&["cargo", "rustc", "--lib", "--", "--emit=llvm-bc"]) 258 | .current_dir(&dir) 259 | .output() 260 | .expect("failed to execute cargo build"); 261 | if output.status.success() { 262 | println!("build done!"); 263 | } else { 264 | println!("build failed"); 265 | println!("stderr: {}", String::from_utf8_lossy(&output.stderr)); 266 | } 267 | } 268 | }); 269 | } 270 | 271 | fn build_callgraph(&self) { 272 | let llvm_path = CONFIG.section(Some("llvm")).unwrap().get("path").unwrap(); 273 | self.list.par_iter().for_each(|krate| { 274 | let dir = 
krate.dir(); 275 | if krate.has_bitcode() { 276 | let output = Command::new(format!("{}/bin/opt", llvm_path)) 277 | .current_dir(&dir) 278 | .arg("-dot-callgraph") 279 | .arg(krate.bitcode_path()) 280 | .output() 281 | .expect("failed to execute llvm opt"); 282 | if output.status.success() { 283 | println!("callgraph built: {:?}", krate); 284 | } else { 285 | println!("callgraph failed failed"); 286 | println!("stderr: {}", String::from_utf8_lossy(&output.stderr)); 287 | } 288 | } else { 289 | println!("no bitcode: {:?}", krate) 290 | } 291 | }); 292 | } 293 | } 294 | 295 | fn main() { 296 | let mut reg = Registry { list: Vec::new() }; 297 | 298 | let matches = App::new("rustprazi") 299 | .version("0.1.0") 300 | .about("Rustpräzi: generate call-based dependency networks of crates.io registry") 301 | .arg(Arg::with_name("update").long("update").help("Update index")) 302 | .subcommand(SubCommand::with_name("download").about("download registry crate sources")) 303 | .subcommand(SubCommand::with_name("validate").about("validate Cargo.toml files")) 304 | .subcommand( 305 | SubCommand::with_name("rewrite") 306 | .about("rewrite Cargo.toml to remove local Path dependencies"), 307 | ).subcommand( 308 | SubCommand::with_name("build-callgraphs") 309 | .about("construct Crate-wide LLVM callgraphss"), 310 | ).subcommand( 311 | SubCommand::with_name("build-crates") 312 | .about("build all crates") 313 | .arg( 314 | Arg::with_name("nightly") 315 | .long("nightly") 316 | .short("n") 317 | .help("run nightly compiler"), 318 | ), 319 | ).get_matches(); 320 | 321 | if matches.is_present("update") { 322 | reg.update(); 323 | println!("Done with updating!"); 324 | } 325 | 326 | if let Some(matches) = matches.subcommand_matches("download") { 327 | reg.read(); 328 | reg.download_src().unwrap(); 329 | println!("Done with downloading!"); 330 | } 331 | 332 | if let Some(matches) = matches.subcommand_matches("validate") { 333 | reg.read(); 334 | reg.validate_manifests(); 335 | } 336 | 337 | if let Some(matches) = matches.subcommand_matches("rewrite") { 338 | reg.read(); 339 | reg.rewrite_manifests(); 340 | } 341 | 342 | if let Some(matches) = matches.subcommand_matches("build-callgraphs") { 343 | reg.read(); 344 | reg.build_callgraph(); 345 | } 346 | 347 | if let Some(matches) = matches.subcommand_matches("build-crates") { 348 | reg.read(); 349 | if matches.is_present("nightly") { 350 | reg.compile(true); 351 | } else { 352 | reg.compile(false); 353 | } 354 | } 355 | } 356 | -------------------------------------------------------------------------------- /src/bin/ufi.rs: -------------------------------------------------------------------------------- 1 | // Constructs a unique function identifier for Präzi 2 | // 3 | // (c) 2018 - onwards Joseph Hejderup 4 | // 5 | // MIT/APACHE licensed -- check LICENSE files in top dir 6 | extern crate cargo; 7 | extern crate filebuffer; 8 | extern crate quote; 9 | extern crate syn; 10 | #[macro_use] 11 | extern crate lazy_static; 12 | extern crate regex; 13 | #[macro_use] 14 | extern crate serde_json; 15 | #[macro_use] 16 | extern crate serde_derive; 17 | extern crate ini; 18 | 19 | use cargo::core::resolver::Resolve; 20 | use cargo::core::Package; 21 | use cargo::core::Workspace; 22 | use cargo::ops::load_pkg_lockfile; 23 | use cargo::util::Config; 24 | use filebuffer::FileBuffer; 25 | use ini::Ini; 26 | use quote::ToTokens; 27 | use regex::Regex; 28 | use std::collections::HashMap; 29 | use std::env; 30 | use std::fs::OpenOptions; 31 | use std::io::prelude::*; 32 | use 
std::path::Path; 33 | use std::path::PathBuf; 34 | use std::str; 35 | use syn::{Ident, PathSegment}; 36 | 37 | lazy_static! { 38 | static ref CONFIG: Ini = { 39 | let dir = env!("CARGO_MANIFEST_DIR"); 40 | let conf = Ini::load_from_file(format!("{0}/{1}", dir, "conf.ini")).unwrap(); 41 | conf 42 | }; 43 | static ref PRAZI_DIR: String = { 44 | CONFIG 45 | .section(Some("storage")) 46 | .unwrap() 47 | .get("path") 48 | .unwrap() 49 | .to_string() 50 | }; 51 | } 52 | 53 | fn is_a_node(text: &str) -> bool { 54 | lazy_static! { 55 | static ref RE: Regex = Regex::new(r"(Node.*?) \[").unwrap(); 56 | } 57 | RE.is_match(text) 58 | } 59 | 60 | fn extract_node_data(text: &str) -> Vec<&str> { 61 | lazy_static! { 62 | static ref RE: Regex = Regex::new(r#""\{(.*?)\}""#).unwrap(); 63 | } 64 | RE.captures_iter(text) 65 | .map(|caps| caps.get(1).map_or("", |m| m.as_str())) 66 | .collect::>() 67 | } 68 | 69 | fn build_valid_rust_ident(text: &str) -> String { 70 | let colon2 = str::replace(text, ".", "_"); 71 | let c = str::replace(colon2.as_str(), "-", "_"); 72 | str::replace(c.as_str(), "+", "_") 73 | } 74 | 75 | fn read_ws(path: &str) -> Result<(Package, Resolve), cargo::CargoError> { 76 | let config = Config::default().expect("Using Default config"); 77 | let ws = Workspace::new(Path::new(path), &config)?; 78 | let pkg = ws.current()?; 79 | let lock_file = load_pkg_lockfile(&ws)?; 80 | Ok((pkg.clone(), lock_file.unwrap())) 81 | } 82 | 83 | fn read_lib_name_from_path(path: &str) -> Result { 84 | let config = Config::default().expect("Should have config file"); 85 | let ws = Workspace::new(Path::new(path), &config)?; 86 | let targets = ws 87 | .current() 88 | .unwrap() 89 | .targets() 90 | .into_iter() 91 | .filter(|target| target.is_lib() || target.is_dylib() || target.is_cdylib()) 92 | .collect::>(); 93 | Ok(targets[0].name().to_string()) 94 | } 95 | 96 | fn read_lib_name_from_pkg(pkg: &Package) -> Result { 97 | let targets = pkg 98 | .targets() 99 | .into_iter() 100 | .filter(|target| target.is_lib() || target.is_dylib() || target.is_cdylib()) 101 | .collect::>(); 102 | Ok(targets[0].name().to_string()) 103 | } 104 | 105 | #[derive(Debug, Clone)] 106 | struct PkgIdentifier { 107 | pkg_name: String, 108 | lib_name: String, 109 | version: String, 110 | } 111 | 112 | impl PkgIdentifier { 113 | fn new(pkg_name: &str, lib_name: &str, version: &str) -> PkgIdentifier { 114 | PkgIdentifier { 115 | pkg_name: str::replace(pkg_name, "-", "_"), 116 | lib_name: str::replace(lib_name, "-", "_"), 117 | version: version.to_string(), 118 | } 119 | } 120 | 121 | fn pkg_name(&self) -> &str { 122 | &self.pkg_name 123 | } 124 | 125 | fn lib_name(&self) -> &str { 126 | &self.lib_name 127 | } 128 | 129 | fn version(&self) -> &str { 130 | &self.version 131 | } 132 | } 133 | 134 | fn fetch_deps( 135 | cargo_toml_path: &str, 136 | ) -> Result<(PkgIdentifier, Vec>), cargo::CargoError> { 137 | let (pkg, lock_file) = read_ws(cargo_toml_path)?; 138 | let int_lib_name = read_lib_name_from_pkg(&pkg)?; 139 | Ok(( 140 | PkgIdentifier::new( 141 | &pkg.name(), 142 | int_lib_name.as_str(), 143 | pkg.version().to_string().as_str(), 144 | ), 145 | lock_file 146 | .iter() 147 | .map(|dep| { 148 | let cargo_toml_path = format!( 149 | "{0}/crates/reg/{1}/{2}/Cargo.toml", 150 | &**PRAZI_DIR, 151 | dep.name(), 152 | dep.version() 153 | ); 154 | let lib_name = read_lib_name_from_path(cargo_toml_path.as_str()); 155 | if let Err(e) = lib_name { 156 | Err(e) 157 | } else { 158 | Ok(PkgIdentifier::new( 159 | &dep.name(), 160 | 
lib_name.unwrap().as_str(), 161 | dep.version().to_string().as_str(), 162 | )) 163 | } 164 | }).collect::>>(), 165 | )) 166 | } 167 | 168 | fn is_rust_crate_ident(input: &str) -> bool { 169 | input == "alloc" 170 | || input == "core" 171 | || input == "proc_macro" 172 | || input == "std" 173 | || input == "std_unicode" 174 | } 175 | 176 | //https://github.com/rust-lang/rust/blob/5430c0c5c0fbdfb8e89358a187d2f9a8d4b796d4/src/librustc_trans/back/symbol_export.rs 177 | fn is_rust_internal_symbol(input: &str) -> bool { 178 | input.starts_with("__rust_") 179 | || input.starts_with("__rdl_") 180 | || input.starts_with("rust_eh_") 181 | || input.starts_with("__rustc_derive_registrar") 182 | } 183 | 184 | fn is_llvm_symbol(input: &str) -> bool { 185 | input.starts_with("llvm.") 186 | } 187 | 188 | fn is_rust_type(input: &str) -> bool { 189 | input == "bool" 190 | || input == "u8" 191 | || input == "u16" 192 | || input == "u32" 193 | || input == "u64" 194 | || input == "i8" 195 | || input == "i16" 196 | || input == "i32" 197 | || input == "i64" 198 | || input == "binary32" 199 | || input == "binary64" 200 | || input == "f32" 201 | || input == "f64" 202 | || input == "usize" 203 | || input == "isize" 204 | || input == "char" 205 | || input == "String" 206 | || input == "str" 207 | } 208 | 209 | #[derive(Debug, Clone, Serialize, Deserialize)] 210 | enum Symbol { 211 | InternalCrate, 212 | ExternalCrate, 213 | RustCrate, 214 | LLVMSymbol, 215 | RustSymbol, 216 | Unknown { reason: SymbolError }, 217 | ExportedSymbol, //basically C symbol 218 | RustPrimitiveType, 219 | } 220 | 221 | #[derive(Debug, Clone, Serialize, Deserialize)] 222 | enum SymbolError { 223 | NoDepFound, 224 | ParseErrorAST, //use this one for parsing into AST error 225 | } 226 | 227 | #[derive(Debug, Clone, Serialize, Deserialize)] 228 | struct NamespacePath { 229 | path: String, 230 | symbol: Symbol, 231 | } 232 | 233 | #[derive(Debug, Clone)] 234 | struct PathVisitor<'a> { 235 | dependencies: &'a HashMap, 236 | krate: &'a PkgIdentifier, 237 | namespaces: Vec, 238 | update_qself_pos: bool, 239 | } 240 | 241 | impl<'a> syn::visit_mut::VisitMut for PathVisitor<'a> { 242 | fn visit_expr_path_mut(&mut self, _i: &mut syn::ExprPath) { 243 | for it in &mut _i.attrs { 244 | self.visit_attribute_mut(it) 245 | } 246 | if let Some(ref mut it) = _i.qself { 247 | self.visit_qself_mut(it) 248 | }; 249 | self.visit_path_mut(&mut _i.path); 250 | 251 | if let Some(ref mut qself) = _i.qself { 252 | //about the position https://docs.rs/syn/0.13.1/syn/struct.QSelf.html 253 | if self.update_qself_pos == true { 254 | qself.position += 4; // len(io :: crates :: pkg_name :: pkg_ver) 255 | } 256 | } 257 | } 258 | 259 | fn visit_type_path_mut(&mut self, _i: &mut syn::TypePath) { 260 | if let Some(ref mut it) = _i.qself { 261 | self.visit_qself_mut(it) 262 | }; 263 | 264 | if let Some(ref mut it) = _i.qself { 265 | self.visit_qself_mut(it) 266 | }; 267 | self.visit_path_mut(&mut _i.path); 268 | 269 | if let Some(ref mut qself) = _i.qself { 270 | //about the position https://docs.rs/syn/0.13.1/syn/struct.QSelf.html 271 | if self.update_qself_pos == true { 272 | qself.position += 4; // len(io :: crates :: pkg_name :: pkg_ver) 273 | } 274 | } 275 | } 276 | 277 | fn visit_qself_mut(&mut self, qself: &mut syn::QSelf) { 278 | self.visit_type_mut(&mut *qself.ty); 279 | //about the position https://docs.rs/syn/0.13.1/syn/struct.QSelf.html 280 | if self.update_qself_pos == true { 281 | qself.position += 4; // len(io :: crates :: pkg_name :: pkg_ver) 282 | } 283 | } 284 
285 |     fn visit_path_mut(&mut self, path: &mut syn::Path) {
286 |         let namespace_ast = path.clone();
287 |         let first_segment = namespace_ast.segments.first().unwrap();
288 |         let first_seg_ident = first_segment.value().ident.as_ref();
289 |         if path.leading_colon.is_none() {
290 |             // fn symbols that are just names and no :: (i.e. no namespace)
291 |             if path.segments.len() == 1 && first_segment.punct().is_none() {
292 |                 if is_rust_internal_symbol(first_seg_ident) {
293 |                     self.namespaces.push(NamespacePath {
294 |                         path: path.clone().into_tokens().to_string(),
295 |                         symbol: Symbol::RustSymbol,
296 |                     });
297 |                     return;
298 |                 }
299 |                 if is_llvm_symbol(first_seg_ident) {
300 |                     self.namespaces.push(NamespacePath {
301 |                         path: path.clone().into_tokens().to_string(),
302 |                         symbol: Symbol::LLVMSymbol,
303 |                     });
304 |                     return;
305 |                 }
306 |                 if is_rust_type(first_seg_ident) {
307 |                     self.namespaces.push(NamespacePath {
308 |                         path: path.clone().into_tokens().to_string(),
309 |                         symbol: Symbol::RustPrimitiveType,
310 |                     });
311 |                     return;
312 |                 }
313 |                 self.namespaces.push(NamespacePath {
314 |                     path: path.clone().into_tokens().to_string(),
315 |                     symbol: Symbol::ExportedSymbol,
316 |                 });
317 |                 return;
318 |             } else {
319 |                 // fn symbols that have namespaces: Rust crates or non-Rust crates
320 |                 if is_rust_crate_ident(first_seg_ident) {
321 |                     self.namespaces.push(NamespacePath {
322 |                         path: path.clone().into_tokens().to_string(),
323 |                         symbol: Symbol::RustCrate,
324 |                     });
325 |                 } else {
326 |                     let pkg_name = self.dependencies.get(first_seg_ident);
327 |                     if pkg_name.is_some() {
328 |                         self.update_qself_pos = true;
329 |                         let id = pkg_name.unwrap();
330 |                         path.segments.insert(
331 |                             0,
332 |                             PathSegment::from(Ident::from(format!(
333 |                                 "v_{}",
334 |                                 build_valid_rust_ident(id.1.as_ref())
335 |                             ))),
336 |                         );
337 |                         path.segments.insert(
338 |                             0,
339 |                             PathSegment::from(Ident::from(build_valid_rust_ident(id.0.as_ref()))),
340 |                         );
341 |                         path.segments
342 |                             .insert(0, PathSegment::from(Ident::from("crates")));
343 |                         path.segments
344 |                             .insert(0, PathSegment::from(Ident::from("io")));
345 |                         if first_seg_ident == self.krate.lib_name() {
346 |                             self.namespaces.push(NamespacePath {
347 |                                 path: format!(
348 |                                     "io :: crates :: {} :: v_{}",
349 |                                     build_valid_rust_ident(id.0.as_ref()),
350 |                                     build_valid_rust_ident(id.1.as_ref())
351 |                                 ),
352 |                                 symbol: Symbol::InternalCrate,
353 |                             });
354 |                         } else {
355 |                             self.namespaces.push(NamespacePath {
356 |                                 path: format!(
357 |                                     "io :: crates :: {} :: v_{}",
358 |                                     build_valid_rust_ident(id.0.as_ref()),
359 |                                     build_valid_rust_ident(id.1.as_ref())
360 |                                 ),
361 |                                 symbol: Symbol::ExternalCrate,
362 |                             });
363 |                         }
364 |                     } else {
365 |                         eprintln!(
366 |                             "({},{}):SymbolError::NoDepFound:{}:{:?}:{:?}",
367 |                             self.krate.pkg_name(),
368 |                             self.krate.version(),
369 |                             path.clone().into_tokens().to_string(),
370 |                             self.krate,
371 |                             self.dependencies
372 |                         );
373 |                         self.namespaces.push(NamespacePath {
374 |                             path: path.clone().into_tokens().to_string(),
375 |                             symbol: Symbol::Unknown {
376 |                                 reason: SymbolError::NoDepFound,
377 |                             },
378 |                         });
379 |                     }
380 |                 }
381 |             }
382 |         }
383 |         // visit each segment to discover more namespaces, e.g. Paths in ItemImpl, etc.
384 |         for mut el in syn::punctuated::Punctuated::pairs_mut(&mut path.segments) {
385 |             let it = el.value_mut();
386 |             self.visit_path_segment_mut(it)
387 |         }
388 |     }
389 | }
390 | 
391 | fn ufify(
392 |     pkg: &PkgIdentifier,
393 |     lookup: &HashMap<String, (String, String)>,
394 |     fn_signature: &str,
395 | ) -> Option<(String, Vec<NamespacePath>)> {
396 |     let syntax_tree = syn::parse_file(&fn_signature);
397 |     if let Ok(mut ast) = syntax_tree {
398 |         let mut visitor = PathVisitor {
399 |             dependencies: lookup,
400 |             krate: pkg,
401 |             namespaces: Vec::new(),
402 |             update_qself_pos: false,
403 |         };
404 |         // println!("before: {}", visitor.update_qself_pos);
405 |         // println!("{:#?}", ast);
406 |         syn::visit_mut::visit_file_mut(&mut visitor, &mut ast);
407 |         // println!("after: {}", visitor.update_qself_pos);
408 |         // println!("{:#?}", ast);
409 |         Some((ast.into_tokens().to_string(), visitor.namespaces.clone()))
410 |     } else {
411 |         eprintln!(
412 |             "({},{}): could not parse into AST:{}:{}",
413 |             pkg.pkg_name(),
414 |             pkg.version(),
415 |             fn_signature,
416 |             syntax_tree.unwrap_err()
417 |         );
418 |         None
419 |     }
420 | }
421 | 
422 | fn make_lookup_table(
423 |     cargo_dir: &PathBuf,
424 | ) -> Option<(PkgIdentifier, HashMap<String, (String, String)>)> {
425 |     let cargo_file = cargo_dir.join("Cargo.toml");
426 |     let deps = fetch_deps(cargo_file.as_path().to_str().unwrap());
427 |     if let Err(e) = deps {
428 |         // This failed because we couldn't load the Cargo.toml/workspace for the crate
429 |         // Terminate here
430 |         eprintln!("(error, no Cargo.toml):{:?}:{}", cargo_dir, e);
431 |         None
432 |     } else {
433 |         let (internal, external) = deps.unwrap();
434 |         let mut lookup: HashMap<String, (String, String)> = external
435 |             .into_iter()
436 |             .map(|dep| {
437 |                 if dep.is_err() {
438 |                     None
439 |                 } else {
440 |                     let depz = dep.unwrap();
441 |                     Some(depz)
442 |                 }
443 |             }).fold(HashMap::new(), |mut map, d| {
444 |                 if let Some(dep) = d {
445 |                     map.insert(
446 |                         dep.lib_name().to_string(),
447 |                         (dep.pkg_name().to_string(), dep.version().to_string()),
448 |                     );
449 |                 }
450 |                 map
451 |             });
452 |         lookup.insert(
453 |             internal.lib_name().to_string(),
454 |             (
455 |                 internal.pkg_name().to_string(),
456 |                 internal.version().to_string(),
457 |             ),
458 |         );
459 |         println!(
460 |             "({},{}): lookup table constructed with following entries: {:?}",
461 |             internal.pkg_name(),
462 |             internal.version(),
463 |             lookup
464 |         );
465 |         Some((internal, lookup))
466 |     }
467 | }
468 | 
469 | fn main() {
470 |     //
471 |     // Create lookup table (fetching dependency data)
472 |     //
473 |     let base = PathBuf::from(env::args().nth(1).unwrap().as_str()); // crate directory passed as the first CLI argument
474 |     let lookup = make_lookup_table(&base);
475 |     if lookup.is_none() {
476 |         // no lookup table -> exit
477 |         return;
478 |     }
479 |     let (pkg, dep_table) = lookup.unwrap();
480 |     let dot_file = base.join("callgraph.unmangled.pruned.graph");
481 |     let fbuffer = FileBuffer::open(&dot_file);
482 |     if let Err(e) = fbuffer {
483 |         eprintln!(
484 |             "({},{}): missing callgraph file or empty callgraph!: {:?}:{}",
485 |             pkg.pkg_name(),
486 |             pkg.version(),
487 |             dot_file,
488 |             e
489 |         ); // no callgraph -> exit
490 |         return;
491 |     }
492 |     let file_buf = fbuffer.unwrap();
493 |     let buffer = str::from_utf8(&file_buf).expect("not valid UTF-8");
494 |     let mut file = OpenOptions::new()
495 |         .write(true)
496 |         .create(true).truncate(true) // start from an empty output file on every run
497 |         .open(base.join("callgraph.ufi.graph"))
498 |         .unwrap();
499 |     buffer.lines().for_each(|line| {
500 |         if is_a_node(line) {
501 |             let node_data = extract_node_data(line);
502 |             let label = format!("fn placeholder() {{ {} }}", node_data[0]); // wrap the symbol in a dummy fn so syn can parse it as a file
503 |             let ufied_symbol = ufify(&pkg, &dep_table, &label);
504 |             if ufied_symbol.is_none() {
505 |                 let ns = [NamespacePath {
506 |                     path: node_data[0].to_string(),
507 |                     symbol: Symbol::Unknown {
508 |                         reason: SymbolError::ParseErrorAST,
509 |                     },
510 |                 }];
511 |                 if let Err(e) = writeln!(
512 |                     file,
513 |                     "{},type=\"{{{}}}\"];",
514 |                     line.split_at(line.len() - 2).0,
515 |                     json!(ns).to_string()
516 |                 ) {
eprintln!("Couldn't write to file: {}", e); 518 | } 519 | return; 520 | } 521 | let (new_symbol, stats) = ufied_symbol.unwrap(); 522 | let (ufi_1, _right_brace) = new_symbol.split_at(new_symbol.len() - 1); 523 | let (_fn_main, ufi_2) = ufi_1.split_at(20); 524 | let new_line = str::replace(&line, node_data[0], ufi_2); 525 | if let Err(e) = writeln!( 526 | file, 527 | "{},type=\"{{{}}}\"];", 528 | new_line.split_at(new_line.len() - 2).0, 529 | json!(stats).to_string() 530 | ) { 531 | eprintln!("Couldn't write to file: {}", e); 532 | } 533 | } else { 534 | if let Err(e) = writeln!(file, "{}", line) { 535 | eprintln!("Couldn't write to file: {}", e); 536 | } 537 | } 538 | }); 539 | } 540 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------