├── LICENSE ├── README.md ├── dynamic_analysis.py ├── requirements.txt ├── static_analysis.py └── static_scan_rules.yml /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Ajin Abraham 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""package_scan -- dynamic analysis of Python package installs.

Python package static and dynamic analysis to detect environment variable
stealing.

More Info: https://ajinabraham.com/blog/detecting-zero-days-in-software-supply-chain-with-static-and-dynamic-analysis

This module is the ``dynamic_analysis.py`` part of the project.  Every
requirement listed in a requirements file is installed with ``pip`` under
``strace``; the recorded ``openat``, ``execve`` and ``connect`` system calls
are then checked for reads of environment files, execution of ``env``/``set``
and connections to endpoints that are not on the allow-list below.
"""

import re
import subprocess
import sys
from pathlib import Path


# argv array of an execve() trace line, e.g. `, ["env"]`.
EXEC = re.compile(r', \[.*\]')
# IPv4 peer of a connect() trace line; group 1 is the dotted address.
IP = re.compile(r'inet_addr\("([^"]+)"\)')
# Peer port of a connect() trace line; group 1 is the port number.
PORT = re.compile(r'htons\(([0-9]+)\)')
# Endpoints a normal `pip install` is expected to talk to.
KNOWN_CONNECT = {
    '127.0.0.53:53',  # DNS Lookup
    '151.101.0.223:443',  # PyPI
    '151.101.128.223:443',  # PyPI
    '151.101.64.223:443',  # PyPI
    '151.101.188.223:443',  # PyPI
    '151.101.192.223:443',  # PyPI
    '151.101.189.63:443',  # PyPI
    '151.101.197.63:443',  # PyPI
    '192.30.255.113:9418',  # Github
    '192.30.255.112:9418',  # Github
}
# Files that commonly hold environment variable definitions.
ENV_LOCATIONS = {
    '/etc/environment',
    '/etc/profile',
    '/etc/bashrc',
    '~/.bash_profile',
    '~/.bashrc',
    '~/.profile',
    '~/.cshrc',
    '~/.zshrc',
    '~/.tcshrc',
}
# argv entries (quoted the way strace prints them) that dump the environment.
BAD_COMMANDS = {
    '"set"',
    '"env"',
}
# Same comment rule pip applies to requirements files: a `#` at the start of
# the line or preceded by whitespace starts a comment.  A `#` glued to a URL
# (``...git#egg=name``) is therefore left alone.
_COMMENT = re.compile(r'(^|\s+)#.*$')


def _expanded_env_locations():
    """Return ENV_LOCATIONS with ``~`` expanded to the current home directory."""
    expanded = set()
    for loc in ENV_LOCATIONS:
        try:
            expanded.add(Path(loc).expanduser().as_posix())
        except RuntimeError:
            # The home directory cannot be resolved; compare the raw entry.
            expanded.add(loc)
    return expanded


def check_path(pkg: str, syscall: str) -> bool:
    """Report an ``openat`` call that targets a sensitive environment file.

    Args:
        pkg: Package name used in the printed report.
        syscall: One strace output line containing ``openat(``.

    Returns:
        True when a warning was printed, False otherwise (including lines
        that carry no quoted path, e.g. ``openat(AT_FDCWD, <unfinished ...>``).
    """
    _, quote, rest = syscall.partition('"')
    if not quote:
        # No quoted path on this line; nothing to compare.
        return False
    path = rest.partition('"')[0]
    if path not in _expanded_env_locations():
        return False
    print(f'\033[93m{pkg} tried to access sensitive environment location '
          f'[{path}] during installation.\033[0m')
    return True


def check_cmd(pkg: str, syscall: str) -> bool:
    """Report an ``execve`` call whose argv contains ``env`` or ``set``.

    Args:
        pkg: Package name used in the printed report.
        syscall: One strace output line containing ``execve(``.

    Returns:
        True when a warning was printed, False otherwise (including lines
        without an argv array).
    """
    argv = EXEC.search(syscall)
    if argv is None:
        return False
    match_str = argv.group()
    if not any(cmd in match_str for cmd in BAD_COMMANDS):
        return False
    print(f'\033[91m{pkg} tried to access environment variables by executing '
          f'{match_str} command during installation.\033[0m')
    return True


def check_connect(pkg: str, syscall: str) -> bool:
    """Report a ``connect`` call to an endpoint outside KNOWN_CONNECT.

    Args:
        pkg: Package name used in the printed report.
        syscall: One strace output line containing ``connect(`` and
            ``sin_addr=``.

    Returns:
        True when a notice was printed, False when the endpoint is
        allow-listed or the address/port cannot be parsed from the line.
    """
    ip_match = IP.search(syscall)
    port_match = PORT.search(syscall)
    if ip_match is None or port_match is None:
        return False
    loc = f'{ip_match.group(1)}:{port_match.group(1)}'
    if loc in KNOWN_CONNECT:
        return False
    print(f'\033[94m{pkg} tried to connect to [{loc}] during installation.\033[0m')
    return True


def lookup_env(pkg: str, syscalls: str) -> int:
    """Check syscalls for malicious activities.

    Args:
        pkg: Package name used in the printed reports.
        syscalls: Complete strace output, one system call per line.

    Returns:
        The number of findings that were printed.
    """
    findings = 0
    for line in syscalls.splitlines():
        if 'openat(' in line:
            findings += check_path(pkg, line)
        elif 'execve(' in line:
            findings += check_cmd(pkg, line)
        elif 'connect(' in line and 'sin_addr=' in line:
            # Only IPv4 peers are inspected; other address families are skipped.
            findings += check_connect(pkg, line)
    return findings


def collect_syscalls(pkg: str) -> str:
    """Collect sensitive system calls during installation.

    Runs ``pip install --no-cache <pkg>`` under ``strace`` (following child
    processes) and records ``openat``, ``execve`` and ``connect`` calls.

    Args:
        pkg: One requirement line; it is split on whitespace so that options
            such as ``-e <url>`` are passed to pip as separate arguments.

    Returns:
        The combined stdout/stderr of the traced install, decoded as UTF-8
        with undecodable bytes dropped.

    Raises:
        OSError: ``strace`` could not be started at all.
    """
    print(f'Analyzing: {pkg}')
    args = [
        'strace', '-s', '2000', '-fqqe',
        'trace=openat,execve,connect', '--seccomp-bpf',
        'pip', 'install', '--no-cache', *pkg.split(),
    ]
    # A failing install must not discard the trace: whatever ran before the
    # failure is exactly what has to be inspected.
    proc = subprocess.run(
        args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False)
    if proc.returncode != 0:
        print(f'\033[93mInstallation of {pkg} exited with status '
              f'{proc.returncode}; analysing the captured trace anyway.\033[0m',
              file=sys.stderr)
    return proc.stdout.decode('utf-8', 'ignore')


def check_packages(requirements: str = './requirements.txt') -> int:
    """Trace and inspect the installation of every listed requirement.

    Args:
        requirements: Path of the requirements file to process.

    Returns:
        The total number of findings printed for all requirements.
    """
    findings = 0
    lines = Path(requirements).read_text(encoding='utf-8').splitlines()
    for line in lines:
        # Handle comments in requirements file; blank lines are skipped too.
        pkg = _COMMENT.sub('', line).strip()
        if not pkg:
            continue
        syscalls = collect_syscalls(pkg)
        # Report VCS/URL requirements under their egg name.
        name = pkg.split('#egg=')[1] if '#egg=' in pkg else pkg
        findings += lookup_env(name, syscalls)
    return findings


if __name__ == "__main__":
    check_packages()


# ---------------------------------------------------------------------------
# The remaining files of the same repository listing are kept below as
# reference text.  Their line layout was collapsed in the listing, so the
# indentation shown here is a conventional reconstruction.
# ---------------------------------------------------------------------------
#
# /requirements.txt:
#
#   rsa>=4.7
#   biplist>=1.0.3
#   bs4>=0.0.1
#   colorlog>=4.7.2
#   shelljob>=0.6
#   -e git://github.com/ajinabraham/poc-rogue.git#egg=rogue
#
# /static_analysis.py:
#
#   import os
#
#   os.system('rm -rf ./pkgs')
#   print('Downloading Python packages in requirements.txt')
#   os.system('pip download -r requirements.txt -d ./pkgs --no-binary :all: > /dev/null 2>&1')
#   print('Collected the following files for static analysis\033[93m')
#   os.system("find pkgs \( -name '*.tar.gz' -o -name '*.zip' \) -type f -printf '%f\n'")
#   print('\033[0mExtracting source code')
#   os.system("find pkgs -name '*.tar.gz' -execdir tar -xzvf '{}' \; > /dev/null 2>&1")
#   os.system("find pkgs -name '*.zip' -execdir unzip -ou '{}' \; > /dev/null 2>&1")
#   print('Static Analysis')
#   os.system('pip install semgrep > /dev/null 2>&1')
#   os.system('semgrep -f static_scan_rules.yml pkgs/')
#
# /static_scan_rules.yml:
#
#   rules:
#     - id: env-set
#       patterns:
#         - pattern-either:
#             - pattern: |
#                 subprocess.check_output([..., "=~/env|set/", ...])
#             - pattern: |
#                 subprocess.run([..., "=~/env|set/", ...])
#             - pattern: |
#                 subprocess.Popen([..., "=~/env|set/", ...])
#       message: |
#         Reading from env or set commands
#       severity: ERROR
#       languages:
#         - python
#     - id: python-os-environ
#       patterns:
#         - pattern-not-inside: os.environ.get(...)
#         - pattern-not-inside: os.environ[...]
#         - pattern-either:
#             - pattern: |
#                 os.environ
#       message: |
#         Reading from python's os.environ()
#       severity: ERROR
#       languages:
#         - python
#     - id: python-proc-fs
#       patterns:
#         - pattern-either:
#             - pattern: |
#                 pathlib.Path('/proc') / ... / 'environ'
#       message: |
#         Reading python /proc//environ
#       severity: ERROR
#       languages:
#         - python
#     - id: environ-files
#       patterns:
#         - pattern-inside: |
#             $X = {..., "=~/\/etc\/environment|\/etc\/profile|\/etc\/bashrc|~\/.bash_profile|~\/.bashrc|~\/.profile|~\/.cshrc|~\/.zshrc|~\/.tcshrc/", ...}
#             ...
#         - pattern-either:
#             - pattern: |
#                 Path(...)
#             - pattern: |
#                 open(...)
#       message: |
#         Reading from sensitve files that contain environment variables
#       severity: ERROR
#       languages:
#         - python
#     - id: libc-environ
#       patterns:
#         - pattern-either:
#             - pattern: |
#                 $LIB = ctypes.CDLL(...)
#                 ...
#                 $Y.in_dll($LIB, 'environ')
#       message: |
#         Reading from libc.environ
#       severity: ERROR
#       languages:
#         - python