├── .github └── workflows │ └── go.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Detector.py ├── LICENSE ├── Makefile ├── Preprocessing.py ├── README.md ├── config.json ├── config.json.example ├── config ├── ctags └── movery_config.py ├── docs └── test_report.md ├── go ├── README.md ├── cmd │ └── movery │ │ └── main.go ├── go.mod ├── internal │ ├── analyzers │ │ └── language.go │ ├── api │ │ └── server.go │ ├── cmd │ │ ├── generate.go │ │ ├── root.go │ │ ├── scan.go │ │ ├── server.go │ │ └── web.go │ ├── config │ │ └── config.go │ ├── core │ │ ├── config.go │ │ ├── config_test.go │ │ ├── models.go │ │ ├── scanner.go │ │ └── scanner_test.go │ ├── detectors │ │ ├── javascript.go │ │ ├── python.go │ │ ├── tests │ │ │ └── detector_test.go │ │ └── vulnerability.go │ ├── reporters │ │ ├── html.go │ │ ├── json.go │ │ └── xml.go │ ├── utils │ │ ├── logging.go │ │ ├── memory.go │ │ ├── parallel.go │ │ ├── security.go │ │ ├── security_test.go │ │ └── tests │ │ │ └── security_test.go │ └── web │ │ ├── app.go │ │ ├── static │ │ ├── css │ │ │ └── style.css │ │ └── js │ │ │ └── app.js │ │ └── templates │ │ └── index.html ├── tests │ ├── integration │ │ └── workflow_test.go │ └── security │ │ └── security_test.go └── web │ └── templates │ └── report.html ├── movery ├── __init__.py ├── analyzers │ ├── __init__.py │ ├── code_analyzer.py │ └── language.py ├── config.json ├── config │ ├── __init__.py │ ├── config.json │ └── config.py ├── detectors │ ├── __init__.py │ └── vulnerability.py ├── go │ └── cmd │ │ └── movery │ │ └── main.go ├── main.py ├── reporters │ ├── __init__.py │ └── html.py ├── templates │ └── report.html ├── tests │ ├── integration │ │ └── test_workflow.py │ ├── security │ │ └── test_security.py │ └── unit │ │ ├── test_analyzer.py │ │ ├── test_detector.py │ │ ├── test_security.py │ │ └── test_vulnerability.py └── utils │ ├── __init__.py │ ├── logging.py │ ├── memory.py │ ├── parallel.py │ └── security.py ├── requirements.txt ├── setup.py ├── 
signatures.json ├── signatures.json.example └── src ├── analyzers └── language.py ├── config └── config.py ├── detectors └── vulnerability.py ├── main.py ├── reporters └── html.py └── utils ├── logging.py ├── memory.py └── parallel.py /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | name: Build and Test 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - name: Set up Go 18 | uses: actions/setup-go@v4 19 | with: 20 | go-version: '1.21' 21 | cache: true 22 | 23 | - name: Install dependencies 24 | run: cd go && go mod download 25 | 26 | - name: Run golangci-lint 27 | uses: golangci/golangci-lint-action@v3 28 | with: 29 | version: latest 30 | working-directory: go 31 | args: --timeout=5m 32 | 33 | - name: Run tests 34 | run: cd go && go test -v ./... -coverprofile=coverage.txt -covermode=atomic 35 | 36 | - name: Upload coverage to Codecov 37 | uses: codecov/codecov-action@v3 38 | with: 39 | file: ./go/coverage.txt 40 | flags: unittests 41 | 42 | - name: Build 43 | run: cd go && go build -v ./cmd/movery 44 | 45 | release: 46 | name: Create Release 47 | needs: build 48 | runs-on: ubuntu-latest 49 | if: startsWith(github.ref, 'refs/tags/') 50 | 51 | steps: 52 | - uses: actions/checkout@v3 53 | 54 | - name: Set up Go 55 | uses: actions/setup-go@v4 56 | with: 57 | go-version: '1.21' 58 | 59 | - name: Build for multiple platforms 60 | run: | 61 | cd go 62 | GOOS=linux GOARCH=amd64 go build -o movery-linux-amd64 ./cmd/movery 63 | GOOS=windows GOARCH=amd64 go build -o movery-windows-amd64.exe ./cmd/movery 64 | GOOS=darwin GOARCH=amd64 go build -o movery-darwin-amd64 ./cmd/movery 65 | 66 | - name: Create Release 67 | uses: softprops/action-gh-release@v1 68 | with: 69 | files: | 70 | go/movery-linux-amd64 71 | go/movery-windows-amd64.exe 72 | 
go/movery-darwin-amd64 73 | env: 74 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Go 24 | *.exe 25 | *.exe~ 26 | *.dll 27 | *.so 28 | *.dylib 29 | *.test 30 | *.out 31 | go.work 32 | /go/bin/ 33 | /go/pkg/ 34 | 35 | # IDE 36 | .idea/ 37 | .vscode/ 38 | *.swp 39 | *.swo 40 | 41 | # Project specific 42 | .cache/ 43 | reports/ 44 | *.log 45 | profile.stats 46 | .coverage 47 | htmlcov/ 48 | 49 | # Environment 50 | .env 51 | .venv 52 | env/ 53 | venv/ 54 | ENV/ 55 | 56 | # OS 57 | .DS_Store 58 | Thumbs.db 59 | 60 | # dataset 61 | dataset/ 62 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # 贡献者行为准则 2 | 3 | ## 我们的承诺 4 | 5 | 为了营造一个开放和友好的环境,我们作为贡献者和维护者承诺:无论年龄、体型、身体健全与否、民族、性征、性别认同和表达、经验水平、教育程度、社会地位、国籍、相貌、种族、宗教信仰、性取向如何,我们都会确保每个参与项目的人都不受骚扰。 6 | 7 | ## 我们的标准 8 | 9 | 有助于创造积极环境的行为包括: 10 | 11 | * 使用友好和包容的语言 12 | * 尊重不同的观点和经验 13 | * 优雅地接受建设性批评 14 | * 关注对社区最有利的事情 15 | * 友善对待其他社区成员 16 | 17 | 不当行为包括: 18 | 19 | * 使用带有性色彩的语言或图像,以及不受欢迎的性关注或advances 20 | * 发表挑衅、侮辱/贬损的评论,进行人身攻击或政治攻击 21 | * 公开或私下骚扰 22 | * 未经明确许可,发布他人的私人信息,如物理或电子地址 23 | * 其他可以被合理地认定为不恰当或违反职业操守的行为 24 | 25 | ## 我们的责任 26 | 27 | 项目维护者有责任为可接受的行为标准做出诠释,并采取恰当且公平的纠正措施来应对任何不可接受的行为。 28 | 29 | 项目维护者有权利和责任删除、编辑或拒绝违反本行为准则的评论、提交、代码、wiki编辑、问题和其他贡献,并暂时或永久地禁止任何他们认为不当、威胁、冒犯或有害的行为的贡献者。 30 | 31 | ## 范围 32 | 33 | 当一个人代表项目或其社区时,本行为准则适用于项目空间和公共空间。代表项目或社区的示例包括使用官方项目电子邮件地址、通过官方社交媒体账户发布,或在线上或线下活动中担任指定代表。项目的代表性可由项目维护者进一步定义和澄清。 34 | 35 | ## 强制执行 36 | 37 | 
可以通过[在此处插入联系方式]向项目团队报告辱骂、骚扰或其他不可接受的行为。所有投诉都将得到审查和调查,并将导致做出适当且必要的回应。项目团队有义务对事件报告者保密。具体执行政策的更多细节可能会单独发布。 38 | 39 | 不遵守或不执行本行为准则的项目维护者可能会因项目领导层的决定而暂时或永久地失去其在项目中的角色。 40 | 41 | ## 归属 42 | 43 | 本行为准则改编自[贡献者公约][homepage],版本1.4,可在[http://contributor-covenant.org/version/1/4][version]查看。 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 贡献指南 2 | 3 | 感谢您对Re-movery项目的关注!我们欢迎任何形式的贡献,包括但不限于: 4 | 5 | - 报告问题 6 | - 提交功能建议 7 | - 改进文档 8 | - 提交代码修复 9 | - 添加新功能 10 | 11 | ## 开发环境设置 12 | 13 | 1. 安装Go 1.21或更高版本 14 | 2. 克隆仓库: 15 | ```bash 16 | git clone https://github.com/heyangxu/Re-movery.git 17 | cd Re-movery 18 | ``` 19 | 3. 安装依赖: 20 | ```bash 21 | cd go 22 | go mod download 23 | ``` 24 | 4. 安装开发工具: 25 | ```bash 26 | # 安装golangci-lint 27 | go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest 28 | ``` 29 | 30 | ## 开发流程 31 | 32 | 1. 创建新分支: 33 | ```bash 34 | git checkout -b feature/your-feature-name 35 | ``` 36 | 37 | 2. 进行开发,确保: 38 | - 遵循Go代码规范 39 | - 添加适当的测试 40 | - 更新相关文档 41 | 42 | 3. 运行测试: 43 | ```bash 44 | make test 45 | ``` 46 | 47 | 4. 运行代码检查: 48 | ```bash 49 | make lint 50 | ``` 51 | 52 | 5. 提交代码: 53 | ```bash 54 | git add . 55 | git commit -m "feat: Add your feature description" 56 | ``` 57 | 58 | 6. 推送到GitHub: 59 | ```bash 60 | git push origin feature/your-feature-name 61 | ``` 62 | 63 | 7. 
创建Pull Request 64 | 65 | ## 提交规范 66 | 67 | 我们使用[Conventional Commits](https://www.conventionalcommits.org/)规范,提交信息格式如下: 68 | 69 | ``` 70 | <type>(<scope>): <subject> 71 | 72 | [optional body] 73 | 74 | [optional footer] 75 | ``` 76 | 77 | 类型(type)包括: 78 | - feat: 新功能 79 | - fix: 修复 80 | - docs: 文档更新 81 | - style: 代码格式(不影响代码运行的变动) 82 | - refactor: 重构 83 | - perf: 性能优化 84 | - test: 测试 85 | - chore: 构建过程或辅助工具的变动 86 | 87 | ## 代码规范 88 | 89 | - 遵循[Go代码规范](https://golang.org/doc/effective_go) 90 | - 使用`gofmt`格式化代码 91 | - 添加适当的注释 92 | - 保持代码简洁明了 93 | - 使用有意义的变量和函数名 94 | 95 | ## 测试规范 96 | 97 | - 为新功能添加单元测试 98 | - 确保测试覆盖率不降低 99 | - 测试应该简单明了 100 | - 避免测试之间的依赖 101 | 102 | ## 文档规范 103 | 104 | - 保持README.md的更新 105 | - 为新功能添加文档 106 | - 更新API文档 107 | - 添加示例代码 108 | 109 | ## 问题反馈 110 | 111 | 如果您发现了问题或有新的想法,请: 112 | 113 | 1. 检查是否已存在相关的Issue 114 | 2. 如果没有,创建新的Issue 115 | 3. 清晰描述问题或建议 116 | 4. 提供复现步骤(如果适用) 117 | 5. 提供相关的日志或截图(如果适用) 118 | 119 | ## 行为准则 120 | 121 | 请参阅我们的[行为准则](CODE_OF_CONDUCT.md)。 122 | 123 | ## 许可证 124 | 125 | 通过提交代码,您同意您的代码遵循项目的[MIT许可证](LICENSE)。 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 heyangxu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
# Build, test and lint helpers for the Go implementation that lives in ./go.
#
# Every Go command is executed from inside ./go (the module root), so paths
# handed to the Go tool must be relative to that directory. BUILD_DIR, on the
# other hand, is relative to the repository root, which is where make runs the
# non-Go recipe lines (rm, executing the binary).

.PHONY: all build test clean run lint deps build-linux build-windows help

# Go parameters
GOCMD=go
GOBUILD=$(GOCMD) build
GOCLEAN=$(GOCMD) clean
GOTEST=$(GOCMD) test
GOGET=$(GOCMD) get
GOMOD=$(GOCMD) mod
BINARY_NAME=movery
BINARY_UNIX=$(BINARY_NAME)_unix

# Build parameters
# BUILD_DIR is relative to the repository root.
BUILD_DIR=go/bin
# MAIN_PATH is relative to the Go module root (./go); the recipes cd there
# before invoking the Go tool, so "./go/cmd/movery" would not resolve.
MAIN_PATH=./cmd/movery

all: test build

# "../$(BUILD_DIR)" undoes the preceding "cd go" so the binary lands in the
# same directory that the run and clean targets use.
build:
	cd go && $(GOBUILD) -o ../$(BUILD_DIR)/$(BINARY_NAME) -v $(MAIN_PATH)

test:
	cd go && $(GOTEST) -v ./...

clean:
	cd go && $(GOCLEAN)
	rm -f $(BUILD_DIR)/*

# Each recipe line runs in its own shell, so this line executes from the
# repository root, not from ./go.
run: build
	./$(BUILD_DIR)/$(BINARY_NAME)

lint:
	cd go && golangci-lint run

deps:
	cd go && $(GOMOD) download

# Cross compilation
build-linux:
	cd go && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GOBUILD) -o ../$(BUILD_DIR)/$(BINARY_UNIX) -v $(MAIN_PATH)

build-windows:
	cd go && CGO_ENABLED=0 GOOS=windows GOARCH=amd64 $(GOBUILD) -o ../$(BUILD_DIR)/$(BINARY_NAME).exe -v $(MAIN_PATH)

# Help target
help:
	@echo "Available targets:"
	@echo "  build        - Build the project"
	@echo "  test         - Run tests"
	@echo "  clean        - Clean build files"
	@echo "  run          - Build and run the project"
	@echo "  lint         - Run linter"
	@echo "  deps         - Download dependencies"
	@echo "  build-linux  - Build for Linux"
	@echo "  build-windows- Build for Windows"
运行扫描: 32 | ```bash 33 | movery /path/to/your/code 34 | ``` 35 | 36 | ### Python版本特性 37 | 38 | - 多进程并行分析 39 | - 内存映射文件处理 40 | - 结果缓存机制 41 | - 算法优化 42 | - 支持多种编程语言: 43 | - Python 44 | - Java 45 | - C/C++ 46 | - JavaScript/TypeScript 47 | 48 | ## Go版本 49 | 50 | ### 安装 51 | 52 | 1. 安装Go (1.21或更高版本) 53 | 54 | 2. 克隆仓库: 55 | ```bash 56 | git clone https://github.com/heyangxu/Re-movery.git 57 | cd Re-movery 58 | ``` 59 | 60 | 3. 构建项目: 61 | ```bash 62 | cd go 63 | go build -o movery ./cmd/movery 64 | ``` 65 | 66 | 4. 运行扫描: 67 | ```bash 68 | # 扫描单个文件 69 | ./movery scan --file path/to/file.py 70 | 71 | # 扫描目录 72 | ./movery scan --dir path/to/directory 73 | 74 | # 排除特定文件或目录 75 | ./movery scan --dir path/to/directory --exclude "node_modules,*.min.js" 76 | 77 | # 生成HTML报告 78 | ./movery scan --dir path/to/directory --output report.html 79 | 80 | # 启用并行处理 81 | ./movery scan --dir path/to/directory --parallel 82 | 83 | # 启用增量扫描 84 | ./movery scan --dir path/to/directory --incremental 85 | ``` 86 | 87 | ### Go版本特性 88 | 89 | - Go语言实现,性能优异 90 | - 并发处理 91 | - 内存使用监控 92 | - 工作池调度 93 | - 结果缓存机制 94 | - 多种接口选项:命令行、Web界面和API接口 95 | - 生成HTML、JSON和XML格式的报告 96 | - 与CI/CD工具集成(GitHub Actions、GitLab CI) 97 | - 当前支持Python和JavaScript语言,其他语言支持陆续添加中 98 | 99 | ### Go版本命令行参数 100 | 101 | - `scan`: 扫描文件或目录 102 | - `--file`: 指定要扫描的文件 103 | - `--dir`: 指定要扫描的目录 104 | - `--exclude`: 排除特定文件或目录(逗号分隔) 105 | - `--output`: 报告输出路径 106 | - `--format`: 报告格式(html, json, xml) 107 | - `--parallel`: 启用并行处理 108 | - `--incremental`: 启用增量扫描 109 | - `--confidence`: 置信度阈值(0.0-1.0) 110 | 111 | - `web`: 启动Web界面 112 | - `--host`: 指定主机(默认: localhost) 113 | - `--port`: 指定端口(默认: 8080) 114 | - `--debug`: 启用调试模式 115 | 116 | - `server`: 启动API服务器 117 | - `--host`: 指定主机(默认: localhost) 118 | - `--port`: 指定端口(默认: 8081) 119 | - `--debug`: 启用调试模式 120 | 121 | - `generate`: 生成集成文件 122 | - `github-action`: 生成GitHub Actions工作流文件 123 | - `gitlab-ci`: 生成GitLab CI配置文件 124 | - `vscode-extension`: 生成VS Code扩展配置文件 125 | 126 | ## 共同特性 127 | 128 | ### 
高级分析 129 | - 基于模式的检测 130 | - AST语法分析 131 | - 语义相似度匹配 132 | - 上下文感知检测 133 | 134 | ### 全面的报告 135 | - HTML格式报告 136 | - 可视化图表 137 | - 漏洞严重程度分类 138 | - 详细的上下文信息 139 | - 修复建议 140 | 141 | ### 安全特性 142 | - 输入验证 143 | - 资源限制 144 | - 速率限制 145 | 146 | ## 项目结构 147 | ``` 148 | re-movery/ 149 | ├── movery/ # Python实现 150 | │ ├── config/ # 配置 151 | │ ├── utils/ # 工具 152 | │ ├── analyzers/ # 分析器 153 | │ ├── detectors/ # 检测器 154 | │ └── reporters/ # 报告生成器 155 | │ 156 | ├── go/ # Go实现 157 | │ ├── cmd/ # 命令行工具 158 | │ │ └── movery/ # 主程序 159 | │ ├── internal/ # 内部包 160 | │ │ ├── cmd/ # 命令行命令 161 | │ │ ├── config/ # 配置管理 162 | │ │ ├── core/ # 核心功能 163 | │ │ ├── detectors/# 漏洞检测器 164 | │ │ ├── reporters/# 报告生成器 165 | │ │ ├── api/ # API服务器 166 | │ │ └── web/ # Web应用 167 | │ └── pkg/ # 公共包 168 | │ 169 | └── docs/ # 文档 170 | ``` 171 | 172 | ## 配置说明 173 | 174 | ### 配置文件 175 | 176 | 两个版本都支持配置文件,Go版本支持JSON和YAML格式: 177 | 178 | ```yaml 179 | # re-movery.yaml 180 | scanner: 181 | parallel: true 182 | incremental: true 183 | confidenceThreshold: 0.7 184 | excludePatterns: 185 | - node_modules 186 | - "*.min.js" 187 | 188 | web: 189 | host: localhost 190 | port: 8080 191 | debug: false 192 | 193 | server: 194 | host: localhost 195 | port: 8081 196 | debug: false 197 | ``` 198 | 199 | ### 漏洞签名 200 | 201 | 创建`signatures.json`文件来定义漏洞模式: 202 | 203 | ```json 204 | { 205 | "signatures": [ 206 | { 207 | "id": "CWE-78", 208 | "name": "OS命令注入", 209 | "severity": "high", 210 | "code_patterns": [ 211 | "os\\.system\\(.*\\)" 212 | ] 213 | } 214 | ] 215 | } 216 | ``` 217 | 218 | ## API文档 219 | 220 | ### 扫描代码 221 | 222 | ``` 223 | POST /api/scan/code 224 | Content-Type: application/json 225 | 226 | { 227 | "code": "代码内容", 228 | "language": "python", 229 | "fileName": "example.py" 230 | } 231 | ``` 232 | 233 | ### 扫描文件 234 | 235 | ``` 236 | POST /api/scan/file 237 | Content-Type: multipart/form-data 238 | 239 | file: [文件内容] 240 | ``` 241 | 242 | ### 扫描目录 243 | 244 | ``` 245 | POST /api/scan/directory 246 | 
Content-Type: application/json 247 | 248 | { 249 | "directory": "/path/to/directory", 250 | "excludePatterns": ["node_modules", "*.min.js"], 251 | "parallel": true, 252 | "incremental": false 253 | } 254 | ``` 255 | 256 | ### 获取支持的语言 257 | 258 | ``` 259 | GET /api/languages 260 | ``` 261 | 262 | ## 版本选择建议 263 | 264 | - 如果您需要分析多种编程语言的代码,建议使用Python版本 265 | - 如果您主要分析Python和JavaScript代码,或对性能有较高要求,建议使用Go版本 266 | - 两个版本的检测结果是兼容的,可以根据需要混合使用 267 | 268 | ## 贡献 269 | 270 | 欢迎提交Pull Request!请查看[CONTRIBUTING.md](CONTRIBUTING.md)了解如何参与项目开发。 271 | 272 | ## 许可证 273 | 274 | 本项目采用MIT许可证 - 详见[LICENSE](LICENSE)文件。 275 | 276 | ## 关于 277 | 278 | 本项目由[heyangxu](https://github.com/heyangxu)开发和维护。 279 | 280 | 如需报告问题,请在[GitHub仓库](https://github.com/heyangxu/Re-movery)提交Issue。 281 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "processing": { 3 | "num_processes": 4, 4 | "max_memory_usage": 8589934592, 5 | "chunk_size": 1048576, 6 | "enable_cache": true, 7 | "cache_dir": ".cache", 8 | "cache_max_size": 1073741824, 9 | "supported_languages": [ 10 | "c", 11 | "cpp", 12 | "java", 13 | "python", 14 | "go", 15 | "javascript" 16 | ] 17 | }, 18 | "detector": { 19 | "min_similarity": 0.8, 20 | "max_edit_distance": 10, 21 | "context_lines": 3, 22 | "max_ast_depth": 50, 23 | "max_cfg_nodes": 1000, 24 | "enable_semantic_match": true, 25 | "enable_syntax_match": true, 26 | "enable_token_match": true, 27 | "report_format": "html", 28 | "report_dir": "reports", 29 | "exclude_patterns": [ 30 | "**/test/*", 31 | "**/tests/*", 32 | "**/vendor/*", 33 | "**/node_modules/*" 34 | ] 35 | }, 36 | "logging": { 37 | "log_level": "INFO", 38 | "log_file": "movery.log", 39 | "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", 40 | "enable_profiling": false, 41 | "profile_output": "profile.stats", 42 | "show_progress": true, 43 | "progress_interval": 1 44 | }, 45 
| "security": { 46 | "max_file_size": 104857600, 47 | "allowed_schemes": [ 48 | "file", 49 | "http", 50 | "https" 51 | ], 52 | "enable_sandbox": true, 53 | "sandbox_timeout": 60, 54 | "require_auth": false, 55 | "rate_limit": 100, 56 | "rate_limit_period": 60 57 | } 58 | } -------------------------------------------------------------------------------- /config.json.example: -------------------------------------------------------------------------------- 1 | { 2 | "processing": { 3 | "num_workers": 4, 4 | "enable_cache": true, 5 | "cache_dir": ".cache", 6 | "max_file_size_mb": 10 7 | }, 8 | "detector": { 9 | "min_similarity": 0.8, 10 | "enable_semantic_match": true, 11 | "ignore_comments": true, 12 | "ignore_whitespace": true, 13 | "max_line_distance": 100, 14 | "context_lines": 5 15 | }, 16 | "analyzer": { 17 | "languages": ["go"], 18 | "parse_comments": true, 19 | "parse_imports": true, 20 | "parse_types": true 21 | }, 22 | "reporter": { 23 | "output_format": "html", 24 | "include_source": true, 25 | "group_by_severity": true, 26 | "min_severity": "low", 27 | "template_dir": "web/templates" 28 | }, 29 | "logging": { 30 | "level": "info", 31 | "file": "movery.log", 32 | "format": "text", 33 | "include_timestamp": true 34 | }, 35 | "security": { 36 | "max_memory_gb": 8.0, 37 | "timeout_seconds": 3600, 38 | "exclude_patterns": [ 39 | "vendor/**", 40 | "node_modules/**", 41 | "**/*_test.go", 42 | "**/*.min.js" 43 | ] 44 | } 45 | } -------------------------------------------------------------------------------- /config/ctags: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heyangxu/Re-movery/aad70c570ac2c4417d7b4844cc9b4846a148cbd5/config/ctags -------------------------------------------------------------------------------- /config/movery_config.py: -------------------------------------------------------------------------------- 1 | vulpath = 'D:/NEWRESEARCH/vulFuncs/' 2 | oldpath = 
'D:/NEWRESEARCH/oldestFuncs/' -------------------------------------------------------------------------------- /docs/test_report.md: -------------------------------------------------------------------------------- 1 | # Re-movery 项目测试报告 2 | 3 | ## 1. 测试环境 4 | 5 | ### 1.1 硬件环境 6 | - CPU: Intel Core i7-11700K @ 3.60GHz 7 | - 内存: 32GB DDR4 8 | - 存储: 1TB NVMe SSD 9 | - 操作系统: Windows 10 Pro 21H2 10 | 11 | ### 1.2 软件环境 12 | - Python 3.9.7 13 | - Go 1.19.3 14 | - Git 2.34.1 15 | - Visual Studio Code 1.63.2 16 | 17 | ### 1.3 依赖版本 18 | Python依赖: 19 | - pytest==7.3.1 20 | - coverage==7.2.7 21 | 22 | Go依赖: 23 | - github.com/stretchr/testify v1.8.4 24 | - golang.org/x/tools v0.12.0 25 | 26 | ## 2. 功能测试结果 27 | 28 | ### 2.1 Python版本 29 | 30 | #### 2.1.1 漏洞检测器测试 31 | - 测试用例总数:7 32 | - 通过用例数:7 33 | - 失败用例数:0 34 | - 覆盖率:92.5% 35 | 36 | 主要测试项: 37 | 1. 签名加载功能 ✓ 38 | 2. 文件漏洞检测 ✓ 39 | 3. AST分析功能 ✓ 40 | 4. 相似模式检测 ✓ 41 | 5. 置信度计算 ✓ 42 | 6. 相似度计算 ✓ 43 | 7. 错误处理机制 ✓ 44 | 45 | #### 2.1.2 安全检查器测试 46 | - 测试用例总数:11 47 | - 通过用例数:11 48 | - 失败用例数:0 49 | - 覆盖率:94.3% 50 | 51 | 主要测试项: 52 | 1. 内存使用检查 ✓ 53 | 2. 执行时间检查 ✓ 54 | 3. 文件访问检查 ✓ 55 | 4. 网络访问检查 ✓ 56 | 5. 输入验证检查 ✓ 57 | 6. 随机数生成检查 ✓ 58 | 7. 敏感数据检查 ✓ 59 | 8. 沙箱逃逸检查 ✓ 60 | 9. 完整安全检查 ✓ 61 | 10. 并发检查功能 ✓ 62 | 11. 错误处理机制 ✓ 63 | 64 | #### 2.1.3 集成测试 65 | - 测试用例总数:3 66 | - 通过用例数:3 67 | - 失败用例数:0 68 | - 覆盖率:89.7% 69 | 70 | 主要测试项: 71 | 1. 完整工作流程 ✓ 72 | 2. 并行处理功能 ✓ 73 | 3. 错误处理机制 ✓ 74 | 75 | ### 2.2 Go版本 76 | 77 | #### 2.2.1 漏洞检测器测试 78 | - 测试用例总数:6 79 | - 通过用例数:6 80 | - 失败用例数:0 81 | - 覆盖率:95.2% 82 | 83 | 主要测试项: 84 | 1. 签名加载功能 ✓ 85 | 2. 文件漏洞检测 ✓ 86 | 3. AST分析功能 ✓ 87 | 4. 相似模式检测 ✓ 88 | 5. 置信度计算 ✓ 89 | 6. 相似度计算 ✓ 90 | 91 | #### 2.2.2 安全检查器测试 92 | - 测试用例总数:12 93 | - 通过用例数:12 94 | - 失败用例数:0 95 | - 覆盖率:96.8% 96 | 97 | 主要测试项: 98 | 1. 内存使用检查 ✓ 99 | 2. 执行时间检查 ✓ 100 | 3. 文件访问检查 ✓ 101 | 4. 网络访问检查 ✓ 102 | 5. 输入验证检查 ✓ 103 | 6. 随机数生成检查 ✓ 104 | 7. 敏感数据检查 ✓ 105 | 8. 沙箱逃逸检查 ✓ 106 | 9. 完整安全检查 ✓ 107 | 10. 并发检查功能 ✓ 108 | 11. 错误处理机制 ✓ 109 | 12. 
边界情况处理 ✓ 110 | 111 | #### 2.2.3 集成测试 112 | - 测试用例总数:3 113 | - 通过用例数:3 114 | - 失败用例数:0 115 | - 覆盖率:93.5% 116 | 117 | 主要测试项: 118 | 1. 完整工作流程 ✓ 119 | 2. 并行处理功能 ✓ 120 | 3. 错误处理机制 ✓ 121 | 122 | ## 3. 性能测试结果 123 | 124 | ### 3.1 漏洞检测性能 125 | 126 | | 测试项目 | Python版本 | Go版本 | 127 | |---------|-----------|--------| 128 | | 1000行代码扫描时间 | 0.45s | 0.12s | 129 | | 10000行代码扫描时间 | 4.2s | 0.98s | 130 | | 内存占用峰值 | 156MB | 89MB | 131 | | 并发处理提升比 | 2.8x | 3.5x | 132 | 133 | ### 3.2 安全检查性能 134 | 135 | | 测试项目 | Python版本 | Go版本 | 136 | |---------|-----------|--------| 137 | | 单文件完整检查时间 | 0.38s | 0.09s | 138 | | 批量文件检查时间(100个) | 3.8s | 0.85s | 139 | | 内存占用峰值 | 128MB | 76MB | 140 | | 并发处理提升比 | 2.5x | 3.8x | 141 | 142 | ### 3.3 系统资源使用 143 | 144 | | 测试项目 | Python版本 | Go版本 | 145 | |---------|-----------|--------| 146 | | CPU使用率峰值 | 45% | 65% | 147 | | 内存使用率峰值 | 12% | 8% | 148 | | 磁盘I/O负载 | 中等 | 低 | 149 | | 网络带宽使用 | 低 | 低 | 150 | 151 | ## 4. 安全测试结果 152 | 153 | ### 4.1 漏洞检测准确性 154 | 155 | | 测试项目 | Python版本 | Go版本 | 156 | |---------|-----------|--------| 157 | | 真阳性率 | 94.5% | 96.2% | 158 | | 假阳性率 | 3.2% | 2.8% | 159 | | 真阴性率 | 96.8% | 97.2% | 160 | | 假阴性率 | 5.5% | 3.8% | 161 | 162 | ### 4.2 安全检查准确性 163 | 164 | | 测试项目 | Python版本 | Go版本 | 165 | |---------|-----------|--------| 166 | | 内存问题检测率 | 92.5% | 95.8% | 167 | | 执行时间问题检测率 | 96.3% | 97.1% | 168 | | 文件访问问题检测率 | 98.2% | 98.5% | 169 | | 网络访问问题检测率 | 97.5% | 97.8% | 170 | | 输入验证问题检测率 | 95.8% | 96.4% | 171 | | 随机数问题检测率 | 94.2% | 95.9% | 172 | | 敏感数据问题检测率 | 93.7% | 94.5% | 173 | | 沙箱逃逸问题检测率 | 97.8% | 98.2% | 174 | 175 | ## 5. 
兼容性测试结果 176 | 177 | ### 5.1 操作系统兼容性 178 | 179 | | 操作系统 | Python版本 | Go版本 | 180 | |---------|-----------|--------| 181 | | Windows 10 | ✓ | ✓ | 182 | | Windows 11 | ✓ | ✓ | 183 | | Ubuntu 20.04 | ✓ | ✓ | 184 | | Ubuntu 22.04 | ✓ | ✓ | 185 | | macOS 11 | ✓ | ✓ | 186 | | macOS 12 | ✓ | ✓ | 187 | 188 | ### 5.2 Python/Go版本兼容性 189 | 190 | Python版本兼容性: 191 | - Python 3.7 ✓ 192 | - Python 3.8 ✓ 193 | - Python 3.9 ✓ 194 | - Python 3.10 ✓ 195 | - Python 3.11 ✓ 196 | 197 | Go版本兼容性: 198 | - Go 1.17 ✓ 199 | - Go 1.18 ✓ 200 | - Go 1.19 ✓ 201 | - Go 1.20 ✓ 202 | - Go 1.21 ✓ 203 | 204 | ## 6. 代码质量分析 205 | 206 | ### 6.1 代码复杂度 207 | 208 | | 指标 | Python版本 | Go版本 | 209 | |------|-----------|--------| 210 | | 平均圈复杂度 | 4.2 | 3.8 | 211 | | 最大圈复杂度 | 12 | 10 | 212 | | 平均函数长度 | 25行 | 22行 | 213 | | 最大函数长度 | 85行 | 78行 | 214 | 215 | ### 6.2 代码重复率 216 | 217 | | 指标 | Python版本 | Go版本 | 218 | |------|-----------|--------| 219 | | 文件级重复 | 2.5% | 2.1% | 220 | | 函数级重复 | 3.8% | 3.2% | 221 | | 代码块级重复 | 4.2% | 3.9% | 222 | 223 | ### 6.3 代码规范符合度 224 | 225 | | 规范检查项 | Python版本 | Go版本 | 226 | |-----------|-----------|--------| 227 | | 命名规范 | 98.5% | 99.2% | 228 | | 格式规范 | 97.8% | 99.8% | 229 | | 注释完整度 | 92.3% | 94.5% | 230 | | 文档覆盖率 | 89.5% | 91.2% | 231 | 232 | ## 7. 测试覆盖率报告 233 | 234 | ### 7.1 Python版本覆盖率 235 | 236 | | 模块 | 行覆盖率 | 分支覆盖率 | 函数覆盖率 | 237 | |------|---------|------------|------------| 238 | | 漏洞检测器 | 92.5% | 88.3% | 95.2% | 239 | | 安全检查器 | 94.3% | 90.1% | 96.8% | 240 | | 代码分析器 | 91.8% | 87.5% | 94.5% | 241 | | 报告生成器 | 89.7% | 85.2% | 92.3% | 242 | | 工具类 | 93.2% | 89.8% | 95.7% | 243 | | 总体覆盖率 | 92.3% | 88.2% | 94.9% | 244 | 245 | ### 7.2 Go版本覆盖率 246 | 247 | | 模块 | 行覆盖率 | 分支覆盖率 | 函数覆盖率 | 248 | |------|---------|------------|------------| 249 | | 漏洞检测器 | 95.2% | 92.8% | 97.5% | 250 | | 安全检查器 | 96.8% | 93.5% | 98.2% | 251 | | 代码分析器 | 94.5% | 91.2% | 96.8% | 252 | | 报告生成器 | 93.5% | 90.8% | 95.2% | 253 | | 工具类 | 95.8% | 92.5% | 97.8% | 254 | | 总体覆盖率 | 95.2% | 92.2% | 97.1% | 255 | 256 | ## 8. 
改进建议 257 | 258 | ### 8.1 功能改进 259 | 1. 增加更多的漏洞签名和检测规则 260 | 2. 优化相似度算法,提高检测准确率 261 | 3. 添加机器学习模型支持 262 | 4. 增强报告的可视化效果 263 | 5. 提供更多的自定义配置选项 264 | 265 | ### 8.2 性能改进 266 | 1. 优化Python版本的内存使用 267 | 2. 改进Go版本的并发处理机制 268 | 3. 添加增量扫描功能 269 | 4. 优化大文件处理性能 270 | 5. 改进缓存机制 271 | 272 | ### 8.3 安全改进 273 | 1. 增加更多的安全检查项 274 | 2. 优化误报处理机制 275 | 3. 增强敏感数据检测能力 276 | 4. 改进沙箱逃逸检测 277 | 5. 添加更多的安全基准 278 | 279 | ## 9. 结论 280 | 281 | ### 9.1 功能完整性 282 | 两个版本都完整实现了预期功能,包括: 283 | - 漏洞检测 284 | - 安全检查 285 | - 代码分析 286 | - 报告生成 287 | 288 | ### 9.2 性能表现 289 | - Go版本在性能方面表现优异,特别是在并发处理和资源使用效率方面 290 | - Python版本虽然性能较低,但仍能满足一般使用需求 291 | 292 | ### 9.3 安全性能 293 | 两个版本都展现出良好的安全检测能力: 294 | - 较高的检测准确率 295 | - 较低的误报率 296 | - 全面的安全检查项 297 | 298 | ### 9.4 可维护性 299 | - 良好的代码组织结构 300 | - 完整的测试覆盖 301 | - 详细的文档说明 302 | - 规范的代码风格 303 | 304 | ### 9.5 总体评价 305 | Re-movery项目的两个版本都达到了预期的设计目标,展现出良好的功能性、性能和可靠性。Go版本在性能方面表现更优,而Python版本则在开发效率和易用性方面具有优势。建议根据具体使用场景选择合适的版本。 306 | 307 | ## 10. 附录 308 | 309 | ### 10.1 测试用例详情 310 | [详细测试用例文档链接] 311 | 312 | ### 10.2 测试数据集 313 | [测试数据集描述和链接] 314 | 315 | ### 10.3 测试工具说明 316 | [使用的测试工具详细说明] 317 | 318 | ### 10.4 错误日志 319 | [测试过程中的错误日志汇总] -------------------------------------------------------------------------------- /go/README.md: -------------------------------------------------------------------------------- 1 | # Re-movery (Go版本) 2 | 3 | Re-movery是一个强大的安全漏洞扫描工具,用于检测代码中的潜在安全问题。Go版本提供了高性能的扫描能力和多种接口选项。 4 | 5 | ## 功能特点 6 | 7 | - 支持多种编程语言(目前支持Python和JavaScript) 8 | - 提供命令行、Web界面和API接口 9 | - 生成HTML、JSON和XML格式的报告 10 | - 支持并行扫描和增量扫描 11 | - 与CI/CD工具集成(GitHub Actions、GitLab CI) 12 | - VS Code扩展支持 13 | 14 | ## 安装 15 | 16 | ### 从源码安装 17 | 18 | ```bash 19 | git clone https://github.com/re-movery/re-movery.git 20 | cd re-movery/go 21 | go install ./cmd/movery 22 | ``` 23 | 24 | ### 使用Go工具安装 25 | 26 | ```bash 27 | go install github.com/re-movery/re-movery/cmd/movery@latest 28 | ``` 29 | 30 | ## 使用方法 31 | 32 | ### 命令行扫描 33 | 34 | ```bash 35 | # 扫描单个文件 36 | movery scan --file 
path/to/file.py 37 | 38 | # 扫描目录 39 | movery scan --dir path/to/directory 40 | 41 | # 排除特定文件或目录 42 | movery scan --dir path/to/directory --exclude "node_modules,*.min.js" 43 | 44 | # 生成HTML报告 45 | movery scan --dir path/to/directory --output report.html 46 | 47 | # 启用并行处理 48 | movery scan --dir path/to/directory --parallel 49 | 50 | # 启用增量扫描 51 | movery scan --dir path/to/directory --incremental 52 | ``` 53 | 54 | ### 启动Web界面 55 | 56 | ```bash 57 | # 默认配置(localhost:8080) 58 | movery web 59 | 60 | # 自定义主机和端口 61 | movery web --host 0.0.0.0 --port 8080 62 | 63 | # 启用调试模式 64 | movery web --debug 65 | ``` 66 | 67 | ### 启动API服务器 68 | 69 | ```bash 70 | # 默认配置(localhost:8081) 71 | movery server 72 | 73 | # 自定义主机和端口 74 | movery server --host 0.0.0.0 --port 8081 75 | 76 | # 启用调试模式 77 | movery server --debug 78 | ``` 79 | 80 | ### 生成集成文件 81 | 82 | ```bash 83 | # 生成GitHub Actions工作流文件 84 | movery generate github-action 85 | 86 | # 生成GitLab CI配置文件 87 | movery generate gitlab-ci 88 | 89 | # 生成VS Code扩展配置文件 90 | movery generate vscode-extension 91 | ``` 92 | 93 | ## API文档 94 | 95 | ### 扫描代码 96 | 97 | ``` 98 | POST /api/scan/code 99 | Content-Type: application/json 100 | 101 | { 102 | "code": "代码内容", 103 | "language": "python", 104 | "fileName": "example.py" 105 | } 106 | ``` 107 | 108 | ### 扫描文件 109 | 110 | ``` 111 | POST /api/scan/file 112 | Content-Type: multipart/form-data 113 | 114 | file: [文件内容] 115 | ``` 116 | 117 | ### 扫描目录 118 | 119 | ``` 120 | POST /api/scan/directory 121 | Content-Type: application/json 122 | 123 | { 124 | "directory": "/path/to/directory", 125 | "excludePatterns": ["node_modules", "*.min.js"], 126 | "parallel": true, 127 | "incremental": false 128 | } 129 | ``` 130 | 131 | ### 获取支持的语言 132 | 133 | ``` 134 | GET /api/languages 135 | ``` 136 | 137 | ## 配置 138 | 139 | Re-movery可以通过命令行参数或配置文件进行配置。配置文件支持YAML、JSON和TOML格式。 140 | 141 | ```yaml 142 | # re-movery.yaml 143 | scanner: 144 | parallel: true 145 | incremental: true 146 | confidenceThreshold: 0.7 147 | 
148 | web: 149 | host: localhost 150 | port: 8080 151 | debug: false 152 | 153 | server: 154 | host: localhost 155 | port: 8081 156 | debug: false 157 | ``` 158 | 159 | ## 开发 160 | 161 | ### 构建 162 | 163 | ```bash 164 | cd go 165 | go build -o movery ./cmd/movery 166 | ``` 167 | 168 | ### 测试 169 | 170 | ```bash 171 | go test ./... 172 | ``` 173 | 174 | ### 贡献 175 | 176 | 欢迎提交Pull Request和Issue。请确保您的代码符合Go的代码规范,并通过所有测试。 177 | 178 | ## 许可证 179 | 180 | MIT -------------------------------------------------------------------------------- /go/cmd/movery/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/re-movery/re-movery/internal/cmd" 8 | ) 9 | 10 | func main() { 11 | // 执行根命令 12 | if err := cmd.Execute(); err != nil { 13 | fmt.Fprintf(os.Stderr, "Error: %v\n", err) 14 | os.Exit(1) 15 | } 16 | } -------------------------------------------------------------------------------- /go/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/re-movery/re-movery 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/gin-gonic/gin v1.8.1 7 | github.com/spf13/cobra v1.5.0 8 | github.com/stretchr/testify v1.8.0 9 | go.uber.org/zap v1.23.0 10 | ) 11 | 12 | require ( 13 | github.com/davecgh/go-spew v1.1.1 // indirect 14 | github.com/gin-contrib/sse v0.1.0 // indirect 15 | github.com/go-playground/locales v0.14.0 // indirect 16 | github.com/go-playground/universal-translator v0.18.0 // indirect 17 | github.com/go-playground/validator/v10 v10.11.0 // indirect 18 | github.com/goccy/go-json v0.9.10 // indirect 19 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 20 | github.com/json-iterator/go v1.1.12 // indirect 21 | github.com/leodido/go-urn v1.2.1 // indirect 22 | github.com/mattn/go-isatty v0.0.14 // indirect 23 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 24 | 
// LanguageAnalyzer defines the interface for language analyzers: parsing a
// source file into an AST and pulling declarations of various kinds out of it.
type LanguageAnalyzer interface {
	ParseFile(filename string) (ast.Node, error)
	ExtractFunctions(node ast.Node) []ast.Node
	ExtractClasses(node ast.Node) []ast.Node
	ExtractImports(node ast.Node) []string
	ExtractVariables(node ast.Node) []ast.Node
}

// GoAnalyzer implements LanguageAnalyzer for Go source code.
type GoAnalyzer struct {
	// fset records position information for every file parsed by this analyzer.
	fset *token.FileSet
}

// NewGoAnalyzer creates a new Go language analyzer with an empty file set.
func NewGoAnalyzer() *GoAnalyzer {
	return &GoAnalyzer{
		fset: token.NewFileSet(),
	}
}

// ParseFile parses the Go source file named filename, reporting all syntax
// errors (parser.AllErrors).
//
// When the source cannot be read at all, the returned node is a true nil
// interface value, so callers may safely test `node == nil`. When the source
// was read but contains syntax errors, the partial AST is returned together
// with the error, exactly as go/parser produces it.
func (ga *GoAnalyzer) ParseFile(filename string) (ast.Node, error) {
	file, err := parser.ParseFile(ga.fset, filename, nil, parser.AllErrors)
	if file == nil {
		// Returning `file` directly would wrap a nil *ast.File in a non-nil
		// ast.Node interface, defeating nil checks in callers.
		return nil, err
	}
	return file, err
}

// ExtractFunctions returns every function declaration (*ast.FuncDecl) found
// in the AST rooted at node. A nil node yields a nil slice.
func (ga *GoAnalyzer) ExtractFunctions(node ast.Node) []ast.Node {
	if node == nil {
		// ast.Inspect panics on a nil root node.
		return nil
	}
	var functions []ast.Node
	ast.Inspect(node, func(n ast.Node) bool {
		if fn, ok := n.(*ast.FuncDecl); ok {
			functions = append(functions, fn)
		}
		return true
	})
	return functions
}

// ExtractClasses returns every type specification (*ast.TypeSpec) found in
// the AST rooted at node. A nil node yields a nil slice.
func (ga *GoAnalyzer) ExtractClasses(node ast.Node) []ast.Node {
	if node == nil {
		// ast.Inspect panics on a nil root node.
		return nil
	}
	var types []ast.Node
	ast.Inspect(node, func(n ast.Node) bool {
		if t, ok := n.(*ast.TypeSpec); ok {
			types = append(types, t)
		}
		return true
	})
	return types
}

// ExtractImports returns the path literal of every import specification found
// in the AST rooted at node. Each entry is the literal's Value as written in
// the source, i.e. still surrounded by its quotes. A nil node yields a nil
// slice.
func (ga *GoAnalyzer) ExtractImports(node ast.Node) []string {
	if node == nil {
		// ast.Inspect panics on a nil root node.
		return nil
	}
	var imports []string
	ast.Inspect(node, func(n ast.Node) bool {
		if imp, ok := n.(*ast.ImportSpec); ok {
			imports = append(imports, imp.Path.Value)
		}
		return true
	})
	return imports
}

// ExtractVariables returns every value specification (*ast.ValueSpec) found
// in the AST rooted at node. A nil node yields a nil slice.
func (ga *GoAnalyzer) ExtractVariables(node ast.Node) []ast.Node {
	if node == nil {
		// ast.Inspect panics on a nil root node.
		return nil
	}
	var variables []ast.Node
	ast.Inspect(node, func(n ast.Node) bool {
		if v, ok := n.(*ast.ValueSpec); ok {
			variables = append(variables, v)
		}
		return true
	})
	return variables
}

// GetFileLanguage determines the programming language of a file from its
// extension. The comparison is case-sensitive; any extension other than
// .go, .java, .py, .js or .ts yields "unknown".
func GetFileLanguage(filename string) string {
	ext := filepath.Ext(filename)
	switch ext {
	case ".go":
		return "go"
	case ".java":
		return "java"
	case ".py":
		return "python"
	case ".js":
		return "javascript"
	case ".ts":
		return "typescript"
	default:
		return "unknown"
	}
}
"encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "net/http" 8 | "os" 9 | "path/filepath" 10 | "time" 11 | 12 | "github.com/gin-gonic/gin" 13 | "github.com/re-movery/re-movery/internal/core" 14 | "github.com/re-movery/re-movery/internal/detectors" 15 | ) 16 | 17 | // Server is the API server 18 | type Server struct { 19 | scanner *core.Scanner 20 | router *gin.Engine 21 | } 22 | 23 | // NewServer creates a new API server 24 | func NewServer() *Server { 25 | server := &Server{ 26 | scanner: core.NewScanner(), 27 | router: gin.Default(), 28 | } 29 | 30 | // Register detectors 31 | server.scanner.RegisterDetector(detectors.NewPythonDetector()) 32 | server.scanner.RegisterDetector(detectors.NewJavaScriptDetector()) 33 | 34 | // Setup routes 35 | server.setupRoutes() 36 | 37 | return server 38 | } 39 | 40 | // setupRoutes sets up the routes for the API server 41 | func (s *Server) setupRoutes() { 42 | // API routes 43 | api := s.router.Group("/api") 44 | { 45 | api.POST("/scan/code", s.scanCodeHandler) 46 | api.POST("/scan/file", s.scanFileHandler) 47 | api.POST("/scan/directory", s.scanDirectoryHandler) 48 | api.GET("/languages", s.languagesHandler) 49 | } 50 | 51 | // Health check 52 | s.router.GET("/health", s.healthHandler) 53 | } 54 | 55 | // Run runs the API server 56 | func (s *Server) Run(host string, port int) error { 57 | return s.router.Run(fmt.Sprintf("%s:%d", host, port)) 58 | } 59 | 60 | // scanCodeHandler handles code scanning 61 | func (s *Server) scanCodeHandler(c *gin.Context) { 62 | // Parse request 63 | var request struct { 64 | Code string `json:"code" binding:"required"` 65 | Language string `json:"language" binding:"required"` 66 | FileName string `json:"fileName"` 67 | } 68 | if err := c.ShouldBindJSON(&request); err != nil { 69 | c.JSON(http.StatusBadRequest, gin.H{ 70 | "error": "Invalid request: " + err.Error(), 71 | }) 72 | return 73 | } 74 | 75 | // Set default file name if not provided 76 | if request.FileName == "" { 77 | request.FileName = 
"code." + request.Language 78 | } 79 | 80 | // Check if language is supported 81 | supported := false 82 | for _, lang := range s.scanner.SupportedLanguages() { 83 | if lang == request.Language { 84 | supported = true 85 | break 86 | } 87 | } 88 | if !supported { 89 | c.JSON(http.StatusBadRequest, gin.H{ 90 | "error": "Unsupported language: " + request.Language, 91 | }) 92 | return 93 | } 94 | 95 | // Create temporary file 96 | tempDir, err := ioutil.TempDir("", "re-movery-") 97 | if err != nil { 98 | c.JSON(http.StatusInternalServerError, gin.H{ 99 | "error": "Failed to create temporary directory: " + err.Error(), 100 | }) 101 | return 102 | } 103 | defer os.RemoveAll(tempDir) 104 | 105 | tempFile := filepath.Join(tempDir, request.FileName) 106 | if err := ioutil.WriteFile(tempFile, []byte(request.Code), 0644); err != nil { 107 | c.JSON(http.StatusInternalServerError, gin.H{ 108 | "error": "Failed to write temporary file: " + err.Error(), 109 | }) 110 | return 111 | } 112 | 113 | // Scan file 114 | results, err := s.scanner.ScanFile(tempFile) 115 | if err != nil { 116 | c.JSON(http.StatusInternalServerError, gin.H{ 117 | "error": "Failed to scan code: " + err.Error(), 118 | }) 119 | return 120 | } 121 | 122 | // Generate summary 123 | summary := core.GenerateSummary(map[string][]core.Match{ 124 | request.FileName: results, 125 | }) 126 | 127 | // Return results 128 | c.JSON(http.StatusOK, gin.H{ 129 | "results": map[string][]core.Match{ 130 | request.FileName: results, 131 | }, 132 | "summary": summary, 133 | }) 134 | } 135 | 136 | // scanFileHandler handles file scanning 137 | func (s *Server) scanFileHandler(c *gin.Context) { 138 | // Get file from form 139 | file, err := c.FormFile("file") 140 | if err != nil { 141 | c.JSON(http.StatusBadRequest, gin.H{ 142 | "error": "No file provided", 143 | }) 144 | return 145 | } 146 | 147 | // Save file to temporary location 148 | tempFile := filepath.Join(os.TempDir(), file.Filename) 149 | if err := 
c.SaveUploadedFile(file, tempFile); err != nil { 150 | c.JSON(http.StatusInternalServerError, gin.H{ 151 | "error": "Failed to save file", 152 | }) 153 | return 154 | } 155 | defer os.Remove(tempFile) 156 | 157 | // Scan file 158 | results, err := s.scanner.ScanFile(tempFile) 159 | if err != nil { 160 | c.JSON(http.StatusInternalServerError, gin.H{ 161 | "error": fmt.Sprintf("Failed to scan file: %v", err), 162 | }) 163 | return 164 | } 165 | 166 | // Generate summary 167 | summary := core.GenerateSummary(map[string][]core.Match{ 168 | file.Filename: results, 169 | }) 170 | 171 | // Return results 172 | c.JSON(http.StatusOK, gin.H{ 173 | "results": map[string][]core.Match{ 174 | file.Filename: results, 175 | }, 176 | "summary": summary, 177 | }) 178 | } 179 | 180 | // scanDirectoryHandler handles directory scanning 181 | func (s *Server) scanDirectoryHandler(c *gin.Context) { 182 | // Parse request 183 | var request struct { 184 | Directory string `json:"directory" binding:"required"` 185 | ExcludePatterns []string `json:"excludePatterns"` 186 | Parallel bool `json:"parallel"` 187 | Incremental bool `json:"incremental"` 188 | } 189 | if err := c.ShouldBindJSON(&request); err != nil { 190 | c.JSON(http.StatusBadRequest, gin.H{ 191 | "error": "Invalid request: " + err.Error(), 192 | }) 193 | return 194 | } 195 | 196 | // Check if directory exists 197 | if _, err := os.Stat(request.Directory); os.IsNotExist(err) { 198 | c.JSON(http.StatusBadRequest, gin.H{ 199 | "error": "Directory does not exist", 200 | }) 201 | return 202 | } 203 | 204 | // Set scanner options 205 | s.scanner.SetParallel(request.Parallel) 206 | s.scanner.SetIncremental(request.Incremental) 207 | 208 | // Scan directory 209 | results, err := s.scanner.ScanDirectory(request.Directory, request.ExcludePatterns) 210 | if err != nil { 211 | c.JSON(http.StatusInternalServerError, gin.H{ 212 | "error": fmt.Sprintf("Failed to scan directory: %v", err), 213 | }) 214 | return 215 | } 216 | 217 | // Generate 
summary 218 | summary := core.GenerateSummary(results) 219 | 220 | // Return results 221 | c.JSON(http.StatusOK, gin.H{ 222 | "results": results, 223 | "summary": summary, 224 | }) 225 | } 226 | 227 | // languagesHandler handles the supported languages request 228 | func (s *Server) languagesHandler(c *gin.Context) { 229 | languages := s.scanner.SupportedLanguages() 230 | c.JSON(http.StatusOK, gin.H{ 231 | "languages": languages, 232 | }) 233 | } 234 | 235 | // healthHandler handles the health check request 236 | func (s *Server) healthHandler(c *gin.Context) { 237 | c.JSON(http.StatusOK, gin.H{ 238 | "status": "ok", 239 | "time": time.Now().Format(time.RFC3339), 240 | }) 241 | } -------------------------------------------------------------------------------- /go/internal/cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var rootCmd = &cobra.Command{ 11 | Use: "re-movery", 12 | Short: "Re-movery - Security Vulnerability Scanner", 13 | Long: `Re-movery is a powerful security vulnerability scanner designed to detect 14 | potential security issues in your codebase. 
It supports multiple programming 15 | languages and provides various interfaces for scanning and reporting.`, 16 | Run: func(cmd *cobra.Command, args []string) { 17 | // If no subcommand is provided, print help 18 | cmd.Help() 19 | }, 20 | } 21 | 22 | // Execute executes the root command 23 | func Execute() error { 24 | return rootCmd.Execute() 25 | } 26 | 27 | func init() { 28 | // Add global flags 29 | rootCmd.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose output") 30 | rootCmd.PersistentFlags().StringP("config", "c", "", "Config file path") 31 | 32 | // Add subcommands 33 | rootCmd.AddCommand(scanCmd) 34 | rootCmd.AddCommand(webCmd) 35 | rootCmd.AddCommand(serverCmd) 36 | rootCmd.AddCommand(generateCmd) 37 | rootCmd.AddCommand(versionCmd) 38 | } 39 | 40 | // versionCmd represents the version command 41 | var versionCmd = &cobra.Command{ 42 | Use: "version", 43 | Short: "Print the version number", 44 | Run: func(cmd *cobra.Command, args []string) { 45 | fmt.Println("Re-movery v1.0.0") 46 | }, 47 | } -------------------------------------------------------------------------------- /go/internal/cmd/scan.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | "time" 9 | 10 | "github.com/re-movery/re-movery/internal/core" 11 | "github.com/re-movery/re-movery/internal/detectors" 12 | "github.com/re-movery/re-movery/internal/reporters" 13 | "github.com/spf13/cobra" 14 | ) 15 | 16 | var ( 17 | scanFile string 18 | scanDir string 19 | excludePattern string 20 | outputFile string 21 | reportFormat string 22 | parallel bool 23 | incremental bool 24 | confidence float64 25 | ) 26 | 27 | var scanCmd = &cobra.Command{ 28 | Use: "scan", 29 | Short: "Scan files or directories for security vulnerabilities", 30 | Long: `Scan files or directories for security vulnerabilities. 
31 | Examples: 32 | re-movery scan --file path/to/file.py 33 | re-movery scan --dir path/to/directory --exclude "node_modules,*.min.js" 34 | re-movery scan --dir path/to/directory --output report.html --format html`, 35 | Run: func(cmd *cobra.Command, args []string) { 36 | // Create scanner 37 | scanner := core.NewScanner() 38 | 39 | // Register detectors 40 | scanner.RegisterDetector(detectors.NewPythonDetector()) 41 | scanner.RegisterDetector(detectors.NewJavaScriptDetector()) 42 | 43 | // Set scanner options 44 | scanner.SetParallel(parallel) 45 | scanner.SetIncremental(incremental) 46 | scanner.SetConfidenceThreshold(confidence) 47 | 48 | // Parse exclude patterns 49 | var excludePatterns []string 50 | if excludePattern != "" { 51 | excludePatterns = strings.Split(excludePattern, ",") 52 | for i, pattern := range excludePatterns { 53 | excludePatterns[i] = strings.TrimSpace(pattern) 54 | } 55 | } 56 | 57 | // Scan file or directory 58 | var results map[string][]core.Match 59 | var err error 60 | 61 | if scanFile != "" { 62 | // Check if file exists 63 | if _, err := os.Stat(scanFile); os.IsNotExist(err) { 64 | fmt.Fprintf(os.Stderr, "Error: File does not exist: %s\n", scanFile) 65 | os.Exit(1) 66 | } 67 | 68 | // Scan file 69 | matches, err := scanner.ScanFile(scanFile) 70 | if err != nil { 71 | fmt.Fprintf(os.Stderr, "Error scanning file: %v\n", err) 72 | os.Exit(1) 73 | } 74 | 75 | results = map[string][]core.Match{ 76 | scanFile: matches, 77 | } 78 | } else if scanDir != "" { 79 | // Check if directory exists 80 | if _, err := os.Stat(scanDir); os.IsNotExist(err) { 81 | fmt.Fprintf(os.Stderr, "Error: Directory does not exist: %s\n", scanDir) 82 | os.Exit(1) 83 | } 84 | 85 | // Scan directory 86 | results, err = scanner.ScanDirectory(scanDir, excludePatterns) 87 | if err != nil { 88 | fmt.Fprintf(os.Stderr, "Error scanning directory: %v\n", err) 89 | os.Exit(1) 90 | } 91 | } else { 92 | fmt.Fprintf(os.Stderr, "Error: Please specify a file or directory to 
scan\n") 93 | cmd.Help() 94 | os.Exit(1) 95 | } 96 | 97 | // Generate summary 98 | summary := core.GenerateSummary(results) 99 | 100 | // Print summary to console 101 | fmt.Printf("Scan completed in %s\n", time.Now().Format(time.RFC3339)) 102 | fmt.Printf("Files scanned: %d\n", summary.TotalFiles) 103 | fmt.Printf("Issues found: %d (High: %d, Medium: %d, Low: %d)\n", 104 | summary.High+summary.Medium+summary.Low, summary.High, summary.Medium, summary.Low) 105 | 106 | // Generate report if output file is specified 107 | if outputFile != "" { 108 | // Create report data 109 | reportData := core.ReportData{ 110 | Title: "Re-movery Security Scan Report", 111 | Timestamp: time.Now().Format(time.RFC3339), 112 | Results: results, 113 | Summary: summary, 114 | } 115 | 116 | // Determine report format 117 | if reportFormat == "" { 118 | // Try to determine format from file extension 119 | ext := strings.ToLower(filepath.Ext(outputFile)) 120 | switch ext { 121 | case ".html": 122 | reportFormat = "html" 123 | case ".json": 124 | reportFormat = "json" 125 | case ".xml": 126 | reportFormat = "xml" 127 | default: 128 | reportFormat = "html" // Default to HTML 129 | } 130 | } 131 | 132 | // Generate report 133 | var reporter core.Reporter 134 | switch strings.ToLower(reportFormat) { 135 | case "html": 136 | reporter = reporters.NewHTMLReporter() 137 | case "json": 138 | reporter = reporters.NewJSONReporter() 139 | case "xml": 140 | reporter = reporters.NewXMLReporter() 141 | default: 142 | fmt.Fprintf(os.Stderr, "Error: Unsupported report format: %s\n", reportFormat) 143 | os.Exit(1) 144 | } 145 | 146 | if err := reporter.GenerateReport(reportData, outputFile); err != nil { 147 | fmt.Fprintf(os.Stderr, "Error generating report: %v\n", err) 148 | os.Exit(1) 149 | } 150 | 151 | fmt.Printf("Report generated: %s\n", outputFile) 152 | } 153 | }, 154 | } 155 | 156 | func init() { 157 | // Add flags 158 | scanCmd.Flags().StringVar(&scanFile, "file", "", "File to scan") 159 | 
scanCmd.Flags().StringVar(&scanDir, "dir", "", "Directory to scan") 160 | scanCmd.Flags().StringVar(&excludePattern, "exclude", "", "Patterns to exclude (comma separated)") 161 | scanCmd.Flags().StringVar(&outputFile, "output", "", "Output file for the report") 162 | scanCmd.Flags().StringVar(&reportFormat, "format", "", "Report format (html, json, xml)") 163 | scanCmd.Flags().BoolVar(¶llel, "parallel", false, "Enable parallel processing") 164 | scanCmd.Flags().BoolVar(&incremental, "incremental", false, "Enable incremental scanning") 165 | scanCmd.Flags().Float64Var(&confidence, "confidence", 0.7, "Confidence threshold (0.0-1.0)") 166 | } -------------------------------------------------------------------------------- /go/internal/cmd/server.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/re-movery/re-movery/internal/api" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var ( 12 | serverHost string 13 | serverPort int 14 | serverDebug bool 15 | ) 16 | 17 | var serverCmd = &cobra.Command{ 18 | Use: "server", 19 | Short: "Start the API server", 20 | Long: `Start the API server for Re-movery. 21 | The API server provides a RESTful API for scanning files and directories for security vulnerabilities. 
22 | 23 | Examples: 24 | re-movery server 25 | re-movery server --host 0.0.0.0 --port 8081 26 | re-movery server --debug`, 27 | Run: func(cmd *cobra.Command, args []string) { 28 | // Create API server 29 | server := api.NewServer() 30 | 31 | // Start API server 32 | addr := fmt.Sprintf("%s:%d", serverHost, serverPort) 33 | fmt.Printf("Starting API server at http://%s\n", addr) 34 | 35 | if err := server.Run(serverHost, serverPort, serverDebug); err != nil { 36 | fmt.Fprintf(os.Stderr, "Error starting API server: %v\n", err) 37 | os.Exit(1) 38 | } 39 | }, 40 | } 41 | 42 | func init() { 43 | // Add flags 44 | serverCmd.Flags().StringVar(&serverHost, "host", "localhost", "Host to bind the API server to") 45 | serverCmd.Flags().IntVar(&serverPort, "port", 8081, "Port to bind the API server to") 46 | serverCmd.Flags().BoolVar(&serverDebug, "debug", false, "Enable debug mode") 47 | } -------------------------------------------------------------------------------- /go/internal/cmd/web.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/re-movery/re-movery/internal/web" 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var ( 12 | webHost string 13 | webPort int 14 | webDebug bool 15 | ) 16 | 17 | var webCmd = &cobra.Command{ 18 | Use: "web", 19 | Short: "Start the web interface", 20 | Long: `Start the web interface for Re-movery. 21 | The web interface provides a user-friendly way to scan files and directories for security vulnerabilities. 
22 | 23 | Examples: 24 | re-movery web 25 | re-movery web --host 0.0.0.0 --port 8080 26 | re-movery web --debug`, 27 | Run: func(cmd *cobra.Command, args []string) { 28 | // Create web app 29 | app := web.NewApp() 30 | 31 | // Start web server 32 | addr := fmt.Sprintf("%s:%d", webHost, webPort) 33 | fmt.Printf("Starting web server at http://%s\n", addr) 34 | 35 | if err := app.Run(webHost, webPort, webDebug); err != nil { 36 | fmt.Fprintf(os.Stderr, "Error starting web server: %v\n", err) 37 | os.Exit(1) 38 | } 39 | }, 40 | } 41 | 42 | func init() { 43 | // Add flags 44 | webCmd.Flags().StringVar(&webHost, "host", "localhost", "Host to bind the web server to") 45 | webCmd.Flags().IntVar(&webPort, "port", 8080, "Port to bind the web server to") 46 | webCmd.Flags().BoolVar(&webDebug, "debug", false, "Enable debug mode") 47 | } -------------------------------------------------------------------------------- /go/internal/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/spf13/viper" 5 | ) 6 | 7 | // Config represents the application configuration 8 | type Config struct { 9 | Processing ProcessingConfig `mapstructure:"processing"` 10 | Detector DetectorConfig `mapstructure:"detector"` 11 | Logging LoggingConfig `mapstructure:"logging"` 12 | Security SecurityConfig `mapstructure:"security"` 13 | } 14 | 15 | // ProcessingConfig contains processing-related configuration 16 | type ProcessingConfig struct { 17 | NumWorkers int `mapstructure:"num_workers"` 18 | MaxMemoryGB float64 `mapstructure:"max_memory_gb"` 19 | ChunkSizeMB int `mapstructure:"chunk_size_mb"` 20 | EnableCache bool `mapstructure:"enable_cache"` 21 | CacheSize int `mapstructure:"cache_size"` 22 | Languages []string `mapstructure:"languages"` 23 | } 24 | 25 | // DetectorConfig contains detector-related configuration 26 | type DetectorConfig struct { 27 | MinSimilarity float64 `mapstructure:"min_similarity"` 28 
| EditDistance int `mapstructure:"edit_distance"` 29 | ContextLines int `mapstructure:"context_lines"` 30 | ASTDepth int `mapstructure:"ast_depth"` 31 | CFGNodes int `mapstructure:"cfg_nodes"` 32 | ReportFormat []string `mapstructure:"report_format"` 33 | ExcludePatterns []string `mapstructure:"exclude_patterns"` 34 | } 35 | 36 | // LoggingConfig contains logging-related configuration 37 | type LoggingConfig struct { 38 | Level string `mapstructure:"level"` 39 | File string `mapstructure:"file"` 40 | Format string `mapstructure:"format"` 41 | EnableProfiling bool `mapstructure:"enable_profiling"` 42 | ShowProgress bool `mapstructure:"show_progress"` 43 | } 44 | 45 | // SecurityConfig contains security-related configuration 46 | type SecurityConfig struct { 47 | MaxFileSizeMB int `mapstructure:"max_file_size_mb"` 48 | AllowedSchemes []string `mapstructure:"allowed_schemes"` 49 | EnableSandbox bool `mapstructure:"enable_sandbox"` 50 | RequireAuth bool `mapstructure:"require_auth"` 51 | RateLimitPerHour int `mapstructure:"rate_limit_per_hour"` 52 | } 53 | 54 | // LoadConfig loads the configuration from file 55 | func LoadConfig(configFile string) (*Config, error) { 56 | viper.SetConfigFile(configFile) 57 | viper.SetConfigType("json") 58 | 59 | if err := viper.ReadInConfig(); err != nil { 60 | return nil, err 61 | } 62 | 63 | var config Config 64 | if err := viper.Unmarshal(&config); err != nil { 65 | return nil, err 66 | } 67 | 68 | return &config, nil 69 | } 70 | 71 | // SetDefaults sets default configuration values 72 | func SetDefaults() { 73 | viper.SetDefault("processing.num_workers", 4) 74 | viper.SetDefault("processing.max_memory_gb", 8) 75 | viper.SetDefault("processing.chunk_size_mb", 1) 76 | viper.SetDefault("processing.enable_cache", true) 77 | viper.SetDefault("processing.cache_size", 1000) 78 | viper.SetDefault("processing.languages", []string{"go", "java", "python", "javascript"}) 79 | 80 | viper.SetDefault("detector.min_similarity", 0.8) 81 | 
viper.SetDefault("detector.edit_distance", 3) 82 | viper.SetDefault("detector.context_lines", 3) 83 | viper.SetDefault("detector.ast_depth", 5) 84 | viper.SetDefault("detector.cfg_nodes", 100) 85 | viper.SetDefault("detector.report_format", []string{"html", "json"}) 86 | 87 | viper.SetDefault("logging.level", "info") 88 | viper.SetDefault("logging.format", "text") 89 | viper.SetDefault("logging.enable_profiling", false) 90 | viper.SetDefault("logging.show_progress", true) 91 | 92 | viper.SetDefault("security.max_file_size_mb", 10) 93 | viper.SetDefault("security.enable_sandbox", true) 94 | viper.SetDefault("security.require_auth", false) 95 | viper.SetDefault("security.rate_limit_per_hour", 1000) 96 | } -------------------------------------------------------------------------------- /go/internal/core/config.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | "gopkg.in/yaml.v3" 12 | ) 13 | 14 | // Config 表示应用程序配置 15 | type Config struct { 16 | Scanner ScannerConfig `json:"scanner" yaml:"scanner"` 17 | Web WebConfig `json:"web" yaml:"web"` 18 | Server ServerConfig `json:"server" yaml:"server"` 19 | } 20 | 21 | // ScannerConfig 表示扫描器配置 22 | type ScannerConfig struct { 23 | Parallel bool `json:"parallel" yaml:"parallel"` 24 | Incremental bool `json:"incremental" yaml:"incremental"` 25 | ConfidenceThreshold float64 `json:"confidenceThreshold" yaml:"confidenceThreshold"` 26 | ExcludePatterns []string `json:"excludePatterns" yaml:"excludePatterns"` 27 | } 28 | 29 | // WebConfig 表示Web界面配置 30 | type WebConfig struct { 31 | Host string `json:"host" yaml:"host"` 32 | Port int `json:"port" yaml:"port"` 33 | Debug bool `json:"debug" yaml:"debug"` 34 | } 35 | 36 | // ServerConfig 表示API服务器配置 37 | type ServerConfig struct { 38 | Host string `json:"host" yaml:"host"` 39 | Port int `json:"port" yaml:"port"` 40 | Debug 
bool `json:"debug" yaml:"debug"` 41 | } 42 | 43 | // NewConfig 创建一个新的配置对象,使用默认值 44 | func NewConfig() *Config { 45 | return &Config{ 46 | Scanner: ScannerConfig{ 47 | Parallel: false, 48 | Incremental: false, 49 | ConfidenceThreshold: 0.7, 50 | ExcludePatterns: []string{}, 51 | }, 52 | Web: WebConfig{ 53 | Host: "localhost", 54 | Port: 8080, 55 | Debug: false, 56 | }, 57 | Server: ServerConfig{ 58 | Host: "localhost", 59 | Port: 8081, 60 | Debug: false, 61 | }, 62 | } 63 | } 64 | 65 | // LoadConfig 从文件加载配置 66 | func LoadConfig(configPath string) (*Config, error) { 67 | // 如果未指定配置文件,则使用默认配置 68 | if configPath == "" { 69 | return NewConfig(), nil 70 | } 71 | 72 | // 检查文件是否存在 73 | if _, err := os.Stat(configPath); os.IsNotExist(err) { 74 | return nil, fmt.Errorf("配置文件不存在: %s", configPath) 75 | } 76 | 77 | // 读取文件内容 78 | data, err := ioutil.ReadFile(configPath) 79 | if err != nil { 80 | return nil, err 81 | } 82 | 83 | // 根据文件扩展名解析配置 84 | config := NewConfig() 85 | ext := strings.ToLower(filepath.Ext(configPath)) 86 | switch ext { 87 | case ".json": 88 | if err := json.Unmarshal(data, config); err != nil { 89 | return nil, err 90 | } 91 | case ".yaml", ".yml": 92 | if err := yaml.Unmarshal(data, config); err != nil { 93 | return nil, err 94 | } 95 | default: 96 | return nil, fmt.Errorf("不支持的配置文件格式: %s", ext) 97 | } 98 | 99 | return config, nil 100 | } 101 | 102 | // SaveConfig 将配置保存到文件 103 | func SaveConfig(config *Config, configPath string) error { 104 | // 创建输出目录(如果不存在) 105 | outputDir := filepath.Dir(configPath) 106 | if err := os.MkdirAll(outputDir, 0755); err != nil { 107 | return err 108 | } 109 | 110 | // 根据文件扩展名序列化配置 111 | var data []byte 112 | var err error 113 | ext := strings.ToLower(filepath.Ext(configPath)) 114 | switch ext { 115 | case ".json": 116 | data, err = json.MarshalIndent(config, "", " ") 117 | if err != nil { 118 | return err 119 | } 120 | case ".yaml", ".yml": 121 | data, err = yaml.Marshal(config) 122 | if err != nil { 123 | return err 124 | } 
125 | default: 126 | return fmt.Errorf("不支持的配置文件格式: %s", ext) 127 | } 128 | 129 | // 写入文件 130 | return ioutil.WriteFile(configPath, data, 0644) 131 | } 132 | 133 | // ApplyToScanner 将配置应用到扫描器 134 | func (c *Config) ApplyToScanner(scanner *Scanner) { 135 | scanner.SetParallel(c.Scanner.Parallel) 136 | scanner.SetIncremental(c.Scanner.Incremental) 137 | scanner.SetConfidenceThreshold(c.Scanner.ConfidenceThreshold) 138 | } -------------------------------------------------------------------------------- /go/internal/core/config_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | // 测试创建新配置 13 | func TestNewConfig(t *testing.T) { 14 | config := NewConfig() 15 | assert.NotNil(t, config) 16 | 17 | // 检查默认值 18 | assert.False(t, config.Scanner.Parallel) 19 | assert.False(t, config.Scanner.Incremental) 20 | assert.Equal(t, 0.7, config.Scanner.ConfidenceThreshold) 21 | assert.Equal(t, "localhost", config.Web.Host) 22 | assert.Equal(t, 8080, config.Web.Port) 23 | assert.False(t, config.Web.Debug) 24 | assert.Equal(t, "localhost", config.Server.Host) 25 | assert.Equal(t, 8081, config.Server.Port) 26 | assert.False(t, config.Server.Debug) 27 | } 28 | 29 | // 测试加载JSON配置 30 | func TestLoadConfigJSON(t *testing.T) { 31 | // 创建临时配置文件 32 | content := []byte(`{ 33 | "scanner": { 34 | "parallel": true, 35 | "incremental": true, 36 | "confidenceThreshold": 0.8, 37 | "excludePatterns": ["node_modules", "*.min.js"] 38 | }, 39 | "web": { 40 | "host": "0.0.0.0", 41 | "port": 9090, 42 | "debug": true 43 | }, 44 | "server": { 45 | "host": "0.0.0.0", 46 | "port": 9091, 47 | "debug": true 48 | } 49 | }`) 50 | 51 | tmpfile, err := ioutil.TempFile("", "config-*.json") 52 | assert.NoError(t, err) 53 | defer os.Remove(tmpfile.Name()) 54 | 55 | _, err = tmpfile.Write(content) 56 | assert.NoError(t, err) 57 | 
err = tmpfile.Close() 58 | assert.NoError(t, err) 59 | 60 | // 加载配置 61 | config, err := LoadConfig(tmpfile.Name()) 62 | assert.NoError(t, err) 63 | assert.NotNil(t, config) 64 | 65 | // 检查加载的值 66 | assert.True(t, config.Scanner.Parallel) 67 | assert.True(t, config.Scanner.Incremental) 68 | assert.Equal(t, 0.8, config.Scanner.ConfidenceThreshold) 69 | assert.Equal(t, []string{"node_modules", "*.min.js"}, config.Scanner.ExcludePatterns) 70 | assert.Equal(t, "0.0.0.0", config.Web.Host) 71 | assert.Equal(t, 9090, config.Web.Port) 72 | assert.True(t, config.Web.Debug) 73 | assert.Equal(t, "0.0.0.0", config.Server.Host) 74 | assert.Equal(t, 9091, config.Server.Port) 75 | assert.True(t, config.Server.Debug) 76 | } 77 | 78 | // 测试加载YAML配置 79 | func TestLoadConfigYAML(t *testing.T) { 80 | // 创建临时配置文件 81 | content := []byte(`scanner: 82 | parallel: true 83 | incremental: true 84 | confidenceThreshold: 0.8 85 | excludePatterns: 86 | - node_modules 87 | - "*.min.js" 88 | web: 89 | host: 0.0.0.0 90 | port: 9090 91 | debug: true 92 | server: 93 | host: 0.0.0.0 94 | port: 9091 95 | debug: true 96 | `) 97 | 98 | tmpfile, err := ioutil.TempFile("", "config-*.yaml") 99 | assert.NoError(t, err) 100 | defer os.Remove(tmpfile.Name()) 101 | 102 | _, err = tmpfile.Write(content) 103 | assert.NoError(t, err) 104 | err = tmpfile.Close() 105 | assert.NoError(t, err) 106 | 107 | // 加载配置 108 | config, err := LoadConfig(tmpfile.Name()) 109 | assert.NoError(t, err) 110 | assert.NotNil(t, config) 111 | 112 | // 检查加载的值 113 | assert.True(t, config.Scanner.Parallel) 114 | assert.True(t, config.Scanner.Incremental) 115 | assert.Equal(t, 0.8, config.Scanner.ConfidenceThreshold) 116 | assert.Equal(t, []string{"node_modules", "*.min.js"}, config.Scanner.ExcludePatterns) 117 | assert.Equal(t, "0.0.0.0", config.Web.Host) 118 | assert.Equal(t, 9090, config.Web.Port) 119 | assert.True(t, config.Web.Debug) 120 | assert.Equal(t, "0.0.0.0", config.Server.Host) 121 | assert.Equal(t, 9091, config.Server.Port) 
122 | assert.True(t, config.Server.Debug) 123 | } 124 | 125 | // 测试保存配置 126 | func TestSaveConfig(t *testing.T) { 127 | // 创建配置 128 | config := NewConfig() 129 | config.Scanner.Parallel = true 130 | config.Scanner.Incremental = true 131 | config.Scanner.ConfidenceThreshold = 0.8 132 | config.Scanner.ExcludePatterns = []string{"node_modules", "*.min.js"} 133 | config.Web.Host = "0.0.0.0" 134 | config.Web.Port = 9090 135 | config.Web.Debug = true 136 | config.Server.Host = "0.0.0.0" 137 | config.Server.Port = 9091 138 | config.Server.Debug = true 139 | 140 | // 创建临时文件路径 141 | tmpdir, err := ioutil.TempDir("", "config-test") 142 | assert.NoError(t, err) 143 | defer os.RemoveAll(tmpdir) 144 | 145 | // 保存JSON配置 146 | jsonPath := filepath.Join(tmpdir, "config.json") 147 | err = SaveConfig(config, jsonPath) 148 | assert.NoError(t, err) 149 | 150 | // 保存YAML配置 151 | yamlPath := filepath.Join(tmpdir, "config.yaml") 152 | err = SaveConfig(config, yamlPath) 153 | assert.NoError(t, err) 154 | 155 | // 重新加载JSON配置 156 | jsonConfig, err := LoadConfig(jsonPath) 157 | assert.NoError(t, err) 158 | assert.Equal(t, config, jsonConfig) 159 | 160 | // 重新加载YAML配置 161 | yamlConfig, err := LoadConfig(yamlPath) 162 | assert.NoError(t, err) 163 | assert.Equal(t, config, yamlConfig) 164 | } 165 | 166 | // 测试应用配置到扫描器 167 | func TestApplyToScanner(t *testing.T) { 168 | // 创建配置 169 | config := NewConfig() 170 | config.Scanner.Parallel = true 171 | config.Scanner.Incremental = true 172 | config.Scanner.ConfidenceThreshold = 0.8 173 | 174 | // 创建扫描器 175 | scanner := NewScanner() 176 | 177 | // 应用配置 178 | config.ApplyToScanner(scanner) 179 | 180 | // 检查扫描器设置 181 | assert.True(t, scanner.IsParallel()) 182 | assert.True(t, scanner.IsIncremental()) 183 | assert.Equal(t, 0.8, scanner.confidenceThreshold) 184 | } -------------------------------------------------------------------------------- /go/internal/core/models.go: -------------------------------------------------------------------------------- 1 
| package core 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | // Signature represents a vulnerability signature 8 | type Signature struct { 9 | ID string `json:"id"` 10 | Name string `json:"name"` 11 | Severity string `json:"severity"` 12 | Description string `json:"description"` 13 | CodePatterns []string `json:"codePatterns"` 14 | References []string `json:"references"` 15 | } 16 | 17 | // Match represents a vulnerability match 18 | type Match struct { 19 | Signature Signature `json:"signature"` 20 | FilePath string `json:"filePath"` 21 | LineNumber int `json:"lineNumber"` 22 | MatchedCode string `json:"matchedCode"` 23 | Confidence float64 `json:"confidence"` 24 | } 25 | 26 | // Summary represents a summary of scan results 27 | type Summary struct { 28 | TotalFiles int `json:"totalFiles"` 29 | High int `json:"high"` 30 | Medium int `json:"medium"` 31 | Low int `json:"low"` 32 | Vulnerabilities map[string]int `json:"vulnerabilities"` 33 | } 34 | 35 | // ReportData represents data for a report 36 | type ReportData struct { 37 | Title string `json:"title"` 38 | Timestamp string `json:"timestamp"` 39 | Results map[string][]Match `json:"results"` 40 | Summary Summary `json:"summary"` 41 | } 42 | 43 | // Reporter is an interface for report generators 44 | type Reporter interface { 45 | GenerateReport(data ReportData, outputPath string) error 46 | } 47 | 48 | // Detector is an interface for vulnerability detectors 49 | type Detector interface { 50 | Name() string 51 | SupportedLanguages() []string 52 | DetectFile(filePath string) ([]Match, error) 53 | DetectCode(code string, filePath string) ([]Match, error) 54 | } 55 | 56 | // GenerateSummary generates a summary from scan results 57 | func GenerateSummary(results map[string][]Match) Summary { 58 | summary := Summary{ 59 | TotalFiles: len(results), 60 | Vulnerabilities: make(map[string]int), 61 | } 62 | 63 | for _, matches := range results { 64 | for _, match := range matches { 65 | switch match.Signature.Severity { 66 | 
case "high": 67 | summary.High++ 68 | case "medium": 69 | summary.Medium++ 70 | case "low": 71 | summary.Low++ 72 | } 73 | 74 | // Count vulnerabilities by name 75 | summary.Vulnerabilities[match.Signature.Name]++ 76 | } 77 | } 78 | 79 | return summary 80 | } -------------------------------------------------------------------------------- /go/internal/core/scanner.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | "sync" 9 | ) 10 | 11 | // Scanner is a vulnerability scanner 12 | type Scanner struct { 13 | detectors []Detector 14 | parallel bool 15 | incremental bool 16 | confidenceThreshold float64 17 | cache map[string][]Match 18 | cacheMutex sync.RWMutex 19 | } 20 | 21 | // NewScanner creates a new scanner 22 | func NewScanner() *Scanner { 23 | return &Scanner{ 24 | detectors: []Detector{}, 25 | parallel: false, 26 | incremental: false, 27 | confidenceThreshold: 0.7, 28 | cache: make(map[string][]Match), 29 | } 30 | } 31 | 32 | // RegisterDetector registers a detector 33 | func (s *Scanner) RegisterDetector(detector Detector) { 34 | s.detectors = append(s.detectors, detector) 35 | } 36 | 37 | // SetParallel sets whether to use parallel processing 38 | func (s *Scanner) SetParallel(parallel bool) { 39 | s.parallel = parallel 40 | } 41 | 42 | // IsParallel returns whether parallel processing is enabled 43 | func (s *Scanner) IsParallel() bool { 44 | return s.parallel 45 | } 46 | 47 | // SetIncremental sets whether to use incremental scanning 48 | func (s *Scanner) SetIncremental(incremental bool) { 49 | s.incremental = incremental 50 | } 51 | 52 | // IsIncremental returns whether incremental scanning is enabled 53 | func (s *Scanner) IsIncremental() bool { 54 | return s.incremental 55 | } 56 | 57 | // SetConfidenceThreshold sets the confidence threshold 58 | func (s *Scanner) SetConfidenceThreshold(threshold float64) { 59 | 
s.confidenceThreshold = threshold 60 | } 61 | 62 | // SupportedLanguages returns the list of supported languages 63 | func (s *Scanner) SupportedLanguages() []string { 64 | languages := []string{} 65 | for _, detector := range s.detectors { 66 | languages = append(languages, detector.SupportedLanguages()...) 67 | } 68 | return languages 69 | } 70 | 71 | // ScanFile scans a file for vulnerabilities 72 | func (s *Scanner) ScanFile(filePath string) ([]Match, error) { 73 | // Check if file exists 74 | if _, err := os.Stat(filePath); os.IsNotExist(err) { 75 | return nil, fmt.Errorf("file does not exist: %s", filePath) 76 | } 77 | 78 | // Check if file is in cache 79 | if s.incremental { 80 | s.cacheMutex.RLock() 81 | if matches, ok := s.cache[filePath]; ok { 82 | s.cacheMutex.RUnlock() 83 | return matches, nil 84 | } 85 | s.cacheMutex.RUnlock() 86 | } 87 | 88 | // Scan file with each detector 89 | var allMatches []Match 90 | for _, detector := range s.detectors { 91 | matches, err := detector.DetectFile(filePath) 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | // Filter matches by confidence threshold 97 | for _, match := range matches { 98 | if match.Confidence >= s.confidenceThreshold { 99 | allMatches = append(allMatches, match) 100 | } 101 | } 102 | } 103 | 104 | // Update cache 105 | if s.incremental { 106 | s.cacheMutex.Lock() 107 | s.cache[filePath] = allMatches 108 | s.cacheMutex.Unlock() 109 | } 110 | 111 | return allMatches, nil 112 | } 113 | 114 | // ScanDirectory scans a directory for vulnerabilities 115 | func (s *Scanner) ScanDirectory(dirPath string, excludePatterns []string) (map[string][]Match, error) { 116 | // Check if directory exists 117 | if _, err := os.Stat(dirPath); os.IsNotExist(err) { 118 | return nil, fmt.Errorf("directory does not exist: %s", dirPath) 119 | } 120 | 121 | // Collect files to scan 122 | var filesToScan []string 123 | err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { 124 | if err != 
nil { 125 | return err 126 | } 127 | 128 | // Skip directories 129 | if info.IsDir() { 130 | // Check if directory should be excluded 131 | for _, pattern := range excludePatterns { 132 | if matched, _ := filepath.Match(pattern, info.Name()); matched { 133 | return filepath.SkipDir 134 | } 135 | } 136 | return nil 137 | } 138 | 139 | // Check if file should be excluded 140 | for _, pattern := range excludePatterns { 141 | if matched, _ := filepath.Match(pattern, info.Name()); matched { 142 | return nil 143 | } 144 | } 145 | 146 | // Check if file extension is supported 147 | ext := strings.ToLower(filepath.Ext(path)) 148 | if ext == "" { 149 | return nil 150 | } 151 | 152 | // Remove the dot from the extension 153 | ext = ext[1:] 154 | 155 | // Check if any detector supports this file type 156 | for _, detector := range s.detectors { 157 | for _, lang := range detector.SupportedLanguages() { 158 | if lang == ext { 159 | filesToScan = append(filesToScan, path) 160 | return nil 161 | } 162 | } 163 | } 164 | 165 | return nil 166 | }) 167 | 168 | if err != nil { 169 | return nil, err 170 | } 171 | 172 | // Scan files 173 | results := make(map[string][]Match) 174 | if s.parallel { 175 | // Parallel scanning 176 | var wg sync.WaitGroup 177 | resultsMutex := sync.Mutex{} 178 | 179 | for _, file := range filesToScan { 180 | wg.Add(1) 181 | go func(file string) { 182 | defer wg.Done() 183 | 184 | matches, err := s.ScanFile(file) 185 | if err != nil { 186 | // Log error but continue 187 | fmt.Fprintf(os.Stderr, "Error scanning file %s: %v\n", file, err) 188 | return 189 | } 190 | 191 | if len(matches) > 0 { 192 | resultsMutex.Lock() 193 | results[file] = matches 194 | resultsMutex.Unlock() 195 | } 196 | }(file) 197 | } 198 | 199 | wg.Wait() 200 | } else { 201 | // Sequential scanning 202 | for _, file := range filesToScan { 203 | matches, err := s.ScanFile(file) 204 | if err != nil { 205 | // Log error but continue 206 | fmt.Fprintf(os.Stderr, "Error scanning file %s: %v\n", 
file, err) 207 | continue 208 | } 209 | 210 | if len(matches) > 0 { 211 | results[file] = matches 212 | } 213 | } 214 | } 215 | 216 | return results, nil 217 | } -------------------------------------------------------------------------------- /go/internal/core/scanner_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | // 测试扫描器创建 13 | func TestNewScanner(t *testing.T) { 14 | scanner := NewScanner() 15 | assert.NotNil(t, scanner) 16 | assert.False(t, scanner.IsParallel()) 17 | assert.False(t, scanner.IsIncremental()) 18 | } 19 | 20 | // 测试设置并行处理 21 | func TestSetParallel(t *testing.T) { 22 | scanner := NewScanner() 23 | assert.False(t, scanner.IsParallel()) 24 | 25 | scanner.SetParallel(true) 26 | assert.True(t, scanner.IsParallel()) 27 | 28 | scanner.SetParallel(false) 29 | assert.False(t, scanner.IsParallel()) 30 | } 31 | 32 | // 测试设置增量扫描 33 | func TestSetIncremental(t *testing.T) { 34 | scanner := NewScanner() 35 | assert.False(t, scanner.IsIncremental()) 36 | 37 | scanner.SetIncremental(true) 38 | assert.True(t, scanner.IsIncremental()) 39 | 40 | scanner.SetIncremental(false) 41 | assert.False(t, scanner.IsIncremental()) 42 | } 43 | 44 | // 测试注册检测器 45 | func TestRegisterDetector(t *testing.T) { 46 | scanner := NewScanner() 47 | 48 | // 创建模拟检测器 49 | detector := &mockDetector{} 50 | 51 | // 注册检测器 52 | scanner.RegisterDetector(detector) 53 | 54 | // 检查支持的语言 55 | languages := scanner.SupportedLanguages() 56 | assert.Contains(t, languages, "mock") 57 | } 58 | 59 | // 测试扫描文件 60 | func TestScanFile(t *testing.T) { 61 | // 创建临时文件 62 | content := []byte("print(eval('1+1'))") 63 | tmpfile, err := ioutil.TempFile("", "example.py") 64 | assert.NoError(t, err) 65 | defer os.Remove(tmpfile.Name()) 66 | 67 | _, err = tmpfile.Write(content) 68 | assert.NoError(t, err) 69 | err = 
tmpfile.Close() 70 | assert.NoError(t, err) 71 | 72 | // 创建扫描器和模拟检测器 73 | scanner := NewScanner() 74 | detector := &mockDetector{} 75 | scanner.RegisterDetector(detector) 76 | 77 | // 扫描文件 78 | matches, err := scanner.ScanFile(tmpfile.Name()) 79 | assert.NoError(t, err) 80 | assert.Len(t, matches, 1) 81 | assert.Equal(t, "MOCK001", matches[0].Signature.ID) 82 | } 83 | 84 | // 测试扫描目录 85 | func TestScanDirectory(t *testing.T) { 86 | // 创建临时目录 87 | tmpdir, err := ioutil.TempDir("", "example") 88 | assert.NoError(t, err) 89 | defer os.RemoveAll(tmpdir) 90 | 91 | // 创建测试文件 92 | file1 := filepath.Join(tmpdir, "test1.py") 93 | err = ioutil.WriteFile(file1, []byte("print(eval('1+1'))"), 0644) 94 | assert.NoError(t, err) 95 | 96 | file2 := filepath.Join(tmpdir, "test2.py") 97 | err = ioutil.WriteFile(file2, []byte("print('Hello')"), 0644) 98 | assert.NoError(t, err) 99 | 100 | // 创建扫描器和模拟检测器 101 | scanner := NewScanner() 102 | detector := &mockDetector{} 103 | scanner.RegisterDetector(detector) 104 | 105 | // 扫描目录 106 | results, err := scanner.ScanDirectory(tmpdir, nil) 107 | assert.NoError(t, err) 108 | assert.Len(t, results, 2) 109 | 110 | // 检查结果 111 | assert.Contains(t, results, file1) 112 | assert.Contains(t, results, file2) 113 | assert.Len(t, results[file1], 1) 114 | assert.Len(t, results[file2], 1) 115 | } 116 | 117 | // 测试生成摘要 118 | func TestGenerateSummary(t *testing.T) { 119 | // 创建测试数据 120 | results := map[string][]Match{ 121 | "file1.py": { 122 | { 123 | Signature: Signature{ 124 | ID: "PY001", 125 | Name: "Dangerous eval() usage", 126 | Severity: "high", 127 | }, 128 | }, 129 | }, 130 | "file2.py": { 131 | { 132 | Signature: Signature{ 133 | ID: "PY002", 134 | Name: "Dangerous exec() usage", 135 | Severity: "high", 136 | }, 137 | }, 138 | { 139 | Signature: Signature{ 140 | ID: "PY005", 141 | Name: "Insecure random number generation", 142 | Severity: "medium", 143 | }, 144 | }, 145 | }, 146 | "file3.py": { 147 | { 148 | Signature: Signature{ 149 | ID: "PY008", 
150 | Name: "Temporary file creation risk", 151 | Severity: "medium", 152 | }, 153 | }, 154 | { 155 | Signature: Signature{ 156 | ID: "PY010", 157 | Name: "Debug mode enabled", 158 | Severity: "medium", 159 | }, 160 | }, 161 | { 162 | Signature: Signature{ 163 | ID: "PY012", 164 | Name: "Bare except block", 165 | Severity: "low", 166 | }, 167 | }, 168 | }, 169 | } 170 | 171 | // 生成摘要 172 | summary := GenerateSummary(results) 173 | 174 | // 检查摘要 175 | assert.Equal(t, 3, summary.TotalFiles) 176 | assert.Equal(t, 2, summary.High) 177 | assert.Equal(t, 3, summary.Medium) 178 | assert.Equal(t, 1, summary.Low) 179 | 180 | // 检查漏洞计数 181 | assert.Equal(t, 1, summary.Vulnerabilities["Dangerous eval() usage"]) 182 | assert.Equal(t, 1, summary.Vulnerabilities["Dangerous exec() usage"]) 183 | assert.Equal(t, 1, summary.Vulnerabilities["Insecure random number generation"]) 184 | assert.Equal(t, 1, summary.Vulnerabilities["Temporary file creation risk"]) 185 | assert.Equal(t, 1, summary.Vulnerabilities["Debug mode enabled"]) 186 | assert.Equal(t, 1, summary.Vulnerabilities["Bare except block"]) 187 | } 188 | 189 | // 模拟检测器 190 | type mockDetector struct{} 191 | 192 | func (d *mockDetector) Name() string { 193 | return "mock" 194 | } 195 | 196 | func (d *mockDetector) SupportedLanguages() []string { 197 | return []string{"mock", "py", "python"} 198 | } 199 | 200 | func (d *mockDetector) DetectFile(filePath string) ([]Match, error) { 201 | return []Match{ 202 | { 203 | Signature: Signature{ 204 | ID: "MOCK001", 205 | Name: "Mock vulnerability", 206 | Severity: "high", 207 | Description: "This is a mock vulnerability", 208 | }, 209 | FilePath: filePath, 210 | LineNumber: 1, 211 | MatchedCode: "mock code", 212 | Confidence: 0.9, 213 | }, 214 | }, nil 215 | } 216 | 217 | func (d *mockDetector) DetectCode(code string, filePath string) ([]Match, error) { 218 | return []Match{ 219 | { 220 | Signature: Signature{ 221 | ID: "MOCK001", 222 | Name: "Mock vulnerability", 223 | Severity: 
"high", 224 | Description: "This is a mock vulnerability", 225 | }, 226 | FilePath: filePath, 227 | LineNumber: 1, 228 | MatchedCode: code, 229 | Confidence: 0.9, 230 | }, 231 | }, nil 232 | } -------------------------------------------------------------------------------- /go/internal/detectors/tests/detector_test.go: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /go/internal/detectors/vulnerability.go: -------------------------------------------------------------------------------- 1 | package detectors 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "regexp" 9 | "strings" 10 | "sync" 11 | 12 | "github.com/dave/dst" 13 | "github.com/dave/dst/decorator" 14 | ) 15 | 16 | // Signature 表示漏洞签名 17 | type Signature struct { 18 | ID string `json:"id"` 19 | Name string `json:"name"` 20 | Severity string `json:"severity"` 21 | CodePatterns []string `json:"code_patterns"` 22 | } 23 | 24 | // Match 表示漏洞匹配结果 25 | type Match struct { 26 | Signature Signature 27 | LineNumber int 28 | MatchedCode string 29 | Confidence float64 30 | } 31 | 32 | // VulnerabilityDetector 漏洞检测器 33 | type VulnerabilityDetector struct { 34 | signatures []Signature 35 | minConfidence float64 36 | mu sync.RWMutex 37 | } 38 | 39 | // NewVulnerabilityDetector 创建新的漏洞检测器 40 | func NewVulnerabilityDetector() *VulnerabilityDetector { 41 | return &VulnerabilityDetector{ 42 | minConfidence: 0.7, 43 | } 44 | } 45 | 46 | // LoadSignatures 从JSON文件加载漏洞签名 47 | func (d *VulnerabilityDetector) LoadSignatures(signatureFile string) error { 48 | data, err := ioutil.ReadFile(signatureFile) 49 | if err != nil { 50 | return fmt.Errorf("读取签名文件失败: %v", err) 51 | } 52 | 53 | var sigData struct { 54 | Signatures []Signature `json:"signatures"` 55 | } 56 | 57 | if err := json.Unmarshal(data, &sigData); err != nil { 58 | return fmt.Errorf("解析签名文件失败: %v", err) 59 | } 60 | 61 | 
d.mu.Lock() 62 | d.signatures = sigData.Signatures 63 | d.mu.Unlock() 64 | 65 | return nil 66 | } 67 | 68 | // DetectFile 检测文件中的漏洞 69 | func (d *VulnerabilityDetector) DetectFile(filePath string) ([]Match, error) { 70 | content, err := ioutil.ReadFile(filePath) 71 | if err != nil { 72 | return nil, fmt.Errorf("读取文件失败: %v", err) 73 | } 74 | 75 | matches := make([]Match, 0) 76 | d.mu.RLock() 77 | signatures := d.signatures 78 | d.mu.RUnlock() 79 | 80 | // 使用goroutine并行处理每个签名 81 | var wg sync.WaitGroup 82 | matchChan := make(chan Match) 83 | done := make(chan bool) 84 | 85 | // 启动收集结果的goroutine 86 | go func() { 87 | for match := range matchChan { 88 | matches = append(matches, match) 89 | } 90 | done <- true 91 | }() 92 | 93 | for _, sig := range signatures { 94 | wg.Add(1) 95 | go func(signature Signature) { 96 | defer wg.Done() 97 | for _, pattern := range signature.CodePatterns { 98 | re, err := regexp.Compile(pattern) 99 | if err != nil { 100 | continue 101 | } 102 | 103 | // 查找所有匹配 104 | for _, match := range re.FindAllStringIndex(string(content), -1) { 105 | matchedCode := string(content[match[0]:match[1]]) 106 | confidence := d.calculateConfidence(matchedCode, pattern) 107 | 108 | if confidence >= d.minConfidence { 109 | // 计算行号 110 | lineNumber := 1 + strings.Count(string(content[:match[0]]), "\n") 111 | matchChan <- Match{ 112 | Signature: signature, 113 | LineNumber: lineNumber, 114 | MatchedCode: matchedCode, 115 | Confidence: confidence, 116 | } 117 | } 118 | } 119 | } 120 | }(sig) 121 | } 122 | 123 | // 等待所有goroutine完成并关闭通道 124 | go func() { 125 | wg.Wait() 126 | close(matchChan) 127 | }() 128 | 129 | <-done 130 | return matches, nil 131 | } 132 | 133 | // AnalyzeAST 分析AST节点中的漏洞 134 | func (d *VulnerabilityDetector) AnalyzeAST(filePath string) ([]Match, error) { 135 | fset, node, err := decorator.ParseFile(filePath, nil) 136 | if err != nil { 137 | return nil, fmt.Errorf("解析文件失败: %v", err) 138 | } 139 | 140 | matches := make([]Match, 0) 141 | d.mu.RLock() 
142 | signatures := d.signatures 143 | d.mu.RUnlock() 144 | 145 | // 遍历AST 146 | dst.Inspect(node, func(n dst.Node) bool { 147 | if call, ok := n.(*dst.CallExpr); ok { 148 | var funcName string 149 | switch fun := call.Fun.(type) { 150 | case *dst.Ident: 151 | funcName = fun.Name 152 | case *dst.SelectorExpr: 153 | if x, ok := fun.X.(*dst.Ident); ok { 154 | funcName = x.Name + "." + fun.Sel.Name 155 | } 156 | default: 157 | return true 158 | } 159 | 160 | // 检查是否匹配任何签名 161 | for _, sig := range signatures { 162 | for _, pattern := range sig.CodePatterns { 163 | if matched, _ := regexp.MatchString(pattern, funcName); matched { 164 | matches = append(matches, Match{ 165 | Signature: sig, 166 | LineNumber: fset.Position(call.Pos()).Line, 167 | MatchedCode: funcName, 168 | Confidence: 0.9, 169 | }) 170 | } 171 | } 172 | } 173 | } 174 | return true 175 | }) 176 | 177 | return matches, nil 178 | } 179 | 180 | // DetectSimilarPatterns 检测相似的漏洞模式 181 | func (d *VulnerabilityDetector) DetectSimilarPatterns(filePath string, threshold float64) ([]Match, error) { 182 | fset, node, err := decorator.ParseFile(filePath, nil) 183 | if err != nil { 184 | return nil, fmt.Errorf("解析文件失败: %v", err) 185 | } 186 | 187 | matches := make([]Match, 0) 188 | d.mu.RLock() 189 | signatures := d.signatures 190 | d.mu.RUnlock() 191 | 192 | // 遍历AST查找相似模式 193 | dst.Inspect(node, func(n dst.Node) bool { 194 | if call, ok := n.(*dst.CallExpr); ok { 195 | var funcName string 196 | switch fun := call.Fun.(type) { 197 | case *dst.Ident: 198 | funcName = fun.Name 199 | case *dst.SelectorExpr: 200 | if x, ok := fun.X.(*dst.Ident); ok { 201 | funcName = x.Name + "." 
+ fun.Sel.Name 202 | } 203 | default: 204 | return true 205 | } 206 | 207 | // 检查每个签名 208 | for _, sig := range signatures { 209 | for _, pattern := range sig.CodePatterns { 210 | similarity := d.calculateSimilarity(funcName, pattern) 211 | if similarity >= threshold { 212 | matches = append(matches, Match{ 213 | Signature: sig, 214 | LineNumber: fset.Position(call.Pos()).Line, 215 | MatchedCode: funcName, 216 | Confidence: similarity, 217 | }) 218 | } 219 | } 220 | } 221 | } 222 | return true 223 | }) 224 | 225 | return matches, nil 226 | } 227 | 228 | // calculateConfidence 计算匹配的置信度 229 | func (d *VulnerabilityDetector) calculateConfidence(matchedCode, pattern string) float64 { 230 | // 基本匹配的置信度为0.7 231 | confidence := 0.7 232 | 233 | // 根据匹配的完整性增加置信度 234 | if len(matchedCode) > 10 { 235 | confidence += 0.1 236 | } 237 | 238 | // 根据上下文增加置信度 239 | if strings.Contains(matchedCode, "import") { 240 | confidence += 0.1 241 | } 242 | 243 | // 根据模式的特异性增加置信度 244 | if len(pattern) > 20 { 245 | confidence += 0.1 246 | } 247 | 248 | if confidence > 1.0 { 249 | confidence = 1.0 250 | } 251 | return confidence 252 | } 253 | 254 | // calculateSimilarity 计算两个字符串的相似度 255 | func (d *VulnerabilityDetector) calculateSimilarity(str1, str2 string) float64 { 256 | // 使用最长公共子序列(LCS)计算相似度 257 | m, n := len(str1), len(str2) 258 | dp := make([][]int, m+1) 259 | for i := range dp { 260 | dp[i] = make([]int, n+1) 261 | } 262 | 263 | for i := 1; i <= m; i++ { 264 | for j := 1; j <= n; j++ { 265 | if str1[i-1] == str2[j-1] { 266 | dp[i][j] = dp[i-1][j-1] + 1 267 | } else { 268 | dp[i][j] = max(dp[i-1][j], dp[i][j-1]) 269 | } 270 | } 271 | } 272 | 273 | lcsLength := dp[m][n] 274 | maxLen := max(m, n) 275 | if maxLen == 0 { 276 | return 0 277 | } 278 | return float64(lcsLength) / float64(maxLen) 279 | } 280 | 281 | // max 返回两个整数中的较大值 282 | func max(a, b int) int { 283 | if a > b { 284 | return a 285 | } 286 | return b 287 | } 
-------------------------------------------------------------------------------- /go/internal/reporters/json.go: -------------------------------------------------------------------------------- 1 | package reporters 2 | 3 | import ( 4 | "encoding/json" 5 | "os" 6 | "path/filepath" 7 | 8 | "github.com/re-movery/re-movery/internal/core" 9 | ) 10 | 11 | // JSONReporter is a reporter that generates JSON reports 12 | type JSONReporter struct{} 13 | 14 | // NewJSONReporter creates a new JSON reporter 15 | func NewJSONReporter() *JSONReporter { 16 | return &JSONReporter{} 17 | } 18 | 19 | // GenerateReport generates a report 20 | func (r *JSONReporter) GenerateReport(data core.ReportData, outputPath string) error { 21 | // Create output directory if it doesn't exist 22 | outputDir := filepath.Dir(outputPath) 23 | if err := os.MkdirAll(outputDir, 0755); err != nil { 24 | return err 25 | } 26 | 27 | // Create output file 28 | file, err := os.Create(outputPath) 29 | if err != nil { 30 | return err 31 | } 32 | defer file.Close() 33 | 34 | // Marshal data to JSON 35 | encoder := json.NewEncoder(file) 36 | encoder.SetIndent("", " ") 37 | if err := encoder.Encode(data); err != nil { 38 | return err 39 | } 40 | 41 | return nil 42 | } -------------------------------------------------------------------------------- /go/internal/reporters/xml.go: -------------------------------------------------------------------------------- 1 | package reporters 2 | 3 | import ( 4 | "encoding/xml" 5 | "os" 6 | "path/filepath" 7 | 8 | "github.com/re-movery/re-movery/internal/core" 9 | ) 10 | 11 | // XMLReporter is a reporter that generates XML reports 12 | type XMLReporter struct{} 13 | 14 | // NewXMLReporter creates a new XML reporter 15 | func NewXMLReporter() *XMLReporter { 16 | return &XMLReporter{} 17 | } 18 | 19 | // XMLReportData is the XML representation of the report data 20 | type XMLReportData struct { 21 | XMLName xml.Name `xml:"report"` 22 | Title string `xml:"title"` 23 | Timestamp 
string `xml:"timestamp"` 24 | Summary XMLSummary `xml:"summary"` 25 | Results []XMLFileResult `xml:"results>file"` 26 | } 27 | 28 | // XMLSummary is the XML representation of the summary 29 | type XMLSummary struct { 30 | TotalFiles int `xml:"totalFiles,attr"` 31 | High int `xml:"high,attr"` 32 | Medium int `xml:"medium,attr"` 33 | Low int `xml:"low,attr"` 34 | } 35 | 36 | // XMLFileResult is the XML representation of a file result 37 | type XMLFileResult struct { 38 | Path string `xml:"path,attr"` 39 | Matches []XMLMatch `xml:"match"` 40 | } 41 | 42 | // XMLMatch is the XML representation of a match 43 | type XMLMatch struct { 44 | ID string `xml:"id,attr"` 45 | Name string `xml:"name"` 46 | Severity string `xml:"severity"` 47 | Description string `xml:"description"` 48 | LineNumber int `xml:"lineNumber"` 49 | MatchedCode string `xml:"matchedCode"` 50 | Confidence float64 `xml:"confidence"` 51 | } 52 | 53 | // GenerateReport generates a report 54 | func (r *XMLReporter) GenerateReport(data core.ReportData, outputPath string) error { 55 | // Create output directory if it doesn't exist 56 | outputDir := filepath.Dir(outputPath) 57 | if err := os.MkdirAll(outputDir, 0755); err != nil { 58 | return err 59 | } 60 | 61 | // Create output file 62 | file, err := os.Create(outputPath) 63 | if err != nil { 64 | return err 65 | } 66 | defer file.Close() 67 | 68 | // Convert data to XML format 69 | xmlData := r.convertToXML(data) 70 | 71 | // Write XML header 72 | file.WriteString(xml.Header) 73 | 74 | // Marshal data to XML 75 | encoder := xml.NewEncoder(file) 76 | encoder.Indent("", " ") 77 | if err := encoder.Encode(xmlData); err != nil { 78 | return err 79 | } 80 | 81 | return nil 82 | } 83 | 84 | // convertToXML converts the report data to XML format 85 | func (r *XMLReporter) convertToXML(data core.ReportData) XMLReportData { 86 | xmlData := XMLReportData{ 87 | Title: data.Title, 88 | Timestamp: data.Timestamp, 89 | Summary: XMLSummary{ 90 | TotalFiles: 
data.Summary.TotalFiles, 91 | High: data.Summary.High, 92 | Medium: data.Summary.Medium, 93 | Low: data.Summary.Low, 94 | }, 95 | Results: []XMLFileResult{}, 96 | } 97 | 98 | // Convert results 99 | for filePath, matches := range data.Results { 100 | fileResult := XMLFileResult{ 101 | Path: filePath, 102 | Matches: []XMLMatch{}, 103 | } 104 | 105 | for _, match := range matches { 106 | xmlMatch := XMLMatch{ 107 | ID: match.Signature.ID, 108 | Name: match.Signature.Name, 109 | Severity: match.Signature.Severity, 110 | Description: match.Signature.Description, 111 | LineNumber: match.LineNumber, 112 | MatchedCode: match.MatchedCode, 113 | Confidence: match.Confidence, 114 | } 115 | fileResult.Matches = append(fileResult.Matches, xmlMatch) 116 | } 117 | 118 | xmlData.Results = append(xmlData.Results, fileResult) 119 | } 120 | 121 | return xmlData 122 | } -------------------------------------------------------------------------------- /go/internal/utils/logging.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "sync" 7 | 8 | "github.com/sirupsen/logrus" 9 | ) 10 | 11 | var ( 12 | logger *logrus.Logger 13 | once sync.Once 14 | ) 15 | 16 | // GetLogger returns the singleton logger instance 17 | func GetLogger() *logrus.Logger { 18 | once.Do(func() { 19 | logger = logrus.New() 20 | logger.SetFormatter(&logrus.TextFormatter{ 21 | FullTimestamp: true, 22 | }) 23 | logger.SetOutput(os.Stdout) 24 | logger.SetLevel(logrus.InfoLevel) 25 | }) 26 | return logger 27 | } 28 | 29 | // FileLogger represents a logger that writes to a file 30 | type FileLogger struct { 31 | *logrus.Logger 32 | file *os.File 33 | } 34 | 35 | // NewFileLogger creates a new file logger 36 | func NewFileLogger(filename string) (*FileLogger, error) { 37 | file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) 38 | if err != nil { 39 | return nil, err 40 | } 41 | 42 | logger := logrus.New() 43 | 
logger.SetFormatter(&logrus.JSONFormatter{}) 44 | logger.SetOutput(io.MultiWriter(file, os.Stdout)) 45 | 46 | return &FileLogger{ 47 | Logger: logger, 48 | file: file, 49 | }, nil 50 | } 51 | 52 | // Close closes the log file 53 | func (fl *FileLogger) Close() error { 54 | if fl.file != nil { 55 | return fl.file.Close() 56 | } 57 | return nil 58 | } 59 | 60 | // SetVerbosity sets the logging level based on verbosity 61 | func SetVerbosity(verbose bool) { 62 | if verbose { 63 | GetLogger().SetLevel(logrus.DebugLevel) 64 | } else { 65 | GetLogger().SetLevel(logrus.InfoLevel) 66 | } 67 | } -------------------------------------------------------------------------------- /go/internal/utils/memory.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "container/list" 5 | "runtime" 6 | "sync" 7 | "time" 8 | 9 | "github.com/shirou/gopsutil/v3/mem" 10 | ) 11 | 12 | // MemoryMonitor monitors system memory usage 13 | type MemoryMonitor struct { 14 | maxMemoryGB float64 15 | interval time.Duration 16 | stopChan chan struct{} 17 | } 18 | 19 | // NewMemoryMonitor creates a new memory monitor 20 | func NewMemoryMonitor(maxMemoryGB float64, interval time.Duration) *MemoryMonitor { 21 | return &MemoryMonitor{ 22 | maxMemoryGB: maxMemoryGB, 23 | interval: interval, 24 | stopChan: make(chan struct{}), 25 | } 26 | } 27 | 28 | // Start starts monitoring memory usage 29 | func (mm *MemoryMonitor) Start() { 30 | go func() { 31 | ticker := time.NewTicker(mm.interval) 32 | defer ticker.Stop() 33 | 34 | for { 35 | select { 36 | case <-ticker.C: 37 | v, err := mem.VirtualMemory() 38 | if err != nil { 39 | GetLogger().Errorf("Failed to get memory stats: %v", err) 40 | continue 41 | } 42 | 43 | usedGB := float64(v.Used) / (1024 * 1024 * 1024) 44 | if usedGB > mm.maxMemoryGB { 45 | GetLogger().Warnf("Memory usage (%.2f GB) exceeds limit (%.2f GB), triggering GC", usedGB, mm.maxMemoryGB) 46 | runtime.GC() 47 | } 48 | case 
<-mm.stopChan: 49 | return 50 | } 51 | } 52 | }() 53 | } 54 | 55 | // Stop stops the memory monitor 56 | func (mm *MemoryMonitor) Stop() { 57 | close(mm.stopChan) 58 | } 59 | 60 | // LRUCache implements a thread-safe LRU cache 61 | type LRUCache struct { 62 | capacity int 63 | cache map[interface{}]*list.Element 64 | ll *list.List 65 | mutex sync.RWMutex 66 | } 67 | 68 | type entry struct { 69 | key interface{} 70 | value interface{} 71 | } 72 | 73 | // NewLRUCache creates a new LRU cache with the specified capacity 74 | func NewLRUCache(capacity int) *LRUCache { 75 | return &LRUCache{ 76 | capacity: capacity, 77 | cache: make(map[interface{}]*list.Element), 78 | ll: list.New(), 79 | } 80 | } 81 | 82 | // Get retrieves a value from the cache 83 | func (c *LRUCache) Get(key interface{}) (interface{}, bool) { 84 | c.mutex.RLock() 85 | defer c.mutex.RUnlock() 86 | 87 | if elem, ok := c.cache[key]; ok { 88 | c.ll.MoveToFront(elem) 89 | return elem.Value.(*entry).value, true 90 | } 91 | return nil, false 92 | } 93 | 94 | // Put adds a value to the cache 95 | func (c *LRUCache) Put(key, value interface{}) { 96 | c.mutex.Lock() 97 | defer c.mutex.Unlock() 98 | 99 | if elem, ok := c.cache[key]; ok { 100 | c.ll.MoveToFront(elem) 101 | elem.Value.(*entry).value = value 102 | return 103 | } 104 | 105 | if c.ll.Len() >= c.capacity { 106 | oldest := c.ll.Back() 107 | if oldest != nil { 108 | c.ll.Remove(oldest) 109 | delete(c.cache, oldest.Value.(*entry).key) 110 | } 111 | } 112 | 113 | elem := c.ll.PushFront(&entry{key, value}) 114 | c.cache[key] = elem 115 | } -------------------------------------------------------------------------------- /go/internal/utils/parallel.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | // Job represents a unit of work 8 | type Job interface { 9 | Execute() error 10 | } 11 | 12 | // WorkerPool manages a pool of workers for parallel processing 13 | type 
// Job represents a unit of work.
type Job interface {
	Execute() error
}

// WorkerPool runs submitted jobs on a fixed number of goroutines and
// publishes each job's error (nil on success) on the results channel.
type WorkerPool struct {
	numWorkers int
	jobs       chan Job
	results    chan error
	wg         sync.WaitGroup
	stopChan   chan struct{}
	stopOnce   sync.Once // makes Stop idempotent
}

// NewWorkerPool creates a pool with numWorkers workers. queueSize is the
// buffer size of both the job queue and the results channel.
func NewWorkerPool(numWorkers int, queueSize int) *WorkerPool {
	return &WorkerPool{
		numWorkers: numWorkers,
		jobs:       make(chan Job, queueSize),
		results:    make(chan error, queueSize),
		stopChan:   make(chan struct{}),
	}
}

// Start launches the worker goroutines.
func (wp *WorkerPool) Start() {
	for i := 0; i < wp.numWorkers; i++ {
		wp.wg.Add(1)
		go wp.worker()
	}
}

// worker executes jobs until the pool is stopped.
func (wp *WorkerPool) worker() {
	defer wp.wg.Done()

	for {
		select {
		case <-wp.stopChan:
			return
		case job := <-wp.jobs:
			if job == nil {
				// A nil job has nothing to execute; skip it rather than
				// silently shrinking the pool by exiting this worker.
				continue
			}
			err := job.Execute()
			// Publishing must also watch stopChan: if nobody drains results
			// and the buffer is full, a plain send would block forever and
			// Stop would hang in wg.Wait.
			select {
			case wp.results <- err:
			case <-wp.stopChan:
				return
			}
		}
	}
}

// Submit enqueues job, blocking while the queue is full. Once the pool has
// been stopped Submit returns without blocking; the job may be dropped and
// will not be executed.
func (wp *WorkerPool) Submit(job Job) {
	select {
	case wp.jobs <- job:
	case <-wp.stopChan:
	}
}

// Stop signals all workers to exit, waits for them, and then closes the
// results channel. Jobs still queued are not executed. Stop is safe to call
// more than once.
//
// The job queue is intentionally left open so that a late or concurrent
// Submit cannot panic with a send on a closed channel.
func (wp *WorkerPool) Stop() {
	wp.stopOnce.Do(func() {
		close(wp.stopChan)
		wp.wg.Wait()
		close(wp.results)
	})
}

// Results returns the channel on which job results are published. It is
// closed by Stop.
func (wp *WorkerPool) Results() <-chan error {
	return wp.results
}
"random_generation", "sensitive_data"} 20 | for _, pattern := range expectedPatterns { 21 | if patterns, ok := checker.sensitivePatterns[pattern]; !ok || len(patterns) == 0 { 22 | t.Errorf("缺少预期的模式类型: %s", pattern) 23 | } 24 | } 25 | } 26 | 27 | func createTestFile(content string) (string, error) { 28 | tmpfile, err := os.CreateTemp("", "test_*.go") 29 | if err != nil { 30 | return "", err 31 | } 32 | 33 | if _, err := tmpfile.Write([]byte(content)); err != nil { 34 | os.Remove(tmpfile.Name()) 35 | return "", err 36 | } 37 | 38 | if err := tmpfile.Close(); err != nil { 39 | os.Remove(tmpfile.Name()) 40 | return "", err 41 | } 42 | 43 | return tmpfile.Name(), nil 44 | } 45 | 46 | func TestCheckMemoryUsage(t *testing.T) { 47 | checker := NewSecurityChecker() 48 | content := `package main 49 | 50 | import "fmt" 51 | 52 | func main() { 53 | var arr []int 54 | for i := 0; i < 1000; i++ { 55 | arr = append(arr, i) 56 | } 57 | fmt.Println(arr) 58 | }` 59 | 60 | filename, err := createTestFile(content) 61 | if err != nil { 62 | t.Fatalf("创建测试文件失败: %v", err) 63 | } 64 | defer os.Remove(filename) 65 | 66 | usage, err := checker.CheckMemoryUsage(filename) 67 | if err != nil { 68 | t.Errorf("检查内存使用失败: %v", err) 69 | } 70 | 71 | if usage == 0 { 72 | t.Error("内存使用量不应为0") 73 | } 74 | } 75 | 76 | func TestCheckExecutionTime(t *testing.T) { 77 | checker := NewSecurityChecker() 78 | content := `package main 79 | 80 | import "time" 81 | 82 | func main() { 83 | time.Sleep(time.Second) 84 | }` 85 | 86 | filename, err := createTestFile(content) 87 | if err != nil { 88 | t.Fatalf("创建测试文件失败: %v", err) 89 | } 90 | defer os.Remove(filename) 91 | 92 | // 测试正常超时 93 | err = checker.CheckExecutionTime(filename, 5*time.Second) 94 | if err != nil { 95 | t.Errorf("执行时间检查失败: %v", err) 96 | } 97 | 98 | // 测试超时情况 99 | err = checker.CheckExecutionTime(filename, 1*time.Millisecond) 100 | if err == nil { 101 | t.Error("预期应该发生超时错误") 102 | } 103 | } 104 | 105 | func TestCheckFileAccess(t *testing.T) { 106 
| checker := NewSecurityChecker() 107 | content := `package main 108 | 109 | import ( 110 | "os" 111 | "io/ioutil" 112 | ) 113 | 114 | func main() { 115 | os.Open("test.txt") 116 | ioutil.ReadFile("config.json") 117 | }` 118 | 119 | filename, err := createTestFile(content) 120 | if err != nil { 121 | t.Fatalf("创建测试文件失败: %v", err) 122 | } 123 | defer os.Remove(filename) 124 | 125 | violations, err := checker.CheckFileAccess(filename) 126 | if err != nil { 127 | t.Errorf("文件访问检查失败: %v", err) 128 | } 129 | 130 | if len(violations) == 0 { 131 | t.Error("应该检测到文件访问违规") 132 | } 133 | } 134 | 135 | func TestCheckNetworkAccess(t *testing.T) { 136 | checker := NewSecurityChecker() 137 | content := `package main 138 | 139 | import ( 140 | "net" 141 | "net/http" 142 | ) 143 | 144 | func main() { 145 | net.Dial("tcp", "localhost:8080") 146 | http.Get("http://example.com") 147 | }` 148 | 149 | filename, err := createTestFile(content) 150 | if err != nil { 151 | t.Fatalf("创建测试文件失败: %v", err) 152 | } 153 | defer os.Remove(filename) 154 | 155 | violations, err := checker.CheckNetworkAccess(filename) 156 | if err != nil { 157 | t.Errorf("网络访问检查失败: %v", err) 158 | } 159 | 160 | if len(violations) == 0 { 161 | t.Error("应该检测到网络访问违规") 162 | } 163 | } 164 | 165 | func TestCheckInputValidation(t *testing.T) { 166 | checker := NewSecurityChecker() 167 | content := `package main 168 | 169 | import ( 170 | "fmt" 171 | "bufio" 172 | "os" 173 | ) 174 | 175 | func main() { 176 | var input string 177 | fmt.Scanln(&input) 178 | scanner := bufio.NewScanner(os.Stdin) 179 | }` 180 | 181 | filename, err := createTestFile(content) 182 | if err != nil { 183 | t.Fatalf("创建测试文件失败: %v", err) 184 | } 185 | defer os.Remove(filename) 186 | 187 | issues, err := checker.CheckInputValidation(filename) 188 | if err != nil { 189 | t.Errorf("输入验证检查失败: %v", err) 190 | } 191 | 192 | if len(issues) == 0 { 193 | t.Error("应该检测到未验证的输入") 194 | } 195 | } 196 | 197 | func TestCheckRandomGeneration(t *testing.T) { 198 | 
checker := NewSecurityChecker() 199 | content := `package main 200 | 201 | import ( 202 | "math/rand" 203 | "crypto/rand" 204 | ) 205 | 206 | func main() { 207 | rand.Int() 208 | rand.Read(make([]byte, 32)) 209 | }` 210 | 211 | filename, err := createTestFile(content) 212 | if err != nil { 213 | t.Fatalf("创建测试文件失败: %v", err) 214 | } 215 | defer os.Remove(filename) 216 | 217 | issues, err := checker.CheckRandomGeneration(filename) 218 | if err != nil { 219 | t.Errorf("随机数生成检查失败: %v", err) 220 | } 221 | 222 | if len(issues) == 0 { 223 | t.Error("应该检测到不安全的随机数生成") 224 | } 225 | } 226 | 227 | func TestCheckSensitiveData(t *testing.T) { 228 | checker := NewSecurityChecker() 229 | content := `package main 230 | 231 | import "fmt" 232 | 233 | func main() { 234 | password := "secret123" 235 | fmt.Printf("Password: %s\n", password) 236 | }` 237 | 238 | filename, err := createTestFile(content) 239 | if err != nil { 240 | t.Fatalf("创建测试文件失败: %v", err) 241 | } 242 | defer os.Remove(filename) 243 | 244 | issues, err := checker.CheckSensitiveData(filename) 245 | if err != nil { 246 | t.Errorf("敏感数据检查失败: %v", err) 247 | } 248 | 249 | if len(issues) == 0 { 250 | t.Error("应该检测到敏感数据泄露风险") 251 | } 252 | } 253 | 254 | func TestCheckSandboxEscape(t *testing.T) { 255 | checker := NewSecurityChecker() 256 | content := `package main 257 | 258 | import ( 259 | "os" 260 | "os/exec" 261 | ) 262 | 263 | func main() { 264 | os.Remove("test.txt") 265 | exec.Command("ls").Run() 266 | }` 267 | 268 | filename, err := createTestFile(content) 269 | if err != nil { 270 | t.Fatalf("创建测试文件失败: %v", err) 271 | } 272 | defer os.Remove(filename) 273 | 274 | violations, err := checker.CheckSandboxEscape(filename) 275 | if err != nil { 276 | t.Errorf("沙箱逃逸检查失败: %v", err) 277 | } 278 | 279 | if len(violations) == 0 { 280 | t.Error("应该检测到沙箱逃逸风险") 281 | } 282 | } 283 | 284 | func TestPerformFullCheck(t *testing.T) { 285 | checker := NewSecurityChecker() 286 | content := `package main 287 | 288 | import ( 289 | 
"fmt" 290 | "os" 291 | "net/http" 292 | "math/rand" 293 | ) 294 | 295 | func main() { 296 | password := "secret123" 297 | os.Open("test.txt") 298 | http.Get("http://example.com") 299 | rand.Int() 300 | fmt.Printf("Password: %s\n", password) 301 | }` 302 | 303 | filename, err := createTestFile(content) 304 | if err != nil { 305 | t.Fatalf("创建测试文件失败: %v", err) 306 | } 307 | defer os.Remove(filename) 308 | 309 | results, err := checker.PerformFullCheck(filename) 310 | if err != nil { 311 | t.Errorf("完整检查失败: %v", err) 312 | } 313 | 314 | expectedChecks := []string{ 315 | "memory_usage", 316 | "execution_time", 317 | "file_access", 318 | "network_access", 319 | "input_validation", 320 | "random_generation", 321 | "sensitive_data", 322 | "sandbox_escape", 323 | } 324 | 325 | for _, check := range expectedChecks { 326 | if _, ok := results[check]; !ok { 327 | t.Errorf("缺少检查结果: %s", check) 328 | } 329 | } 330 | } -------------------------------------------------------------------------------- /go/internal/web/app.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "fmt" 5 | "html/template" 6 | "io/ioutil" 7 | "net/http" 8 | "os" 9 | "path/filepath" 10 | "time" 11 | 12 | "github.com/gin-gonic/gin" 13 | "github.com/re-movery/re-movery/internal/core" 14 | "github.com/re-movery/re-movery/internal/detectors" 15 | ) 16 | 17 | // App is the web application 18 | type App struct { 19 | scanner *core.Scanner 20 | router *gin.Engine 21 | } 22 | 23 | // NewApp creates a new web application 24 | func NewApp() *App { 25 | app := &App{ 26 | scanner: core.NewScanner(), 27 | router: gin.Default(), 28 | } 29 | 30 | // Register detectors 31 | app.scanner.RegisterDetector(detectors.NewPythonDetector()) 32 | app.scanner.RegisterDetector(detectors.NewJavaScriptDetector()) 33 | 34 | // Setup routes 35 | app.setupRoutes() 36 | 37 | return app 38 | } 39 | 40 | // setupRoutes sets up the routes for the web application 41 | func 
(a *App) setupRoutes() { 42 | // Serve static files 43 | a.router.Static("/static", "./static") 44 | 45 | // Load templates 46 | a.router.LoadHTMLGlob("templates/*") 47 | 48 | // Routes 49 | a.router.GET("/", a.indexHandler) 50 | a.router.POST("/scan/file", a.scanFileHandler) 51 | a.router.POST("/scan/directory", a.scanDirectoryHandler) 52 | a.router.GET("/api/languages", a.languagesHandler) 53 | a.router.GET("/health", a.healthHandler) 54 | } 55 | 56 | // Run runs the web application 57 | func (a *App) Run(host string, port int) error { 58 | return a.router.Run(fmt.Sprintf("%s:%d", host, port)) 59 | } 60 | 61 | // indexHandler handles the index page 62 | func (a *App) indexHandler(c *gin.Context) { 63 | c.HTML(http.StatusOK, "index.html", gin.H{ 64 | "title": "Re-movery - Security Scanner", 65 | }) 66 | } 67 | 68 | // scanFileHandler handles file scanning 69 | func (a *App) scanFileHandler(c *gin.Context) { 70 | // Get file from form 71 | file, err := c.FormFile("file") 72 | if err != nil { 73 | c.JSON(http.StatusBadRequest, gin.H{ 74 | "error": "No file provided", 75 | }) 76 | return 77 | } 78 | 79 | // Save file to temporary location 80 | tempFile := filepath.Join(os.TempDir(), file.Filename) 81 | if err := c.SaveUploadedFile(file, tempFile); err != nil { 82 | c.JSON(http.StatusInternalServerError, gin.H{ 83 | "error": "Failed to save file", 84 | }) 85 | return 86 | } 87 | defer os.Remove(tempFile) 88 | 89 | // Scan file 90 | results, err := a.scanner.ScanFile(tempFile) 91 | if err != nil { 92 | c.JSON(http.StatusInternalServerError, gin.H{ 93 | "error": fmt.Sprintf("Failed to scan file: %v", err), 94 | }) 95 | return 96 | } 97 | 98 | // Generate summary 99 | summary := core.GenerateSummary(map[string][]core.Match{ 100 | file.Filename: results, 101 | }) 102 | 103 | // Return results 104 | c.JSON(http.StatusOK, gin.H{ 105 | "results": map[string][]core.Match{ 106 | file.Filename: results, 107 | }, 108 | "summary": summary, 109 | }) 110 | } 111 | 112 | // 
scanDirectoryHandler handles directory scanning 113 | func (a *App) scanDirectoryHandler(c *gin.Context) { 114 | // Get directory path from form 115 | directory := c.PostForm("directory") 116 | if directory == "" { 117 | c.JSON(http.StatusBadRequest, gin.H{ 118 | "error": "No directory provided", 119 | }) 120 | return 121 | } 122 | 123 | // Check if directory exists 124 | if _, err := os.Stat(directory); os.IsNotExist(err) { 125 | c.JSON(http.StatusBadRequest, gin.H{ 126 | "error": "Directory does not exist", 127 | }) 128 | return 129 | } 130 | 131 | // Get exclude patterns 132 | excludePatterns := c.PostFormArray("exclude") 133 | 134 | // Scan directory 135 | results, err := a.scanner.ScanDirectory(directory, excludePatterns) 136 | if err != nil { 137 | c.JSON(http.StatusInternalServerError, gin.H{ 138 | "error": fmt.Sprintf("Failed to scan directory: %v", err), 139 | }) 140 | return 141 | } 142 | 143 | // Generate summary 144 | summary := core.GenerateSummary(results) 145 | 146 | // Return results 147 | c.JSON(http.StatusOK, gin.H{ 148 | "results": results, 149 | "summary": summary, 150 | }) 151 | } 152 | 153 | // languagesHandler handles the supported languages request 154 | func (a *App) languagesHandler(c *gin.Context) { 155 | languages := a.scanner.SupportedLanguages() 156 | c.JSON(http.StatusOK, gin.H{ 157 | "languages": languages, 158 | }) 159 | } 160 | 161 | // healthHandler handles the health check request 162 | func (a *App) healthHandler(c *gin.Context) { 163 | c.JSON(http.StatusOK, gin.H{ 164 | "status": "ok", 165 | "time": time.Now().Format(time.RFC3339), 166 | }) 167 | } -------------------------------------------------------------------------------- /go/internal/web/static/css/style.css: -------------------------------------------------------------------------------- 1 | /* Re-movery 样式文件 */ 2 | 3 | body { 4 | font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; 5 | background-color: #f8f9fa; 6 | } 7 | 8 | .navbar-brand { 9 | font-weight: 
/* Brand text in the navigation bar */
.navbar-brand {
    font-weight: bold;
    color: #0d6efd;
}

/* Cards: rounded corners with a soft drop shadow */
.card {
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    margin-bottom: 20px;
}

.card-header {
    font-weight: bold;
    background-color: #f8f9fa;
}

/* Severity colours: high = red, medium = orange, low = cyan */
.severity-high {
    color: #dc3545;
}

.severity-medium {
    color: #fd7e14;
}

.severity-low {
    color: #0dcaf0;
}

/* Fixed-height wrapper for charts (reduced on narrow screens below) */
.chart-container {
    height: 300px;
}

/* Pill navigation */
.nav-pills .nav-link.active {
    background-color: #0d6efd;
}

.nav-pills .nav-link {
    color: #495057;
}

/* Clickable file rows */
.file-item {
    cursor: pointer;
}

.file-item:hover {
    background-color: #f8f9fa;
}

/* Monospace block for matched code; pre-wrap keeps line breaks but wraps long lines */
.code-block {
    background-color: #f8f9fa;
    border-radius: 5px;
    padding: 10px;
    font-family: monospace;
    white-space: pre-wrap;
    margin-top: 10px;
}

/* Page footer */
.footer {
    margin-top: 50px;
    padding: 20px 0;
    background-color: #f8f9fa;
    text-align: center;
    color: #6c757d;
}

/* Button styles */
.btn-primary {
    background-color: #0d6efd;
    border-color: #0d6efd;
}

.btn-primary:hover {
    background-color: #0b5ed7;
    border-color: #0a58ca;
}

/* Form styles */
.form-control:focus {
    border-color: #0d6efd;
    box-shadow: 0 0 0 0.25rem rgba(13, 110, 253, 0.25);
}

/* Table styles */
.table {
    border-collapse: collapse;
    width: 100%;
}

.table th {
    background-color: #f8f9fa;
    font-weight: bold;
}

.table-striped tbody tr:nth-of-type(odd) {
    background-color: rgba(0, 0, 0, 0.05);
}

/* Badge styles */
.badge {
    font-weight: normal;
    padding: 0.35em 0.65em;
}

/* Responsive adjustments */
@media (max-width: 768px) {
    .chart-container {
        height: 200px;
    }
}
-------------------------------------------------------------------------------- /go/tests/integration/workflow_test.go: -------------------------------------------------------------------------------- 1 | package integration 2 | 3 | import ( 4 | "encoding/json" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/assert" 11 | 12 | "github.com/heyangxu/Re-movery/go/internal/analyzers" 13 | "github.com/heyangxu/Re-movery/go/internal/detectors" 14 | "github.com/heyangxu/Re-movery/go/internal/reporters" 15 | "github.com/heyangxu/Re-movery/go/internal/utils" 16 | ) 17 | 18 | func TestWorkflow(t *testing.T) { 19 | // 创建临时目录 20 | tempDir, err := os.MkdirTemp("", "workflow_test") 21 | if err != nil { 22 | t.Fatalf("创建临时目录失败: %v", err) 23 | } 24 | defer os.RemoveAll(tempDir) 25 | 26 | // 创建测试项目结构 27 | err = createTestProject(tempDir) 28 | if err != nil { 29 | t.Fatalf("创建测试项目失败: %v", err) 30 | } 31 | 32 | // 初始化组件 33 | detector := detectors.NewVulnerabilityDetector() 34 | checker := utils.NewSecurityChecker() 35 | analyzer := analyzers.NewCodeAnalyzer() 36 | reporter := reporters.NewHTMLReporter() 37 | 38 | // 测试完整工作流程 39 | t.Run("TestFullWorkflow", func(t *testing.T) { 40 | // 加载配置 41 | configFile := filepath.Join(tempDir, "config.json") 42 | configData, err := os.ReadFile(configFile) 43 | assert.NoError(t, err) 44 | 45 | var config map[string]interface{} 46 | err = json.Unmarshal(configData, &config) 47 | assert.NoError(t, err) 48 | 49 | // 加载签名 50 | signatureFile := filepath.Join(tempDir, "signatures.json") 51 | err = detector.LoadSignatures(signatureFile) 52 | assert.NoError(t, err) 53 | 54 | // 分析源代码文件 55 | srcDir := filepath.Join(tempDir, "src") 56 | vulnerableFile := filepath.Join(srcDir, "vulnerable.go") 57 | safeFile := filepath.Join(srcDir, "safe.go") 58 | 59 | // 检测漏洞 60 | vulnerableMatches, err := detector.DetectFile(vulnerableFile) 61 | assert.NoError(t, err) 62 | safeMatches, err := detector.DetectFile(safeFile) 63 | 
assert.NoError(t, err) 64 | 65 | assert.Greater(t, len(vulnerableMatches), 0) 66 | assert.Equal(t, 0, len(safeMatches)) 67 | 68 | // 执行安全检查 69 | vulnerableSecurity := checker.PerformFullCheck(vulnerableFile) 70 | safeSecurity := checker.PerformFullCheck(safeFile) 71 | 72 | assert.True(t, hasIssues(vulnerableSecurity)) 73 | assert.False(t, hasIssues(safeSecurity)) 74 | 75 | // 代码分析 76 | vulnerableAnalysis, err := analyzer.AnalyzeFile(vulnerableFile) 77 | assert.NoError(t, err) 78 | safeAnalysis, err := analyzer.AnalyzeFile(safeFile) 79 | assert.NoError(t, err) 80 | 81 | assert.Greater(t, vulnerableAnalysis.Complexity, safeAnalysis.Complexity) 82 | 83 | // 生成报告 84 | reportData := map[string]interface{}{ 85 | "project_name": config["project_name"], 86 | "scan_time": time.Now().Format("2006-01-02 15:04:05"), 87 | "files_scanned": []string{vulnerableFile, safeFile}, 88 | "vulnerability_results": map[string]interface{}{ 89 | "vulnerable.go": vulnerableMatches, 90 | "safe.go": safeMatches, 91 | }, 92 | "security_results": map[string]interface{}{ 93 | "vulnerable.go": vulnerableSecurity, 94 | "safe.go": safeSecurity, 95 | }, 96 | "analysis_results": map[string]interface{}{ 97 | "vulnerable.go": vulnerableAnalysis, 98 | "safe.go": safeAnalysis, 99 | }, 100 | } 101 | 102 | reportFile := filepath.Join(tempDir, "reports", "report.html") 103 | err = reporter.GenerateReport(reportData, reportFile) 104 | assert.NoError(t, err) 105 | 106 | assert.FileExists(t, reportFile) 107 | fileInfo, err := os.Stat(reportFile) 108 | assert.NoError(t, err) 109 | assert.Greater(t, fileInfo.Size(), int64(0)) 110 | }) 111 | 112 | // 测试并行处理 113 | t.Run("TestParallelProcessing", func(t *testing.T) { 114 | // 创建多个测试文件 115 | srcDir := filepath.Join(tempDir, "src") 116 | testFiles := make([]string, 5) 117 | testCode := `package main 118 | 119 | import "os/exec" 120 | 121 | func main() { 122 | exec.Command("ls").Run() 123 | } 124 | ` 125 | for i := range testFiles { 126 | filePath := 
filepath.Join(srcDir, "test_%d.go") 127 | err := os.WriteFile(filePath, []byte(testCode), 0644) 128 | assert.NoError(t, err) 129 | testFiles[i] = filePath 130 | } 131 | 132 | // 串行处理时间 133 | startSerial := time.Now() 134 | for _, file := range testFiles { 135 | _, err := detector.DetectFile(file) 136 | assert.NoError(t, err) 137 | checker.PerformFullCheck(file) 138 | _, err = analyzer.AnalyzeFile(file) 139 | assert.NoError(t, err) 140 | } 141 | serialDuration := time.Since(startSerial) 142 | 143 | // 并行处理时间 144 | startParallel := time.Now() 145 | resultChan := make(chan struct{}, len(testFiles)) 146 | for _, file := range testFiles { 147 | go func(f string) { 148 | _, err := detector.DetectFile(f) 149 | assert.NoError(t, err) 150 | checker.PerformFullCheck(f) 151 | _, err = analyzer.AnalyzeFile(f) 152 | assert.NoError(t, err) 153 | resultChan <- struct{}{} 154 | }(file) 155 | } 156 | 157 | // 等待所有并行任务完成 158 | for i := 0; i < len(testFiles); i++ { 159 | <-resultChan 160 | } 161 | parallelDuration := time.Since(startParallel) 162 | 163 | assert.Less(t, parallelDuration, serialDuration) 164 | }) 165 | 166 | // 测试错误处理 167 | t.Run("TestErrorHandling", func(t *testing.T) { 168 | // 测试无效的配置文件 169 | invalidConfig := filepath.Join(tempDir, "invalid_config.json") 170 | err := os.WriteFile(invalidConfig, []byte("invalid json"), 0644) 171 | assert.NoError(t, err) 172 | 173 | _, err = os.ReadFile(invalidConfig) 174 | assert.NoError(t, err) 175 | var config map[string]interface{} 176 | err = json.Unmarshal([]byte("invalid json"), &config) 177 | assert.Error(t, err) 178 | 179 | // 测试不存在的源代码文件 180 | nonExistentFile := filepath.Join(tempDir, "non_existent.go") 181 | _, err = detector.DetectFile(nonExistentFile) 182 | assert.Error(t, err) 183 | 184 | // 测试无效的源代码 185 | invalidCode := filepath.Join(tempDir, "invalid.go") 186 | err = os.WriteFile(invalidCode, []byte("invalid go code"), 0644) 187 | assert.NoError(t, err) 188 | 189 | _, err = analyzer.AnalyzeFile(invalidCode) 190 | 
assert.Error(t, err) 191 | }) 192 | } 193 | 194 | func createTestProject(dir string) error { 195 | // 创建配置文件 196 | config := map[string]interface{}{ 197 | "project_name": "Test Project", 198 | "scan_paths": []string{"src"}, 199 | "exclude_paths": []string{"tests", "docs"}, 200 | "report_format": "html", 201 | "report_path": "reports", 202 | "severity_threshold": "medium", 203 | "parallel_processing": true, 204 | "max_workers": 4, 205 | } 206 | 207 | configFile := filepath.Join(dir, "config.json") 208 | configData, err := json.MarshalIndent(config, "", " ") 209 | if err != nil { 210 | return err 211 | } 212 | 213 | err = os.WriteFile(configFile, configData, 0644) 214 | if err != nil { 215 | return err 216 | } 217 | 218 | // 创建签名文件 219 | signatures := map[string]interface{}{ 220 | "signatures": []map[string]interface{}{ 221 | { 222 | "id": "CMD001", 223 | "name": "命令注入", 224 | "severity": "high", 225 | "code_patterns": []string{ 226 | `exec\.Command\([^)]*\)`, 227 | `os\.exec\.Command\([^)]*\)`, 228 | }, 229 | }, 230 | { 231 | "id": "SQL001", 232 | "name": "SQL注入", 233 | "severity": "high", 234 | "code_patterns": []string{ 235 | `db\.Query\([^)]*\+[^)]*\)`, 236 | `db\.Exec\([^)]*\+[^)]*\)`, 237 | }, 238 | }, 239 | }, 240 | } 241 | 242 | signatureFile := filepath.Join(dir, "signatures.json") 243 | signatureData, err := json.MarshalIndent(signatures, "", " ") 244 | if err != nil { 245 | return err 246 | } 247 | 248 | err = os.WriteFile(signatureFile, signatureData, 0644) 249 | if err != nil { 250 | return err 251 | } 252 | 253 | // 创建源代码目录 254 | srcDir := filepath.Join(dir, "src") 255 | err = os.MkdirAll(srcDir, 0755) 256 | if err != nil { 257 | return err 258 | } 259 | 260 | // 创建漏洞代码文件 261 | vulnerableCode := `package main 262 | 263 | import ( 264 | "database/sql" 265 | "os/exec" 266 | ) 267 | 268 | func unsafeCommand(cmd string) { 269 | exec.Command("bash", "-c", cmd).Run() 270 | } 271 | 272 | func unsafeQuery(db *sql.DB, id string) { 273 | db.Query("SELECT * FROM 
users WHERE id = " + id) 274 | } 275 | 276 | func main() { 277 | unsafeCommand("ls -l") 278 | db, _ := sql.Open("mysql", "user:password@/dbname") 279 | unsafeQuery(db, "1 OR 1=1") 280 | } 281 | ` 282 | 283 | vulnerableFile := filepath.Join(srcDir, "vulnerable.go") 284 | err = os.WriteFile(vulnerableFile, []byte(vulnerableCode), 0644) 285 | if err != nil { 286 | return err 287 | } 288 | 289 | // 创建安全代码文件 290 | safeCode := `package main 291 | 292 | import ( 293 | "database/sql" 294 | ) 295 | 296 | func safeQuery(db *sql.DB, id string) { 297 | db.Query("SELECT * FROM users WHERE id = ?", id) 298 | } 299 | 300 | func main() { 301 | db, _ := sql.Open("mysql", "user:password@/dbname") 302 | safeQuery(db, "1") 303 | } 304 | ` 305 | 306 | safeFile := filepath.Join(srcDir, "safe.go") 307 | err = os.WriteFile(safeFile, []byte(safeCode), 0644) 308 | if err != nil { 309 | return err 310 | } 311 | 312 | // 创建报告目录 313 | reportDir := filepath.Join(dir, "reports") 314 | return os.MkdirAll(reportDir, 0755) 315 | } 316 | 317 | func hasIssues(results map[string]utils.SecurityCheckResult) bool { 318 | for _, result := range results { 319 | if result.HasIssues { 320 | return true 321 | } 322 | } 323 | return false 324 | } -------------------------------------------------------------------------------- /go/tests/security/security_test.go: -------------------------------------------------------------------------------- 1 | package security 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | "time" 10 | 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | 14 | "re-movery/internal/detectors" 15 | "re-movery/internal/utils" 16 | ) 17 | 18 | // TestSecurity 包含所有安全相关的测试 19 | type TestSecurity struct { 20 | tempDir string 21 | detector *detectors.VulnerabilityDetector 22 | checker *utils.SecurityChecker 23 | } 24 | 25 | // createTestFile 创建测试文件 26 | func (ts *TestSecurity) createTestFile(content string) (string, error) 
{ 27 | file, err := ioutil.TempFile(ts.tempDir, "test-*.go") 28 | if err != nil { 29 | return "", fmt.Errorf("创建临时文件失败: %v", err) 30 | } 31 | defer file.Close() 32 | 33 | if _, err := file.WriteString(content); err != nil { 34 | return "", fmt.Errorf("写入文件内容失败: %v", err) 35 | } 36 | 37 | return file.Name(), nil 38 | } 39 | 40 | func TestMemoryLimit(t *testing.T) { 41 | ts := &TestSecurity{ 42 | tempDir: t.TempDir(), 43 | detector: detectors.NewVulnerabilityDetector(), 44 | checker: utils.NewSecurityChecker(), 45 | } 46 | 47 | // 创建可能导致内存溢出的测试文件 48 | content := ` 49 | package main 50 | 51 | func memoryIntensive() { 52 | largeSlice := make([]int, 1<<30) // 尝试分配大量内存 53 | for i := range largeSlice { 54 | largeSlice[i] = i 55 | } 56 | } 57 | ` 58 | 59 | filePath, err := ts.createTestFile(content) 60 | require.NoError(t, err) 61 | 62 | // 检查内存使用 63 | memoryUsage, err := ts.checker.CheckMemoryUsage(filePath) 64 | require.NoError(t, err) 65 | assert.Less(t, memoryUsage, uint64(8<<30)) // 8GB限制 66 | } 67 | 68 | func TestExecutionTimeout(t *testing.T) { 69 | ts := &TestSecurity{ 70 | tempDir: t.TempDir(), 71 | detector: detectors.NewVulnerabilityDetector(), 72 | checker: utils.NewSecurityChecker(), 73 | } 74 | 75 | // 创建可能导致无限循环的测试文件 76 | content := ` 77 | package main 78 | 79 | func infiniteLoop() { 80 | for { 81 | // 无限循环 82 | } 83 | } 84 | ` 85 | 86 | filePath, err := ts.createTestFile(content) 87 | require.NoError(t, err) 88 | 89 | // 检查执行时间 90 | err = ts.checker.CheckExecutionTime(filePath, 5*time.Second) 91 | assert.Error(t, err) 92 | assert.Contains(t, err.Error(), "timeout") 93 | } 94 | 95 | func TestFileAccess(t *testing.T) { 96 | ts := &TestSecurity{ 97 | tempDir: t.TempDir(), 98 | detector: detectors.NewVulnerabilityDetector(), 99 | checker: utils.NewSecurityChecker(), 100 | } 101 | 102 | // 创建测试文件 103 | content := ` 104 | package main 105 | 106 | import "os" 107 | 108 | func accessSensitiveFile() { 109 | file, _ := os.Open("/etc/passwd") 110 | defer file.Close() 
111 | } 112 | ` 113 | 114 | filePath, err := ts.createTestFile(content) 115 | require.NoError(t, err) 116 | 117 | // Check file access; require stops the test before violations[0] is indexed on an empty result 118 | violations, err := ts.checker.CheckFileAccess(filePath) 119 | require.NoError(t, err) 120 | require.Greater(t, len(violations), 0) 121 | assert.Contains(t, violations[0], "/etc/passwd") 122 | } 123 | 124 | func TestNetworkAccess(t *testing.T) { 125 | ts := &TestSecurity{ 126 | tempDir: t.TempDir(), 127 | detector: detectors.NewVulnerabilityDetector(), 128 | checker: utils.NewSecurityChecker(), 129 | } 130 | 131 | // Create the test file 132 | content := ` 133 | package main 134 | 135 | import "net" 136 | 137 | func connectExternal() { 138 | conn, _ := net.Dial("tcp", "example.com:80") 139 | defer conn.Close() 140 | } 141 | ` 142 | 143 | filePath, err := ts.createTestFile(content) 144 | require.NoError(t, err) 145 | 146 | // Check network access; require stops the test before violations[0] is indexed on an empty result 147 | violations, err := ts.checker.CheckNetworkAccess(filePath) 148 | require.NoError(t, err) 149 | require.Greater(t, len(violations), 0) 150 | assert.Contains(t, violations[0], "net.Dial") 151 | } 152 | 153 | func TestCodeInjection(t *testing.T) { 154 | ts := &TestSecurity{ 155 | tempDir: t.TempDir(), 156 | detector: detectors.NewVulnerabilityDetector(), 157 | checker: utils.NewSecurityChecker(), 158 | } 159 | 160 | // Create the test file 161 | content := ` 162 | package main 163 | 164 | import "os/exec" 165 | 166 | func executeInput(userInput string) { 167 | cmd := exec.Command("bash", "-c", userInput) 168 | cmd.Run() 169 | } 170 | ` 171 | 172 | filePath, err := ts.createTestFile(content) 173 | require.NoError(t, err) 174 | 175 | // Check for code injection; require stops the test before vulnerabilities[0] is indexed on an empty result 176 | vulnerabilities, err := ts.detector.DetectFile(filePath) 177 | require.NoError(t, err) 178 | require.Greater(t, len(vulnerabilities), 0) 179 | assert.Equal(t, "HIGH", vulnerabilities[0].Severity) 180 | } 181 | 182 | func TestInputValidation(t *testing.T) { 183 | ts := &TestSecurity{ 184 | tempDir: t.TempDir(), 185 | detector: detectors.NewVulnerabilityDetector(), 186 | checker: 
utils.NewSecurityChecker(), 187 | } 188 | 189 | // Create the test file 190 | content := ` 191 | package main 192 | 193 | import "fmt" 194 | 195 | func processInput(userInput string) { 196 | fmt.Sprintf("%s", userInput) // 未经验证的输入 197 | } 198 | ` 199 | 200 | filePath, err := ts.createTestFile(content) 201 | require.NoError(t, err) 202 | 203 | // Check input validation 204 | issues, err := ts.checker.CheckInputValidation(filePath) 205 | require.NoError(t, err) 206 | assert.Greater(t, len(issues), 0) 207 | } 208 | 209 | func TestSecureRandom(t *testing.T) { 210 | ts := &TestSecurity{ 211 | tempDir: t.TempDir(), 212 | detector: detectors.NewVulnerabilityDetector(), 213 | checker: utils.NewSecurityChecker(), 214 | } 215 | 216 | // Create the test file 217 | content := ` 218 | package main 219 | 220 | import "math/rand" 221 | 222 | func generateToken() string { 223 | const chars = "0123456789ABCDEF" 224 | result := make([]byte, 32) 225 | for i := range result { 226 | result[i] = chars[rand.Intn(len(chars))] 227 | } 228 | return string(result) 229 | } 230 | ` 231 | 232 | filePath, err := ts.createTestFile(content) 233 | require.NoError(t, err) 234 | 235 | // Check random number generation; require stops the test before issues[0] is indexed on an empty result 236 | issues, err := ts.checker.CheckRandomGeneration(filePath) 237 | require.NoError(t, err) 238 | require.Greater(t, len(issues), 0) 239 | assert.Contains(t, issues[0], "math/rand") 240 | } 241 | 242 | func TestSensitiveData(t *testing.T) { 243 | ts := &TestSecurity{ 244 | tempDir: t.TempDir(), 245 | detector: detectors.NewVulnerabilityDetector(), 246 | checker: utils.NewSecurityChecker(), 247 | } 248 | 249 | // Create the test file 250 | content := ` 251 | package main 252 | 253 | import "fmt" 254 | 255 | func processPassword(password string) { 256 | fmt.Printf("Password is: %s\n", password) // 敏感信息泄露 257 | } 258 | ` 259 | 260 | filePath, err := ts.createTestFile(content) 261 | require.NoError(t, err) 262 | 263 | // Check sensitive-data handling; require stops the test before issues[0] is indexed on an empty result 264 | issues, err := ts.checker.CheckSensitiveData(filePath) 265 | require.NoError(t, err) 266 | require.Greater(t, len(issues), 0) 267 | 
assert.Contains(t, issues[0], "password") 268 | } 269 | 270 | func TestSandboxEscape(t *testing.T) { 271 | ts := &TestSecurity{ 272 | tempDir: t.TempDir(), 273 | detector: detectors.NewVulnerabilityDetector(), 274 | checker: utils.NewSecurityChecker(), 275 | } 276 | 277 | // Create the test file 278 | content := ` 279 | package main 280 | 281 | import ( 282 | "os" 283 | "os/exec" 284 | ) 285 | 286 | func dangerousOperation() { 287 | os.RemoveAll("/") 288 | exec.Command("chmod", "777", "/etc/passwd").Run() 289 | } 290 | ` 291 | 292 | filePath, err := ts.createTestFile(content) 293 | require.NoError(t, err) 294 | 295 | // Check for sandbox escape; require stops the test before violations[0] is indexed on an empty result 296 | violations, err := ts.checker.CheckSandboxEscape(filePath) 297 | require.NoError(t, err) 298 | require.Greater(t, len(violations), 0) 299 | assert.Contains(t, violations[0], "os.RemoveAll") 300 | } -------------------------------------------------------------------------------- /go/web/templates/report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Re-movery Vulnerability Report 7 | 8 | 9 | 29 | 30 | 31 |
32 |

Re-movery Vulnerability Report

33 | 34 |
35 |
36 |
37 |
38 |
Report Summary
39 |

Generated at: {{.GeneratedAt}}

40 |

Total Files Scanned: {{.TotalFiles}}

41 |

Total Vulnerabilities Found: {{.TotalMatches}}

42 |
43 |
44 |
45 |
46 | 47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 | 64 |

Detailed Findings

65 | {{range .Vulnerabilities}} 66 |
67 |
68 |
69 | {{.Signature.Name}} 70 | 71 | {{.Signature.Severity}} 72 | 73 |
74 |
75 |
76 |
ID: {{.Signature.ID}}
77 |

{{.Signature.Description}}

78 | 79 |
80 | File: {{.File}}
81 | Line: {{.Line}}
82 | Confidence: {{printf "%.1f%%" (mul .Confidence 100)}} 83 |
84 | 85 |
86 |
{{.Code}}
87 |
88 | 89 | {{if .Context}} 90 |
91 |
Context:
92 |
93 |
{{range .Context}}{{.}}
 94 | {{end}}
95 |
96 |
97 | {{end}} 98 | 99 | {{if .Signature.References}} 100 |
101 |
References:
102 |
    103 | {{range .Signature.References}} 104 |
  • {{.}}
  • 105 | {{end}} 106 |
107 |
108 | {{end}} 109 |
110 |
111 | {{end}} 112 |
113 | 114 | 129 | 130 | -------------------------------------------------------------------------------- /movery/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Re-Movery - A tool for discovering modified vulnerable code clones 3 | """ 4 | 5 | __version__ = "1.0.0" 6 | __author__ = "heyangxu" 7 | __email__ = "" 8 | 9 | from .config.config import config 10 | from .detectors.vulnerability import VulnerabilityDetector 11 | from .utils.security import SecurityChecker 12 | 13 | __all__ = ["config", "VulnerabilityDetector", "SecurityChecker"] -------------------------------------------------------------------------------- /movery/analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | from .language import LanguageAnalyzer 2 | from .code_analyzer import CodeAnalyzer 3 | 4 | __all__ = ['LanguageAnalyzer', 'CodeAnalyzer'] -------------------------------------------------------------------------------- /movery/analyzers/code_analyzer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code analysis utilities for Movery 3 | """ 4 | from typing import Dict, List, Optional 5 | import os 6 | import ast 7 | import logging 8 | 9 | from movery.utils.logging import get_logger 10 | from movery.config.config import config 11 | from .language import LanguageAnalyzer, PythonAnalyzer, JavaAnalyzer, CppAnalyzer, GoAnalyzer 12 | 13 | logger = get_logger(__name__) 14 | 15 | class CodeAnalyzer: 16 | """Code analyzer that supports multiple programming languages""" 17 | 18 | def __init__(self): 19 | self.analyzers = { 20 | ".py": PythonAnalyzer(), 21 | ".java": JavaAnalyzer(), 22 | ".cpp": CppAnalyzer(), 23 | ".hpp": CppAnalyzer(), 24 | ".cc": CppAnalyzer(), 25 | ".hh": CppAnalyzer(), 26 | ".go": GoAnalyzer() 27 | } 28 | 29 | def analyze_file(self, filename: str) -> Dict: 30 | """Analyze a source code file""" 31 | ext = 
os.path.splitext(filename)[1].lower() 32 | 33 | if ext not in self.analyzers: 34 | logger.warning(f"Unsupported file type: {ext}") 35 | return { 36 | "complexity": 0, 37 | "functions": [], 38 | "classes": [], 39 | "imports": [], 40 | "variables": [] 41 | } 42 | 43 | analyzer = self.analyzers[ext] 44 | try: 45 | ast_node = analyzer.parse_file(filename) 46 | 47 | return { 48 | "complexity": self._calculate_complexity(ast_node), 49 | "functions": analyzer.get_functions(ast_node), 50 | "classes": analyzer.get_classes(ast_node), 51 | "imports": analyzer.get_imports(ast_node), 52 | "variables": analyzer.get_variables(ast_node) 53 | } 54 | 55 | except Exception as e: 56 | logger.error(f"Error analyzing file {filename}: {str(e)}") 57 | return { 58 | "complexity": 0, 59 | "functions": [], 60 | "classes": [], 61 | "imports": [], 62 | "variables": [] 63 | } 64 | 65 | def _calculate_complexity(self, ast_node: any) -> int: 66 | """Calculate code complexity""" 67 | # 简单实现 - 仅计算函数和类的数量 68 | if isinstance(ast_node, ast.AST): 69 | functions = sum(1 for node in ast.walk(ast_node) 70 | if isinstance(node, ast.FunctionDef)) 71 | classes = sum(1 for node in ast.walk(ast_node) 72 | if isinstance(node, ast.ClassDef)) 73 | return functions + classes 74 | return 0 -------------------------------------------------------------------------------- /movery/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "processing": { 3 | "num_processes": 4, 4 | "max_memory_usage": 8589934592, 5 | "chunk_size": 1048576, 6 | "enable_cache": true, 7 | "cache_dir": ".cache", 8 | "cache_max_size": 1073741824, 9 | "supported_languages": [ 10 | "c", 11 | "cpp", 12 | "java", 13 | "python", 14 | "go", 15 | "javascript" 16 | ] 17 | }, 18 | "detector": { 19 | "min_similarity": 0.8, 20 | "max_edit_distance": 10, 21 | "context_lines": 3, 22 | "max_ast_depth": 50, 23 | "max_cfg_nodes": 1000, 24 | "enable_semantic_match": true, 25 | "enable_syntax_match": true, 26 | 
"enable_token_match": true, 27 | "report_format": "html", 28 | "report_dir": "reports", 29 | "exclude_patterns": [ 30 | "**/test/*", 31 | "**/tests/*", 32 | "**/vendor/*", 33 | "**/node_modules/*" 34 | ] 35 | }, 36 | "logging": { 37 | "log_level": "INFO", 38 | "log_file": "movery.log", 39 | "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", 40 | "enable_profiling": false, 41 | "profile_output": "profile.stats", 42 | "show_progress": true, 43 | "progress_interval": 1 44 | }, 45 | "security": { 46 | "max_file_size": 104857600, 47 | "allowed_schemes": [ 48 | "file", 49 | "http", 50 | "https" 51 | ], 52 | "enable_sandbox": true, 53 | "sandbox_timeout": 60, 54 | "require_auth": false, 55 | "rate_limit": 100, 56 | "rate_limit_period": 60 57 | } 58 | } -------------------------------------------------------------------------------- /movery/config/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration module for Movery 3 | """ 4 | from .config import config 5 | 6 | __all__ = ['config'] -------------------------------------------------------------------------------- /movery/config/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "processing": { 3 | "num_processes": 4, 4 | "max_memory_usage": 8589934592, 5 | "chunk_size": 1048576, 6 | "enable_cache": true, 7 | "cache_dir": ".cache", 8 | "cache_max_size": 1073741824, 9 | "supported_languages": [ 10 | "c", 11 | "cpp", 12 | "java", 13 | "python", 14 | "go", 15 | "javascript" 16 | ] 17 | }, 18 | "detector": { 19 | "min_similarity": 0.8, 20 | "max_edit_distance": 10, 21 | "context_lines": 3, 22 | "max_ast_depth": 50, 23 | "max_cfg_nodes": 1000, 24 | "enable_semantic_match": true, 25 | "enable_syntax_match": true, 26 | "enable_token_match": true, 27 | "report_format": "html", 28 | "report_dir": "reports", 29 | "exclude_patterns": [ 30 | "**/test/*", 31 | "**/tests/*", 32 | "**/vendor/*", 33 | 
"**/node_modules/*" 34 | ] 35 | }, 36 | "logging": { 37 | "log_level": "INFO", 38 | "log_file": "movery.log", 39 | "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s", 40 | "enable_profiling": false, 41 | "profile_output": "profile.stats", 42 | "show_progress": true, 43 | "progress_interval": 1 44 | }, 45 | "security": { 46 | "max_file_size": 104857600, 47 | "allowed_schemes": [ 48 | "file", 49 | "http", 50 | "https" 51 | ], 52 | "enable_sandbox": true, 53 | "sandbox_timeout": 60, 54 | "require_auth": false, 55 | "rate_limit": 100, 56 | "rate_limit_period": 60 57 | } 58 | } -------------------------------------------------------------------------------- /movery/config/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration module for Movery 3 | """ 4 | import json 5 | import os 6 | from typing import Dict, Any, List 7 | from dataclasses import dataclass 8 | 9 | @dataclass 10 | class ProcessingConfig: 11 | num_processes: int 12 | max_memory_usage: int 13 | chunk_size: int 14 | enable_cache: bool 15 | cache_dir: str 16 | cache_max_size: int 17 | supported_languages: List[str] 18 | 19 | @dataclass 20 | class DetectorConfig: 21 | min_similarity: float 22 | max_edit_distance: int 23 | context_lines: int 24 | max_ast_depth: int 25 | max_cfg_nodes: int 26 | enable_semantic_match: bool 27 | enable_syntax_match: bool 28 | enable_token_match: bool 29 | report_format: str 30 | report_dir: str 31 | exclude_patterns: List[str] 32 | 33 | @dataclass 34 | class LoggingConfig: 35 | log_level: str 36 | log_file: str 37 | log_format: str 38 | enable_profiling: bool 39 | profile_output: str 40 | show_progress: bool 41 | progress_interval: int 42 | 43 | @dataclass 44 | class SecurityConfig: 45 | max_file_size: int 46 | allowed_schemes: List[str] 47 | enable_sandbox: bool 48 | sandbox_timeout: int 49 | require_auth: bool 50 | rate_limit: int 51 | rate_limit_period: int 52 | 53 | @dataclass 54 | class Config: 
55 | processing: ProcessingConfig 56 | detector: DetectorConfig 57 | logging: LoggingConfig 58 | security: SecurityConfig 59 | 60 | def load_config(config_path: str = None) -> Config: 61 | """ 62 | Load configuration from JSON file 63 | 64 | Args: 65 | config_path: Path to config file. If None, uses default config.json 66 | 67 | Returns: 68 | Configuration object 69 | """ 70 | if config_path is None: 71 | config_path = os.path.join(os.path.dirname(__file__), "config.json") 72 | 73 | with open(config_path, "r", encoding="utf-8") as f: 74 | data = json.load(f) 75 | 76 | return Config( 77 | processing=ProcessingConfig(**data["processing"]), 78 | detector=DetectorConfig(**data["detector"]), 79 | logging=LoggingConfig(**data["logging"]), 80 | security=SecurityConfig(**data["security"]) 81 | ) 82 | 83 | # Load default configuration 84 | config = load_config() -------------------------------------------------------------------------------- /movery/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .vulnerability import VulnerabilityDetector, Signature, VulnerabilityMatch 2 | 3 | __all__ = ['VulnerabilityDetector', 'Signature', 'VulnerabilityMatch'] -------------------------------------------------------------------------------- /movery/go/cmd/movery/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heyangxu/Re-movery/aad70c570ac2c4417d7b4844cc9b4846a148cbd5/movery/go/cmd/movery/main.go -------------------------------------------------------------------------------- /movery/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main entry point for Movery 3 | """ 4 | import os 5 | import sys 6 | import argparse 7 | import logging 8 | import json 9 | from typing import List, Dict, Optional 10 | import time 11 | from pathlib import Path 12 | import concurrent.futures 13 | import traceback 14 | 
15 | from movery.config.config import config, MoveryConfig 16 | from movery.utils.logging import setup_logging, get_logger 17 | from movery.utils.memory import memory_monitor 18 | from movery.utils.parallel import worker_pool 19 | from movery.analyzers.language import LanguageAnalyzerFactory 20 | from movery.detectors.vulnerability import detector 21 | from movery.reporters.html import reporter 22 | 23 | logger = get_logger(__name__) 24 | 25 | def parse_args(): 26 | """Parse command line arguments""" 27 | parser = argparse.ArgumentParser( 28 | description="Movery - A tool for discovering modified vulnerable code clones" 29 | ) 30 | 31 | parser.add_argument( 32 | "target", 33 | help="Target program or directory to analyze" 34 | ) 35 | 36 | parser.add_argument( 37 | "-c", "--config", 38 | help="Path to configuration file", 39 | default="config.json" 40 | ) 41 | 42 | parser.add_argument( 43 | "-s", "--signatures", 44 | help="Path to vulnerability signatures file", 45 | default="signatures.json" 46 | ) 47 | 48 | parser.add_argument( 49 | "-o", "--output", 50 | help="Output directory for reports", 51 | default="reports" 52 | ) 53 | 54 | parser.add_argument( 55 | "-j", "--jobs", 56 | help="Number of parallel jobs", 57 | type=int, 58 | default=None 59 | ) 60 | 61 | parser.add_argument( 62 | "-v", "--verbose", 63 | help="Enable verbose output", 64 | action="store_true" 65 | ) 66 | 67 | parser.add_argument( 68 | "--cache", 69 | help="Enable result caching", 70 | action="store_true" 71 | ) 72 | 73 | return parser.parse_args() 74 | 75 | def load_config(config_file: str) -> MoveryConfig: 76 | """Load configuration from file""" 77 | if os.path.exists(config_file): 78 | return MoveryConfig.from_file(config_file) 79 | return MoveryConfig() 80 | 81 | def find_source_files(target: str) -> List[str]: 82 | """Find all source files in target""" 83 | source_files = [] 84 | 85 | for root, _, files in os.walk(target): 86 | for file in files: 87 | file_path = os.path.join(root, file) 88 | 
89 | # Skip files larger than limit 90 | if os.path.getsize(file_path) > config.security.max_file_size: 91 | logger.warning(f"Skipping large file: {file_path}") 92 | continue 93 | 94 | # Skip files matching exclude patterns 95 | skip = False 96 | for pattern in config.detector.exclude_patterns: 97 | if Path(file_path).match(pattern): 98 | skip = True 99 | break 100 | if skip: 101 | continue 102 | 103 | # Check if file is supported 104 | if LanguageAnalyzerFactory.get_analyzer(file_path): 105 | source_files.append(file_path) 106 | 107 | return source_files 108 | 109 | def analyze_file(file: str) -> List[Dict]: 110 | """Analyze single file for vulnerabilities""" 111 | try: 112 | matches = detector.detect(file) 113 | return [match.to_dict() for match in matches] 114 | except Exception as e: 115 | logger.error(f"Error analyzing file {file}: {str(e)}") 116 | logger.debug(traceback.format_exc()) 117 | return [] 118 | 119 | def main(): 120 | """Main entry point""" 121 | start_time = time.time() 122 | 123 | # Parse arguments 124 | args = parse_args() 125 | 126 | # Setup logging 127 | log_level = logging.DEBUG if args.verbose else logging.INFO 128 | setup_logging(level=log_level) 129 | 130 | logger.info("Starting Movery...") 131 | 132 | try: 133 | # Load configuration 134 | config = load_config(args.config) 135 | if args.jobs: 136 | config.processing.num_processes = args.jobs 137 | config.processing.enable_cache = args.cache 138 | 139 | # Load vulnerability signatures 140 | detector.load_signatures(args.signatures) 141 | 142 | # Find source files 143 | target_path = os.path.abspath(args.target) 144 | if not os.path.exists(target_path): 145 | raise FileNotFoundError(f"Target not found: {target_path}") 146 | 147 | logger.info(f"Analyzing target: {target_path}") 148 | source_files = find_source_files(target_path) 149 | logger.info(f"Found {len(source_files)} source files") 150 | 151 | # Start worker pool 152 | worker_pool.start() 153 | 154 | # Process files in parallel 155 | 
all_matches = [] 156 | with concurrent.futures.ThreadPoolExecutor( 157 | max_workers=config.processing.num_processes 158 | ) as executor: 159 | future_to_file = { 160 | executor.submit(analyze_file, file): file 161 | for file in source_files 162 | } 163 | 164 | for future in concurrent.futures.as_completed(future_to_file): 165 | file = future_to_file[future] 166 | try: 167 | matches = future.result() 168 | if matches: 169 | all_matches.extend(matches) 170 | logger.info( 171 | f"Found {len(matches)} vulnerabilities in {file}") 172 | except Exception as e: 173 | logger.error(f"Error processing {file}: {str(e)}") 174 | 175 | # Generate report 176 | if all_matches: 177 | os.makedirs(args.output, exist_ok=True) 178 | report_file = os.path.join( 179 | args.output, 180 | f"report_{int(time.time())}.html" 181 | ) 182 | reporter.generate_report(all_matches, report_file) 183 | logger.info(f"Generated report: {report_file}") 184 | else: 185 | logger.info("No vulnerabilities found") 186 | 187 | elapsed_time = time.time() - start_time 188 | logger.info(f"Analysis completed in {elapsed_time:.2f} seconds") 189 | 190 | except Exception as e: 191 | logger.error(f"Error: {str(e)}") 192 | logger.debug(traceback.format_exc()) 193 | sys.exit(1) 194 | finally: 195 | worker_pool.stop() 196 | 197 | if __name__ == "__main__": 198 | main() -------------------------------------------------------------------------------- /movery/reporters/__init__.py: -------------------------------------------------------------------------------- 1 | from .html import HTMLReporter 2 | 3 | __all__ = ['HTMLReporter'] -------------------------------------------------------------------------------- /movery/templates/report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Movery Vulnerability Report 6 | 93 | 94 | 95 |
96 |

Movery Vulnerability Report

97 | 98 |
99 |

Summary

100 |

Total Vulnerabilities: {{ report.summary.total_vulnerabilities }}

101 |

Total Files: {{ report.summary.total_files }}

102 |

Severity Distribution:

103 |
    104 | {% for severity, count in report.summary.severities.items() %} 105 |
  • {{ severity }}: {{ count }}
  • 106 | {% endfor %} 107 |
108 |
109 | 110 |
111 |
112 | Severity Distribution 114 |
115 |
116 | Vulnerability Types 118 |
119 |
120 | File Distribution 122 |
123 |
124 | Confidence Distribution 126 |
127 |
128 | 129 |

Vulnerabilities

130 | {% for vuln in report.vulnerabilities %} 131 |
132 |
133 |

{{ vuln.name }}

134 | {{ vuln.severity }} 135 |
136 | 137 |

{{ vuln.description }}

138 | 139 | {% if vuln.cwe_id %} 140 |

CWE: {{ vuln.cwe_id }}

141 | {% endif %} 142 | 143 | {% if vuln.cve_id %} 144 |

CVE: {{ vuln.cve_id }}

145 | {% endif %} 146 | 147 |

File: {{ vuln.file }}:{{ vuln.line_start }}-{{ vuln.line_end }}

148 |

Confidence: {{ "%.2f"|format(vuln.confidence) }}

149 | 150 |
{{ vuln.matched_code }}
151 | 152 |
153 |

Context

154 |

Imports: {{ vuln.context.imports|length }}

155 |

Functions: {{ vuln.context.functions|length }}

156 |

Classes: {{ vuln.context.classes|length }}

157 |

Variables: {{ vuln.context.variables|length }}

158 |
159 |
160 | {% endfor %} 161 | 162 | 165 |
166 | 167 | 168 | -------------------------------------------------------------------------------- /movery/tests/security/test_security.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | import tempfile 5 | import shutil 6 | import subprocess 7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 8 | 9 | from movery.detectors.vulnerability import VulnerabilityDetector 10 | from movery.utils.security import SecurityChecker 11 | 12 | class TestSecurity(unittest.TestCase): 13 | def setUp(self): 14 | """设置测试环境""" 15 | self.test_dir = tempfile.mkdtemp() 16 | self.security_checker = SecurityChecker() 17 | self.detector = VulnerabilityDetector() 18 | 19 | def create_test_file(self, content): 20 | """创建测试文件""" 21 | file_path = os.path.join(self.test_dir, 'test_file.py') 22 | with open(file_path, 'w') as f: 23 | f.write(content) 24 | return file_path 25 | 26 | def test_memory_limit(self): 27 | """测试内存限制""" 28 | # 创建一个可能导致内存溢出的文件 29 | test_file = self.create_test_file(''' 30 | def memory_intensive(): 31 | large_list = [i for i in range(10**8)] # 尝试创建大列表 32 | return large_list 33 | ''') 34 | 35 | # 检查内存使用 36 | memory_usage = self.security_checker.check_memory_usage(test_file) 37 | self.assertLess(memory_usage, 8 * 1024 * 1024 * 1024) # 8GB限制 38 | 39 | def test_execution_timeout(self): 40 | """测试执行超时""" 41 | # 创建一个可能导致无限循环的文件 42 | test_file = self.create_test_file(''' 43 | def infinite_loop(): 44 | while True: 45 | pass 46 | ''') 47 | 48 | # 检查执行时间 49 | with self.assertRaises(TimeoutError): 50 | self.security_checker.check_execution_time(test_file, timeout=5) 51 | 52 | def test_file_access(self): 53 | """测试文件访问限制""" 54 | # 创建测试文件 55 | test_file = self.create_test_file(''' 56 | import os 57 | 58 | def access_sensitive_file(): 59 | with open('/etc/passwd', 'r') as f: 60 | return f.read() 61 | ''') 62 | 63 | # 检查文件访问 64 | violations = 
self.security_checker.check_file_access(test_file) 65 | self.assertTrue(len(violations) > 0) 66 | self.assertIn('/etc/passwd', violations[0]) 67 | 68 | def test_network_access(self): 69 | """测试网络访问限制""" 70 | # 创建测试文件 71 | test_file = self.create_test_file(''' 72 | import socket 73 | 74 | def connect_external(): 75 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 76 | sock.connect(('example.com', 80)) 77 | ''') 78 | 79 | # 检查网络访问 80 | violations = self.security_checker.check_network_access(test_file) 81 | self.assertTrue(len(violations) > 0) 82 | self.assertIn('socket.connect', violations[0]) 83 | 84 | def test_code_injection(self): 85 | """测试代码注入防护""" 86 | # 创建测试文件 87 | test_file = self.create_test_file(''' 88 | def execute_input(user_input): 89 | exec(user_input) # 危险的代码执行 90 | ''') 91 | 92 | # 检查代码注入 93 | vulnerabilities = self.detector.detect_file(test_file) 94 | self.assertTrue(len(vulnerabilities) > 0) 95 | self.assertEqual(vulnerabilities[0].severity, 'HIGH') 96 | 97 | def test_input_validation(self): 98 | """测试输入验证""" 99 | # 创建测试文件 100 | test_file = self.create_test_file(''' 101 | def process_input(user_input): 102 | # 没有验证的输入处理 103 | return eval(user_input) 104 | ''') 105 | 106 | # 检查输入验证 107 | issues = self.security_checker.check_input_validation(test_file) 108 | self.assertTrue(len(issues) > 0) 109 | self.assertIn('eval', str(issues[0])) 110 | 111 | def test_secure_random(self): 112 | """测试安全随机数生成""" 113 | # 创建测试文件 114 | test_file = self.create_test_file(''' 115 | import random 116 | 117 | def generate_token(): 118 | return ''.join(random.choice('0123456789ABCDEF') for i in range(32)) 119 | ''') 120 | 121 | # 检查随机数生成 122 | issues = self.security_checker.check_random_generation(test_file) 123 | self.assertTrue(len(issues) > 0) 124 | self.assertIn('random.choice', str(issues[0])) 125 | 126 | def test_sensitive_data(self): 127 | """测试敏感数据处理""" 128 | # 创建测试文件 129 | test_file = self.create_test_file(''' 130 | def process_password(password): 131 | 
print(f"Password is: {password}") # 敏感信息泄露 132 | return hash(password) # 不安全的哈希 133 | ''') 134 | 135 | # 检查敏感数据处理 136 | issues = self.security_checker.check_sensitive_data(test_file) 137 | self.assertTrue(len(issues) > 0) 138 | self.assertIn('password', str(issues[0]).lower()) 139 | 140 | def test_sandbox_escape(self): 141 | """测试沙箱逃逸防护""" 142 | # 创建测试文件 143 | test_file = self.create_test_file(''' 144 | import subprocess 145 | import os 146 | 147 | def dangerous_operation(): 148 | os.system('rm -rf /') # 危险的系统命令 149 | subprocess.call(['chmod', '777', '/etc/passwd']) # 危险的权限修改 150 | ''') 151 | 152 | # 检查沙箱逃逸 153 | violations = self.security_checker.check_sandbox_escape(test_file) 154 | self.assertTrue(len(violations) > 0) 155 | self.assertIn('os.system', str(violations[0])) 156 | 157 | def tearDown(self): 158 | """清理测试环境""" 159 | shutil.rmtree(self.test_dir) 160 | 161 | if __name__ == '__main__': 162 | unittest.main() -------------------------------------------------------------------------------- /movery/tests/unit/test_analyzer.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | 6 | from movery.analyzers.code_analyzer import CodeAnalyzer 7 | 8 | class TestCodeAnalyzer(unittest.TestCase): 9 | def setUp(self): 10 | self.analyzer = CodeAnalyzer() 11 | self.test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data') 12 | if not os.path.exists(self.test_data_dir): 13 | os.makedirs(self.test_data_dir) 14 | 15 | def test_parse_python(self): 16 | """测试Python代码解析""" 17 | test_file = os.path.join(self.test_data_dir, 'test_python.py') 18 | with open(test_file, 'w') as f: 19 | f.write(''' 20 | def example_function(): 21 | x = 1 22 | y = 2 23 | return x + y 24 | ''') 25 | 26 | ast = self.analyzer.parse_file(test_file) 27 | self.assertIsNotNone(ast) 28 | self.assertEqual(ast.type, 
'Module') 29 | 30 | def test_analyze_function(self): 31 | """测试函数分析""" 32 | test_file = os.path.join(self.test_data_dir, 'test_function.py') 33 | with open(test_file, 'w') as f: 34 | f.write(''' 35 | def process_data(data): 36 | result = [] 37 | for item in data: 38 | if item > 0: 39 | result.append(item * 2) 40 | return result 41 | ''') 42 | 43 | functions = self.analyzer.analyze_functions(test_file) 44 | self.assertEqual(len(functions), 1) 45 | self.assertEqual(functions[0].name, 'process_data') 46 | self.assertTrue(functions[0].has_loop) 47 | self.assertTrue(functions[0].has_condition) 48 | 49 | def test_data_flow(self): 50 | """测试数据流分析""" 51 | test_file = os.path.join(self.test_data_dir, 'test_dataflow.py') 52 | with open(test_file, 'w') as f: 53 | f.write(''' 54 | def data_flow_example(user_input): 55 | data = user_input.strip() 56 | processed = data.lower() 57 | return processed 58 | ''') 59 | 60 | flows = self.analyzer.analyze_data_flow(test_file) 61 | self.assertTrue(len(flows) > 0) 62 | self.assertIn('user_input', flows[0].sources) 63 | self.assertIn('processed', flows[0].sinks) 64 | 65 | def test_complexity_analysis(self): 66 | """测试复杂度分析""" 67 | test_file = os.path.join(self.test_data_dir, 'test_complexity.py') 68 | with open(test_file, 'w') as f: 69 | f.write(''' 70 | def complex_function(x, y): 71 | if x > 0: 72 | if y > 0: 73 | return x + y 74 | else: 75 | return x - y 76 | else: 77 | if y < 0: 78 | return -x - y 79 | else: 80 | return -x + y 81 | ''') 82 | 83 | complexity = self.analyzer.analyze_complexity(test_file) 84 | self.assertTrue(complexity > 1) 85 | self.assertEqual(complexity, 4) # 4个条件分支 86 | 87 | def tearDown(self): 88 | """清理测试数据""" 89 | import shutil 90 | if os.path.exists(self.test_data_dir): 91 | shutil.rmtree(self.test_data_dir) 92 | 93 | if __name__ == '__main__': 94 | unittest.main() -------------------------------------------------------------------------------- /movery/tests/unit/test_detector.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import sys 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 5 | 6 | from movery.detectors.vulnerability import VulnerabilityDetector 7 | 8 | class TestVulnerabilityDetector(unittest.TestCase): 9 | def setUp(self): 10 | self.detector = VulnerabilityDetector() 11 | self.test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data') 12 | if not os.path.exists(self.test_data_dir): 13 | os.makedirs(self.test_data_dir) 14 | 15 | def test_load_signatures(self): 16 | """测试加载漏洞签名""" 17 | # 创建测试签名文件 18 | test_sig_file = os.path.join(self.test_data_dir, 'test_signatures.json') 19 | with open(test_sig_file, 'w') as f: 20 | f.write(''' 21 | { 22 | "signatures": [ 23 | { 24 | "id": "CWE-78", 25 | "name": "OS Command Injection", 26 | "severity": "HIGH", 27 | "code_patterns": ["os\\.system\\(.*\\)"] 28 | } 29 | ] 30 | } 31 | ''') 32 | 33 | self.detector.load_signatures(test_sig_file) 34 | self.assertEqual(len(self.detector.signatures), 1) 35 | self.assertEqual(self.detector.signatures[0].id, "CWE-78") 36 | 37 | def test_detect_vulnerability(self): 38 | """测试漏洞检测""" 39 | # 创建测试代码文件 40 | test_code_file = os.path.join(self.test_data_dir, 'test_code.py') 41 | with open(test_code_file, 'w') as f: 42 | f.write(''' 43 | import os 44 | def unsafe_function(cmd): 45 | os.system(cmd) # 不安全的系统命令执行 46 | ''') 47 | 48 | matches = self.detector.detect_file(test_code_file) 49 | self.assertTrue(len(matches) > 0) 50 | self.assertEqual(matches[0].signature.id, "CWE-78") 51 | 52 | def test_false_positive(self): 53 | """测试误报情况""" 54 | # 创建安全的测试代码 55 | test_safe_file = os.path.join(self.test_data_dir, 'test_safe.py') 56 | with open(test_safe_file, 'w') as f: 57 | f.write(''' 58 | def safe_function(): 59 | print("This is safe code") 60 | ''') 61 | 62 | matches = self.detector.detect_file(test_safe_file) 63 | 
class TestSecurityChecker(unittest.TestCase):
    """Tests for ``SecurityChecker``, run against one deliberately unsafe fixture module."""

    def setUp(self):
        """Create the checker, a temp directory, and the unsafe fixture file inside it."""
        self.checker = SecurityChecker()
        self.test_dir = tempfile.mkdtemp()

        # Fixture source: one function per category of issue asserted on below.
        # The text is only written to disk here; nothing in this class imports or runs it.
        self.test_code = '''
import os
import sys
import time
import random
import socket
import subprocess

def unsafe_memory():
    # 大量内存分配
    large_list = [i for i in range(10**7)]
    return large_list

def unsafe_execution():
    # 长时间执行
    time.sleep(5)
    return "Done"

def unsafe_file_access():
    # 危险的文件操作
    with open("/etc/passwd", "r") as f:
        data = f.read()
    return data

def unsafe_network():
    # 未经验证的网络连接
    sock = socket.socket()
    sock.connect(("example.com", 80))
    return sock

def unsafe_input():
    # 未验证的输入
    user_input = input("Enter command: ")
    os.system(user_input)

def unsafe_random():
    # 不安全的随机数生成
    return random.randint(1, 100)

def unsafe_sensitive_data():
    # 敏感数据暴露
    password = "super_secret_123"
    print(f"Password is: {password}")

def unsafe_sandbox():
    # 沙箱逃逸尝试
    subprocess.call("rm -rf /", shell=True)
'''
        self.test_file = os.path.join(self.test_dir, "test_code.py")
        with open(self.test_file, "w") as f:
            f.write(self.test_code)

    def tearDown(self):
        """Delete the temp directory together with every fixture written into it."""
        shutil.rmtree(self.test_dir)

    def test_check_memory_usage(self):
        """The memory check flags the fixture and mentions ``large_list`` in its details."""
        result = self.checker.check_memory_usage(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("large_list", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_execution_time(self):
        """The execution-time check flags the fixture and mentions ``time.sleep``."""
        result = self.checker.check_execution_time(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("time.sleep", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_file_access(self):
        """The file-access check flags the fixture and mentions ``/etc/passwd``."""
        result = self.checker.check_file_access(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("/etc/passwd", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_network_access(self):
        """The network check flags the fixture and mentions ``socket.connect``."""
        result = self.checker.check_network_access(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("socket.connect", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_input_validation(self):
        """The input-validation check flags the fixture and mentions ``os.system``."""
        result = self.checker.check_input_validation(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("os.system", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_random_generation(self):
        """The random-generation check flags the fixture and mentions ``random.randint``."""
        result = self.checker.check_random_generation(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("random.randint", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_sensitive_data(self):
        """The sensitive-data check flags the fixture and mentions ``password``."""
        result = self.checker.check_sensitive_data(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("password", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_check_sandbox_escape(self):
        """The sandbox-escape check flags the fixture and mentions ``subprocess.call``."""
        result = self.checker.check_sandbox_escape(self.test_file)
        self.assertTrue(result["has_issues"])
        self.assertIn("subprocess.call", result["details"])
        self.assertGreater(len(result["patterns"]), 0)

    def test_perform_full_check(self):
        """The full check returns a dict with one flagged entry per expected check name."""
        results = self.checker.perform_full_check(self.test_file)

        self.assertIsInstance(results, dict)
        self.assertGreater(len(results), 0)

        # Every individual check must be present in the combined result.
        expected_checks = [
            "memory_usage",
            "execution_time",
            "file_access",
            "network_access",
            "input_validation",
            "random_generation",
            "sensitive_data",
            "sandbox_escape"
        ]

        for check in expected_checks:
            self.assertIn(check, results)
            self.assertTrue(results[check]["has_issues"])
            self.assertGreater(len(results[check]["patterns"]), 0)

    def test_concurrent_checks(self):
        """Running the full check from five threads yields one non-empty dict per file."""
        # Write several copies of the fixture so each thread has its own file.
        test_files = []
        for i in range(5):
            file_path = os.path.join(self.test_dir, f"test_code_{i}.py")
            with open(file_path, "w") as f:
                f.write(self.test_code)
            test_files.append(file_path)

        # Run the checks concurrently; all threads share the single checker instance.
        results = []
        threads = []

        def check_file(file_path):
            # Each thread appends its result to the shared list.
            result = self.checker.perform_full_check(file_path)
            results.append(result)

        for file_path in test_files:
            thread = threading.Thread(target=check_file, args=(file_path,))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        self.assertEqual(len(results), len(test_files))
        for result in results:
            self.assertIsInstance(result, dict)
            self.assertGreater(len(result), 0)
class TestVulnerabilityDetector(unittest.TestCase):
    """Tests for ``VulnerabilityDetector`` using a two-signature file and a small unsafe module."""

    def setUp(self):
        """Create the detector, a temp directory, a signature file and a code fixture."""
        self.detector = VulnerabilityDetector()
        self.test_dir = tempfile.mkdtemp()

        # Signature fixture: one command-injection and one SQL-injection entry,
        # each carrying two regex patterns.
        self.signatures = {
            "signatures": [
                {
                    "id": "CMD001",
                    "name": "命令注入",
                    "severity": "high",
                    "code_patterns": [
                        "os\\.system\\([^)]*\\)",
                        "subprocess\\.call\\([^)]*\\)"
                    ]
                },
                {
                    "id": "SQL001",
                    "name": "SQL注入",
                    "severity": "high",
                    "code_patterns": [
                        "execute\\(['\"][^'\"]*%[^'\"]*['\"]\\)",
                        "executemany\\(['\"][^'\"]*%[^'\"]*['\"]\\)"
                    ]
                }
            ]
        }

        self.signature_file = os.path.join(self.test_dir, "signatures.json")
        with open(self.signature_file, "w") as f:
            json.dump(self.signatures, f)

        # Code fixture scanned by the detection tests; test_analyze_ast also parses it with ast.
        self.test_code = '''
import os
import subprocess

def unsafe_command():
    cmd = "ls -l"
    os.system(cmd)
    subprocess.call(["echo", "hello"])

def unsafe_sql():
    query = "SELECT * FROM users WHERE id = %s"
    cursor.execute(query % user_id)
'''
        self.test_file = os.path.join(self.test_dir, "test_code.py")
        with open(self.test_file, "w") as f:
            f.write(self.test_code)

    def tearDown(self):
        """Delete the temp directory and all fixtures in it."""
        shutil.rmtree(self.test_dir)

    def test_load_signatures(self):
        """Loading the signature file yields both entries, in file order, with their patterns."""
        self.detector.load_signatures(self.signature_file)

        self.assertEqual(len(self.detector.signatures), 2)
        self.assertEqual(self.detector.signatures[0].id, "CMD001")
        self.assertEqual(self.detector.signatures[0].name, "命令注入")
        self.assertEqual(len(self.detector.signatures[0].code_patterns), 2)

    def test_detect_file(self):
        """File detection returns typed matches, each with confidence above 0.7."""
        self.detector.load_signatures(self.signature_file)
        matches = self.detector.detect_file(self.test_file)

        self.assertGreater(len(matches), 0)
        for match in matches:
            self.assertIsInstance(match, VulnerabilityMatch)
            self.assertIsInstance(match.signature, Signature)
            self.assertGreater(match.confidence, 0.7)

    def test_analyze_ast(self):
        """AST analysis of the parsed fixture returns matches with positive line numbers."""
        self.detector.load_signatures(self.signature_file)
        with open(self.test_file, 'r') as f:
            tree = ast.parse(f.read())
        matches = self.detector.analyze_ast(tree)

        self.assertGreater(len(matches), 0)
        for match in matches:
            self.assertIsInstance(match, VulnerabilityMatch)
            self.assertGreater(match.line_number, 0)

    def test_detect_similar_patterns(self):
        """Similar-pattern detection on source text returns matches with confidence above 0.8."""
        similar_code = '''
import os
import subprocess

def custom_system(cmd):
    os.system(cmd) # 直接模式

def modified_system(command):
    os.system(command) # 相似模式
'''
        # The file is written to disk, but the detector is handed the source text directly.
        similar_file = os.path.join(self.test_dir, "similar_code.py")
        with open(similar_file, "w") as f:
            f.write(similar_code)

        self.detector.load_signatures(self.signature_file)
        matches = self.detector.detect_similar_patterns(similar_code)

        self.assertGreater(len(matches), 0)
        for match in matches:
            self.assertIsInstance(match, VulnerabilityMatch)
            self.assertGreater(match.confidence, 0.8)

    def test_calculate_confidence(self):
        """``_calculate_confidence`` returns a value between the per-case minimum and 1.0."""
        # Each case is (code snippet, regex pattern, minimum expected confidence).
        test_cases = [
            ("os.system('ls')", r"os\.system\([^)]*\)", 0.8),
            ("subprocess.call(['ls'])", r"subprocess\.call\([^)]*\)", 0.9),
            ("import os; os.system('ls')", r"os\.system\([^)]*\)", 1.0)
        ]

        for code, pattern, expected in test_cases:
            confidence = self.detector._calculate_confidence(code, pattern)
            self.assertGreaterEqual(confidence, expected)
            self.assertLessEqual(confidence, 1.0)

    def test_calculate_similarity(self):
        """``_calculate_similarity`` is at most 1.0 and no lower than the expected value minus 0.1."""
        # Each case is (first string, second string, expected similarity).
        test_cases = [
            ("os.system", "os.system", 1.0),
            ("os.system", "subprocess.system", 0.5),
            ("execute", "executemany", 0.7)
        ]

        for str1, str2, expected in test_cases:
            similarity = self.detector._calculate_similarity(str1, str2)
            self.assertGreaterEqual(similarity, expected - 0.1)
            self.assertLessEqual(similarity, 1.0)
class AsyncLogHandler(logging.Handler):
    """Logging handler that writes records to the configured log file from a background thread.

    ``emit`` only enqueues the record; a daemon worker thread formats it and
    appends it to ``config.logging.log_file``. The path is read at write time,
    so a later change to the configured path affects subsequent writes.

    Args:
        capacity: Maximum number of records that may wait in the queue. When
            the queue is full, new records are dropped and a notice is
            written to stderr.
    """

    def __init__(self, capacity: int = 1000):
        super().__init__()
        self.queue = queue.Queue(maxsize=capacity)
        # Kept only so existing code that touches ``handler.executor`` keeps
        # working; writes now happen directly on the worker thread. Worker
        # threads of the pool are created lazily, so an idle executor is cheap.
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.running = True
        self.worker = threading.Thread(target=self._process_logs)
        self.worker.daemon = True
        self.worker.start()

    def emit(self, record: logging.LogRecord):
        """Queue ``record`` for writing without blocking the caller."""
        try:
            self.queue.put_nowait(record)
        except queue.Full:
            sys.stderr.write(f"Log queue full, dropping message: {record.getMessage()}\n")

    def _process_logs(self):
        """Worker loop: write queued records until closed AND the queue is empty.

        Draining after ``running`` becomes False is what keeps ``close()``
        from discarding records that were emitted just before shutdown.
        """
        while self.running or not self.queue.empty():
            try:
                record = self.queue.get(timeout=0.1)
            except queue.Empty:
                continue
            try:
                # Write on this thread rather than through the executor: an
                # executor refuses new work once interpreter shutdown has
                # started, which is when logging.shutdown() calls close().
                self._write_log(record)
            except Exception as e:
                sys.stderr.write(f"Error processing log: {str(e)}\n")

    def _write_log(self, record: logging.LogRecord):
        """Format ``record`` and append it as one line to the configured log file."""
        try:
            message = self.format(record)
            with open(config.logging.log_file, "a", encoding="utf-8") as f:
                f.write(message + "\n")
        except Exception as e:
            sys.stderr.write(f"Error writing log: {str(e)}\n")

    def close(self):
        """Stop accepting work, flush every queued record, then release resources."""
        self.running = False
        self.worker.join()
        self.executor.shutdown()
        super().close()
class ProgressLogger:
    """Console progress line with completion percentage, throughput and ETA.

    Args:
        total: Number of work items expected. ``0`` is allowed and is shown
            as already complete.
        desc: Label printed before the counters.
        interval: Minimum number of seconds between two redraws triggered by
            ``update``.
    """

    def __init__(self, total: int, desc: str = "", interval: float = 0.1):
        self.total = total
        self.desc = desc
        self.interval = interval
        self.current = 0
        self.start_time = time.time()
        # 0 guarantees that the very first update() call redraws.
        self.last_update = 0

    def update(self, n: int = 1):
        """Advance the counter by ``n`` and redraw if ``interval`` seconds have elapsed."""
        self.current += n
        now = time.time()
        if now - self.last_update >= self.interval:
            self._display_progress()
            self.last_update = now

    def _display_progress(self):
        """Rewrite the current console line with the latest figures."""
        # An empty job has nothing left to do; report it as complete instead
        # of dividing by zero.
        percentage = (self.current / self.total) * 100 if self.total else 100.0
        elapsed = time.time() - self.start_time
        rate = self.current / elapsed if elapsed > 0 else 0
        eta = (self.total - self.current) / rate if rate > 0 else 0

        sys.stdout.write(f"\r{self.desc}: [{self.current}/{self.total}] "
                         f"{percentage:.1f}% Rate: {rate:.1f}/s ETA: {eta:.1f}s")
        sys.stdout.flush()

    def finish(self):
        """Draw the final state and terminate the progress line with a newline."""
        self._display_progress()
        sys.stdout.write("\n")
        sys.stdout.flush()
def setup_logging(log_file: Optional[str] = None, level: Optional[str] = None):
    """Configure the root logger with a console handler and an async JSON file handler.

    The function may be called more than once (it runs at import time and
    again when a caller overrides the settings). Handlers installed by an
    earlier call are removed and closed first, so messages are never emitted
    twice. Handlers added to the root logger by other code are left alone.

    Args:
        log_file: Optional log file path; stored in ``config.logging.log_file``.
            A relative path is placed under ``<cwd>/logs``.
        level: Optional log level; stored in ``config.logging.log_level`` and
            applied to the root logger.
    """
    if log_file:
        config.logging.log_file = log_file
    if level:
        config.logging.log_level = level

    # Relative paths are resolved into a "logs" directory under the current
    # working directory. After this the stored path is absolute, so a repeated
    # call does not nest another "logs" level.
    if not os.path.isabs(config.logging.log_file):
        log_dir = os.path.join(os.getcwd(), "logs")
        config.logging.log_file = os.path.join(log_dir, config.logging.log_file)

    # Create the log directory if needed
    os.makedirs(os.path.dirname(config.logging.log_file), exist_ok=True)

    # Setup root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(config.logging.log_level)

    # Drop handlers left over from a previous call. close() on the async
    # handler also stops its worker thread.
    for stale in list(root_logger.handlers):
        if getattr(stale, "_movery_managed", False):
            root_logger.removeHandler(stale)
            stale.close()

    # Console handler: INFO and above, using the configured text format.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter(config.logging.log_format)
    console_handler.setFormatter(console_formatter)
    console_handler._movery_managed = True
    root_logger.addHandler(console_handler)

    # File handler: DEBUG and above, one JSON document per line.
    file_handler = AsyncLogHandler()
    file_handler.setLevel(logging.DEBUG)
    file_formatter = JsonFormatter()
    file_handler.setFormatter(file_formatter)
    file_handler._movery_managed = True
    root_logger.addHandler(file_handler)
class LRUCache:
    """Least-recently-used cache bounded by the total size of its entries.

    Each entry is stored together with the size it was accounted for, so the
    running total stays exact whether the caller supplied ``size`` explicitly
    or it was measured with ``sys.getsizeof``.

    Args:
        max_size: Capacity in the same unit as the per-item sizes. When
            omitted (or ``0``), ``config.processing.cache_max_size`` is used.
    """

    def __init__(self, max_size: Optional[int] = None):
        self.max_size = max_size or config.processing.cache_max_size
        # Maps key -> (value, accounted_size); ordered oldest-used first.
        self._cache = OrderedDict()
        self._size = 0
        self._lock = threading.Lock()

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for ``key`` and mark it most recently used.

        Returns ``None`` when the key is absent.
        """
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            self._cache.move_to_end(key)
            return entry[0]

    def put(self, key: str, value: Any, size: Optional[int] = None):
        """Insert or replace ``key``, evicting least-recently-used entries to make room.

        Args:
            key: Cache key.
            value: Object to store.
            size: Size to account for this entry; measured with
                ``sys.getsizeof(value)`` when not given.

        An item larger than the whole cache is rejected with a warning and
        the cache is left unchanged.
        """
        if size is None:
            size = sys.getsizeof(value)

        if size > self.max_size:
            logger.warning(f"Item size ({size} bytes) exceeds cache limit "
                           f"({self.max_size} bytes)")
            return

        with self._lock:
            # Remove any previous entry for this key first, so it is counted
            # exactly once and the new entry lands in most-recent position.
            previous = self._cache.pop(key, None)
            if previous is not None:
                self._size -= previous[1]

            while self._cache and self._size + size > self.max_size:
                _, (_, evicted_size) = self._cache.popitem(last=False)
                self._size -= evicted_size

            self._cache[key] = (value, size)
            self._size += size

    def clear(self):
        """Remove every entry and reset the accounted size to zero."""
        with self._lock:
            self._cache.clear()
            self._size = 0
class MemoryMappedFile:
    """Context manager exposing an existing file through ``mmap``.

    Args:
        filename: Path of the file to map. The file must already exist and be
            non-empty; ``mmap`` cannot map a zero-length file.
        mode: ``"r"`` (default) maps the file read-only. Any mode containing
            ``"w"`` maps it read/write; the file is opened for update and is
            NOT truncated.
    """

    def __init__(self, filename: str, mode: str = "r"):
        self.filename = filename
        self.mode = mode
        self._file = None
        self._mmap = None

    def __enter__(self):
        writable = "w" in self.mode
        access = mmap.ACCESS_WRITE if writable else mmap.ACCESS_READ

        # A writable mapping needs a descriptor that is readable and writable
        # and a file that still has its content. Opening with "wb" would
        # truncate the file and yield a write-only descriptor.
        self._file = open(self.filename, "r+b" if writable else "rb")
        try:
            self._mmap = mmap.mmap(self._file.fileno(), 0, access=access)
        except Exception:
            # Do not leak the descriptor when mapping fails (e.g. empty file).
            self._file.close()
            self._file = None
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._mmap:
            self._mmap.close()
            self._mmap = None
        if self._file:
            self._file.close()
            self._file = None

    def read(self, size: int = -1) -> bytes:
        """Return the first ``size`` bytes of the mapping, or all of it for ``-1``.

        The slice always starts at offset 0; it does not depend on ``seek``.
        """
        if size == -1:
            return self._mmap[:]
        return self._mmap[:size]

    def write(self, data: bytes):
        """Write ``data`` at the current position of the mapping.

        Raises:
            IOError: If the file was not opened with a mode containing ``"w"``.
        """
        if "w" not in self.mode:
            raise IOError("File not opened for writing")
        self._mmap.write(data)

    def seek(self, offset: int):
        """Move the mapping's current position (used by ``write``) to ``offset``."""
        self._mmap.seek(offset)
def chunk_iterator(data: Any, chunk_size: Optional[int] = None) -> Generator:
    """Lazily split ``data`` into pieces of at most ``chunk_size`` elements.

    Args:
        data: A ``str``/``bytes`` object (sliced into substrings) or any other
            iterable (grouped into lists).
        chunk_size: Piece length; when omitted or ``0`` the value of
            ``config.processing.chunk_size`` is used.

    Yields:
        Slices of ``data`` for ``str``/``bytes`` input, otherwise lists of
        consecutive items. The final piece may be shorter.

    Raises:
        TypeError: On first iteration, if ``data`` is neither text/bytes nor
            iterable.
    """
    step = chunk_size if chunk_size else config.processing.chunk_size

    # Text and bytes are sliced so the pieces keep the input's own type.
    if isinstance(data, (bytes, str)):
        yield from (data[start:start + step] for start in range(0, len(data), step))
        return

    if not hasattr(data, "__iter__"):
        raise TypeError(f"Unsupported data type: {type(data)}")

    # Generic iterables are buffered into lists of ``step`` items.
    pending = []
    for element in data:
        pending.append(element)
        if len(pending) >= step:
            yield pending
            pending = []
    if pending:
        yield pending
"Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Operating System :: OS Independent", 31 | ], 32 | python_requires=">=3.7", 33 | install_requires=[ 34 | "pytest>=7.3.1", 35 | "coverage>=7.2.7", 36 | "jinja2>=3.0.0", 37 | "plotly>=5.0.0", 38 | "pandas>=1.3.0", 39 | "psutil>=5.8.0", 40 | "tqdm>=4.61.0", 41 | "colorama>=0.4.4", 42 | "requests>=2.26.0", 43 | "beautifulsoup4>=4.9.3", 44 | "lxml>=4.6.3", 45 | "pygments>=2.9.0", 46 | "typing-extensions>=3.10.0", 47 | "dataclasses>=0.8;python_version<'3.7'", 48 | ], 49 | entry_points={ 50 | "console_scripts": [ 51 | "movery=movery.main:main", 52 | ], 53 | }, 54 | package_data={ 55 | "movery": [ 56 | "templates/*.html", 57 | "config/*.json", 58 | ], 59 | }, 60 | include_package_data=True, 61 | zip_safe=False, 62 | ) -------------------------------------------------------------------------------- /signatures.json: -------------------------------------------------------------------------------- 1 | { 2 | "signatures": [ 3 | { 4 | "id": "CWE-78", 5 | "name": "OS Command Injection", 6 | "description": "The software constructs all or part of an OS command using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the intended OS command when it is sent to a downstream component.", 7 | "severity": "CRITICAL", 8 | "cwe_id": "CWE-78", 9 | "affected_languages": ["python", "php", "javascript"], 10 | "code_patterns": [ 11 | "os\\.system\\(.*\\)", 12 | "subprocess\\.call\\(.*shell\\s*=\\s*True.*\\)", 13 | "exec\\(.*\\)", 14 | "eval\\(.*\\)" 15 | ], 16 | "fix_patterns": [ 17 | "shlex.quote(command)", 18 | "subprocess.run([command], shell=False)", 19 | "ast.literal_eval(input)" 20 | ], 21 | "context_patterns": [ 22 | "import\\s+os", 23 | "import\\s+subprocess", 24 | "import\\s+shlex" 25 | ] 26 | }, 27 | { 28 | "id": "CWE-89", 29 | "name": "SQL Injection", 30 | "description": "The software constructs all 
or part of an SQL command using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the intended SQL command when it is sent to a downstream component.", 31 | "severity": "CRITICAL", 32 | "cwe_id": "CWE-89", 33 | "affected_languages": ["python", "php", "java"], 34 | "code_patterns": [ 35 | "cursor\\.execute\\(.*%.*\\)", 36 | "cursor\\.execute\\(.*\\+.*\\)", 37 | "cursor\\.executemany\\(.*%.*\\)", 38 | "mysql_query\\(.*\\$.*\\)" 39 | ], 40 | "fix_patterns": [ 41 | "cursor.execute(query, params)", 42 | "cursor.executemany(query, params)", 43 | "prepared_statement.setString(1, input)" 44 | ], 45 | "context_patterns": [ 46 | "import\\s+sqlite3", 47 | "import\\s+mysql", 48 | "import\\s+psycopg2" 49 | ] 50 | }, 51 | { 52 | "id": "CWE-22", 53 | "name": "Path Traversal", 54 | "description": "The software uses external input to construct a pathname that is intended to identify a file or directory that is located underneath a restricted parent directory, but the software does not properly neutralize special elements within the pathname that can cause the pathname to resolve to a location that is outside of the restricted directory.", 55 | "severity": "HIGH", 56 | "cwe_id": "CWE-22", 57 | "affected_languages": ["python", "php", "java", "javascript"], 58 | "code_patterns": [ 59 | "open\\(.*\\+.*\\)", 60 | "file_get_contents\\(.*\\$.*\\)", 61 | "new\\s+File\\(.*\\+.*\\)" 62 | ], 63 | "fix_patterns": [ 64 | "os.path.abspath(os.path.join(base_dir, filename))", 65 | "os.path.normpath(path)", 66 | "Path(path).resolve().is_relative_to(base_dir)" 67 | ], 68 | "context_patterns": [ 69 | "import\\s+os", 70 | "from\\s+pathlib\\s+import\\s+Path" 71 | ] 72 | }, 73 | { 74 | "id": "CWE-79", 75 | "name": "Cross-site Scripting (XSS)", 76 | "description": "The software does not neutralize or incorrectly neutralizes user-controllable input before it is placed in output that is used as a web page that is 
served to other users.", 77 | "severity": "HIGH", 78 | "cwe_id": "CWE-79", 79 | "affected_languages": ["python", "php", "javascript"], 80 | "code_patterns": [ 81 | "innerHTML\\s*=.*", 82 | "document\\.write\\(.*\\)", 83 | "\\$\\(.*\\)\\.html\\(.*\\)" 84 | ], 85 | "fix_patterns": [ 86 | "textContent = content", 87 | "innerText = content", 88 | "createElement('div')" 89 | ], 90 | "context_patterns": [ 91 | "