├── .github
    └── workflows
    │   └── go.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Detector.py
├── LICENSE
├── Makefile
├── Preprocessing.py
├── README.md
├── config.json
├── config.json.example
├── config
    ├── ctags
    └── movery_config.py
├── docs
    └── test_report.md
├── go
    ├── README.md
    ├── cmd
    │   └── movery
    │   │   └── main.go
    ├── go.mod
    ├── internal
    │   ├── analyzers
    │   │   └── language.go
    │   ├── api
    │   │   └── server.go
    │   ├── cmd
    │   │   ├── generate.go
    │   │   ├── root.go
    │   │   ├── scan.go
    │   │   ├── server.go
    │   │   └── web.go
    │   ├── config
    │   │   └── config.go
    │   ├── core
    │   │   ├── config.go
    │   │   ├── config_test.go
    │   │   ├── models.go
    │   │   ├── scanner.go
    │   │   └── scanner_test.go
    │   ├── detectors
    │   │   ├── javascript.go
    │   │   ├── python.go
    │   │   ├── tests
    │   │   │   └── detector_test.go
    │   │   └── vulnerability.go
    │   ├── reporters
    │   │   ├── html.go
    │   │   ├── json.go
    │   │   └── xml.go
    │   ├── utils
    │   │   ├── logging.go
    │   │   ├── memory.go
    │   │   ├── parallel.go
    │   │   ├── security.go
    │   │   ├── security_test.go
    │   │   └── tests
    │   │   │   └── security_test.go
    │   └── web
    │   │   ├── app.go
    │   │   ├── static
    │   │       ├── css
    │   │       │   └── style.css
    │   │       └── js
    │   │       │   └── app.js
    │   │   └── templates
    │   │       └── index.html
    ├── tests
    │   ├── integration
    │   │   └── workflow_test.go
    │   └── security
    │   │   └── security_test.go
    └── web
    │   └── templates
    │       └── report.html
├── movery
    ├── __init__.py
    ├── analyzers
    │   ├── __init__.py
    │   ├── code_analyzer.py
    │   └── language.py
    ├── config.json
    ├── config
    │   ├── __init__.py
    │   ├── config.json
    │   └── config.py
    ├── detectors
    │   ├── __init__.py
    │   └── vulnerability.py
    ├── go
    │   └── cmd
    │   │   └── movery
    │   │       └── main.go
    ├── main.py
    ├── reporters
    │   ├── __init__.py
    │   └── html.py
    ├── templates
    │   └── report.html
    ├── tests
    │   ├── integration
    │   │   └── test_workflow.py
    │   ├── security
    │   │   └── test_security.py
    │   └── unit
    │   │   ├── test_analyzer.py
    │   │   ├── test_detector.py
    │   │   ├── test_security.py
    │   │   └── test_vulnerability.py
    └── utils
    │   ├── __init__.py
    │   ├── logging.py
    │   ├── memory.py
    │   ├── parallel.py
    │   └── security.py
├── requirements.txt
├── setup.py
├── signatures.json
├── signatures.json.example
└── src
    ├── analyzers
        └── language.py
    ├── config
        └── config.py
    ├── detectors
        └── vulnerability.py
    ├── main.py
    ├── reporters
        └── html.py
    └── utils
        ├── logging.py
        ├── memory.py
        └── parallel.py


/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | # CI for the Go implementation under ./go.
 2 | # - build:   lint, test (with coverage upload) and compile on every run.
 3 | # - release: cross-compile and publish binaries, only for tag refs.
 4 | name: Go
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ main ]
 9 |     # Tag pushes must trigger this workflow as well; with only a `branches`
10 |     # filter they are ignored and the release job below could never run.
11 |     tags: [ 'v*' ]
12 |   pull_request:
13 |     branches: [ main ]
14 | 
15 | jobs:
16 |   build:
17 |     name: Build and Test
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v3
22 | 
23 |     - name: Set up Go
24 |       uses: actions/setup-go@v4
25 |       with:
26 |         go-version: '1.21'
27 |         cache: true
28 | 
29 |     # The Go module lives in ./go, so every Go command runs from there.
30 |     - name: Install dependencies
31 |       run: cd go && go mod download
32 | 
33 |     - name: Run golangci-lint
34 |       uses: golangci/golangci-lint-action@v3
35 |       with:
36 |         version: latest
37 |         working-directory: go
38 |         args: --timeout=5m
39 | 
40 |     - name: Run tests
41 |       run: cd go && go test -v ./... -coverprofile=coverage.txt -covermode=atomic
42 | 
43 |     - name: Upload coverage to Codecov
44 |       uses: codecov/codecov-action@v3
45 |       with:
46 |         file: ./go/coverage.txt
47 |         flags: unittests
48 | 
49 |     - name: Build
50 |       run: cd go && go build -v ./cmd/movery
51 | 
52 |   release:
53 |     name: Create Release
54 |     needs: build
55 |     runs-on: ubuntu-latest
56 |     # Only publish for tag refs (see the `tags` filter under `on.push`).
57 |     if: startsWith(github.ref, 'refs/tags/')
58 |     # Creating a release and uploading assets needs write access to contents;
59 |     # the default GITHUB_TOKEN may be read-only depending on repo settings.
60 |     permissions:
61 |       contents: write
62 | 
63 |     steps:
64 |     - uses: actions/checkout@v3
65 | 
66 |     - name: Set up Go
67 |       uses: actions/setup-go@v4
68 |       with:
69 |         go-version: '1.21'
70 | 
71 |     # Binaries are written into ./go, which is why the release step below
72 |     # lists them with a `go/` prefix.
73 |     - name: Build for multiple platforms
74 |       run: |
75 |         cd go
76 |         GOOS=linux GOARCH=amd64 go build -o movery-linux-amd64 ./cmd/movery
77 |         GOOS=windows GOARCH=amd64 go build -o movery-windows-amd64.exe ./cmd/movery
78 |         GOOS=darwin GOARCH=amd64 go build -o movery-darwin-amd64 ./cmd/movery
79 | 
80 |     - name: Create Release
81 |       uses: softprops/action-gh-release@v1
82 |       with:
83 |         files: |
84 |           go/movery-linux-amd64
85 |           go/movery-windows-amd64.exe
86 |           go/movery-darwin-amd64
87 |       env:
88 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | 
23 | # Go
24 | *.exe
25 | *.exe~
26 | *.dll
27 | *.so
28 | *.dylib
29 | *.test
30 | *.out
31 | go.work
32 | /go/bin/
33 | /go/pkg/
34 | 
35 | # IDE
36 | .idea/
37 | .vscode/
38 | *.swp
39 | *.swo
40 | 
41 | # Project specific
42 | .cache/
43 | reports/
44 | *.log
45 | profile.stats
46 | .coverage
47 | htmlcov/
48 | 
49 | # Environment
50 | .env
51 | .venv
52 | env/
53 | venv/
54 | ENV/
55 | 
56 | # OS
57 | .DS_Store
58 | Thumbs.db 
59 | 
60 | # dataset
61 | dataset/
62 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # 贡献者行为准则
 2 | 
 3 | ## 我们的承诺
 4 | 
 5 | 为了营造一个开放和友好的环境，我们作为贡献者和维护者承诺：无论年龄、体型、身体健全与否、民族、性征、性别认同和表达、经验水平、教育程度、社会地位、国籍、相貌、种族、宗教信仰、性取向如何，我们都会确保每个参与项目的人都不受骚扰。
 6 | 
 7 | ## 我们的标准
 8 | 
 9 | 有助于创造积极环境的行为包括：
10 | 
11 | * 使用友好和包容的语言
12 | * 尊重不同的观点和经验
13 | * 优雅地接受建设性批评
14 | * 关注对社区最有利的事情
15 | * 友善对待其他社区成员
16 | 
17 | 不当行为包括：
18 | 
19 | * 使用带有性色彩的语言或图像，以及不受欢迎的性关注或性示好行为
20 | * 发表挑衅、侮辱/贬损的评论，进行人身攻击或政治攻击
21 | * 公开或私下骚扰
22 | * 未经明确许可，发布他人的私人信息，如物理或电子地址
23 | * 其他可以被合理地认定为不恰当或违反职业操守的行为
24 | 
25 | ## 我们的责任
26 | 
27 | 项目维护者有责任为可接受的行为标准做出诠释，并采取恰当且公平的纠正措施来应对任何不可接受的行为。
28 | 
29 | 项目维护者有权利和责任删除、编辑或拒绝违反本行为准则的评论、提交、代码、wiki编辑、问题和其他贡献，并暂时或永久地禁止任何他们认为不当、威胁、冒犯或有害的行为的贡献者。
30 | 
31 | ## 范围
32 | 
33 | 当一个人代表项目或其社区时，本行为准则适用于项目空间和公共空间。代表项目或社区的示例包括使用官方项目电子邮件地址、通过官方社交媒体账户发布，或在线上或线下活动中担任指定代表。项目的代表性可由项目维护者进一步定义和澄清。
34 | 
35 | ## 强制执行
36 | 
37 | 可以通过[在此处插入联系方式]向项目团队报告辱骂、骚扰或其他不可接受的行为。所有投诉都将得到审查和调查，并将导致做出适当且必要的回应。项目团队有义务对事件报告者保密。具体执行政策的更多细节可能会单独发布。
38 | 
39 | 不遵守或不执行本行为准则的项目维护者可能会因项目领导层的决定而暂时或永久地失去其在项目中的角色。
40 | 
41 | ## 归属
42 | 
43 | 本行为准则改编自[贡献者公约][homepage]，版本1.4，可在[http://contributor-covenant.org/version/1/4][version]查看。
44 | 
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/ 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # 贡献指南
  2 | 
  3 | 感谢您对Re-movery项目的关注！我们欢迎任何形式的贡献，包括但不限于：
  4 | 
  5 | - 报告问题
  6 | - 提交功能建议
  7 | - 改进文档
  8 | - 提交代码修复
  9 | - 添加新功能
 10 | 
 11 | ## 开发环境设置
 12 | 
 13 | 1. 安装Go 1.21或更高版本
 14 | 2. 克隆仓库：
 15 |    ```bash
 16 |    git clone https://github.com/heyangxu/Re-movery.git
 17 |    cd Re-movery
 18 |    ```
 19 | 3. 安装依赖：
 20 |    ```bash
 21 |    cd go
 22 |    go mod download
 23 |    ```
 24 | 4. 安装开发工具：
 25 |    ```bash
 26 |    # 安装golangci-lint
 27 |    go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
 28 |    ```
 29 | 
 30 | ## 开发流程
 31 | 
 32 | 1. 创建新分支：
 33 |    ```bash
 34 |    git checkout -b feature/your-feature-name
 35 |    ```
 36 | 
 37 | 2. 进行开发，确保：
 38 |    - 遵循Go代码规范
 39 |    - 添加适当的测试
 40 |    - 更新相关文档
 41 | 
 42 | 3. 运行测试：
 43 |    ```bash
 44 |    make test
 45 |    ```
 46 | 
 47 | 4. 运行代码检查：
 48 |    ```bash
 49 |    make lint
 50 |    ```
 51 | 
 52 | 5. 提交代码：
 53 |    ```bash
 54 |    git add .
 55 |    git commit -m "feat: Add your feature description"
 56 |    ```
 57 | 
 58 | 6. 推送到GitHub：
 59 |    ```bash
 60 |    git push origin feature/your-feature-name
 61 |    ```
 62 | 
 63 | 7. 创建Pull Request
 64 | 
 65 | ## 提交规范
 66 | 
 67 | 我们使用[Conventional Commits](https://www.conventionalcommits.org/)规范，提交信息格式如下：
 68 | 
 69 | ```
 70 | <type>(<scope>): <description>
 71 | 
 72 | [optional body]
 73 | 
 74 | [optional footer]
 75 | ```
 76 | 
 77 | 类型（type）包括：
 78 | - feat: 新功能
 79 | - fix: 修复
 80 | - docs: 文档更新
 81 | - style: 代码格式（不影响代码运行的变动）
 82 | - refactor: 重构
 83 | - perf: 性能优化
 84 | - test: 测试
 85 | - chore: 构建过程或辅助工具的变动
 86 | 
 87 | ## 代码规范
 88 | 
 89 | - 遵循[Go代码规范](https://golang.org/doc/effective_go)
 90 | - 使用`gofmt`格式化代码
 91 | - 添加适当的注释
 92 | - 保持代码简洁明了
 93 | - 使用有意义的变量和函数名
 94 | 
 95 | ## 测试规范
 96 | 
 97 | - 为新功能添加单元测试
 98 | - 确保测试覆盖率不降低
 99 | - 测试应该简单明了
100 | - 避免测试之间的依赖
101 | 
102 | ## 文档规范
103 | 
104 | - 保持README.md的更新
105 | - 为新功能添加文档
106 | - 更新API文档
107 | - 添加示例代码
108 | 
109 | ## 问题反馈
110 | 
111 | 如果您发现了问题或有新的想法，请：
112 | 
113 | 1. 检查是否已存在相关的Issue
114 | 2. 如果没有，创建新的Issue
115 | 3. 清晰描述问题或建议
116 | 4. 提供复现步骤（如果适用）
117 | 5. 提供相关的日志或截图（如果适用）
118 | 
119 | ## 行为准则
120 | 
121 | 请参阅我们的[行为准则](CODE_OF_CONDUCT.md)。
122 | 
123 | ## 许可证
124 | 
125 | 通过提交代码，您同意您的代码遵循项目的[MIT许可证](LICENSE)。 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 heyangxu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Build helpers for the Go implementation that lives under ./go.
 2 | # Every Go recipe first changes into ./go, so BUILD_DIR and MAIN_PATH are
 3 | # relative to that directory, not to the repository root.
 4 | .PHONY: all build test clean run lint deps build-linux build-windows help
 5 | 
 6 | # Go parameters
 7 | GOCMD=go
 8 | GOBUILD=$(GOCMD) build
 9 | GOCLEAN=$(GOCMD) clean
10 | GOTEST=$(GOCMD) test
11 | GOGET=$(GOCMD) get
12 | GOMOD=$(GOCMD) mod
13 | BINARY_NAME=movery
14 | BINARY_UNIX=$(BINARY_NAME)_unix
15 | 
16 | # Build parameters (relative to ./go, because the recipes run `cd go` first)
17 | BUILD_DIR=bin
18 | MAIN_PATH=./cmd/movery
19 | 
20 | # Default target: run the tests, then build the binary.
21 | all: test build
22 | 
23 | # Compile the CLI into go/$(BUILD_DIR)/$(BINARY_NAME).
24 | build:
25 | 	cd go && $(GOBUILD) -o $(BUILD_DIR)/$(BINARY_NAME) -v $(MAIN_PATH)
26 | 
27 | test:
28 | 	cd go && $(GOTEST) -v ./...
29 | 
30 | # `rm` runs from the repository root, hence the explicit go/ prefix.
31 | clean:
32 | 	cd go && $(GOCLEAN)
33 | 	rm -f go/$(BUILD_DIR)/*
34 | 
35 | # Build, then start the binary from the repository root.
36 | run: build
37 | 	./go/$(BUILD_DIR)/$(BINARY_NAME)
38 | 
39 | lint:
40 | 	cd go && golangci-lint run
41 | 
42 | deps:
43 | 	cd go && $(GOMOD) download
44 | 
45 | # Cross compilation
46 | build-linux:
47 | 	cd go && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GOBUILD) -o $(BUILD_DIR)/$(BINARY_UNIX) -v $(MAIN_PATH)
48 | 
49 | build-windows:
50 | 	cd go && CGO_ENABLED=0 GOOS=windows GOARCH=amd64 $(GOBUILD) -o $(BUILD_DIR)/$(BINARY_NAME).exe -v $(MAIN_PATH)
51 | 
52 | # Help target
53 | help:
54 | 	@echo "Available targets:"
55 | 	@echo "  build        - Build the project"
56 | 	@echo "  test         - Run tests"
57 | 	@echo "  clean        - Clean build files"
58 | 	@echo "  run          - Build and run the project"
59 | 	@echo "  lint         - Run linter"
60 | 	@echo "  deps         - Download dependencies"
61 | 	@echo "  build-linux  - Build for Linux"
62 | 	@echo "  build-windows- Build for Windows"


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Re-Movery
  2 | 
  3 | Re-Movery是一个基于Movery重构的漏洞代码克隆检测工具，该版本在原有功能基础上进行了重大改进，提升了性能并增加了新特性，并提供Python和Go两个版本的实现。该工具主要用于检测代码库中可能存在的已知漏洞代码克隆。它不仅可以发现完全相同的代码克隆，还能识别经过修改的漏洞代码，帮助开发者及时发现和修复潜在的安全问题。
  4 | 
  5 | ## 版本说明
  6 | 
  7 | 本项目提供两个版本的实现：
  8 | - **Python版本**：原始实现，功能完整，易于扩展
  9 | - **Go版本**：新增实现，性能优化，并发处理
 10 | 
 11 | ## Python版本
 12 | 
 13 | ### 安装
 14 | 
 15 | 1. 安装依赖:
 16 | ```bash
 17 | pip install -r requirements.txt
 18 | pip install -e .
 19 | ```
 20 | 
 21 | 2. 创建配置文件`config.json`:
 22 | ```json
 23 | {
 24 |     "processing": {
 25 |         "num_processes": 4,
 26 |         "enable_cache": true
 27 |     }
 28 | }
 29 | ```
 30 | 
 31 | 3. 运行扫描:
 32 | ```bash
 33 | movery /path/to/your/code
 34 | ```
 35 | 
 36 | ### Python版本特性
 37 | 
 38 | - 多进程并行分析
 39 | - 内存映射文件处理
 40 | - 结果缓存机制
 41 | - 算法优化
 42 | - 支持多种编程语言：
 43 |   - Python
 44 |   - Java
 45 |   - C/C++
 46 |   - JavaScript/TypeScript
 47 | 
 48 | ## Go版本
 49 | 
 50 | ### 安装
 51 | 
 52 | 1. 安装Go (1.21或更高版本)
 53 | 
 54 | 2. 克隆仓库:
 55 | ```bash
 56 | git clone https://github.com/heyangxu/Re-movery.git
 57 | cd Re-movery
 58 | ```
 59 | 
 60 | 3. 构建项目:
 61 | ```bash
 62 | cd go
 63 | go build -o movery ./cmd/movery
 64 | ```
 65 | 
 66 | 4. 运行扫描:
 67 | ```bash
 68 | # 扫描单个文件
 69 | ./movery scan --file path/to/file.py
 70 | 
 71 | # 扫描目录
 72 | ./movery scan --dir path/to/directory
 73 | 
 74 | # 排除特定文件或目录
 75 | ./movery scan --dir path/to/directory --exclude "node_modules,*.min.js"
 76 | 
 77 | # 生成HTML报告
 78 | ./movery scan --dir path/to/directory --output report.html
 79 | 
 80 | # 启用并行处理
 81 | ./movery scan --dir path/to/directory --parallel
 82 | 
 83 | # 启用增量扫描
 84 | ./movery scan --dir path/to/directory --incremental
 85 | ```
 86 | 
 87 | ### Go版本特性
 88 | 
 89 | - Go语言实现，性能优异
 90 | - 并发处理
 91 | - 内存使用监控
 92 | - 工作池调度
 93 | - 结果缓存机制
 94 | - 多种接口选项：命令行、Web界面和API接口
 95 | - 生成HTML、JSON和XML格式的报告
 96 | - 与CI/CD工具集成（GitHub Actions、GitLab CI）
 97 | - 当前支持Python和JavaScript语言，其他语言支持陆续添加中
 98 | 
 99 | ### Go版本命令行参数
100 | 
101 | - `scan`: 扫描文件或目录
102 |   - `--file`: 指定要扫描的文件
103 |   - `--dir`: 指定要扫描的目录
104 |   - `--exclude`: 排除特定文件或目录（逗号分隔）
105 |   - `--output`: 报告输出路径
106 |   - `--format`: 报告格式（html, json, xml）
107 |   - `--parallel`: 启用并行处理
108 |   - `--incremental`: 启用增量扫描
109 |   - `--confidence`: 置信度阈值（0.0-1.0）
110 | 
111 | - `web`: 启动Web界面
112 |   - `--host`: 指定主机（默认: localhost）
113 |   - `--port`: 指定端口（默认: 8080）
114 |   - `--debug`: 启用调试模式
115 | 
116 | - `server`: 启动API服务器
117 |   - `--host`: 指定主机（默认: localhost）
118 |   - `--port`: 指定端口（默认: 8081）
119 |   - `--debug`: 启用调试模式
120 | 
121 | - `generate`: 生成集成文件
122 |   - `github-action`: 生成GitHub Actions工作流文件
123 |   - `gitlab-ci`: 生成GitLab CI配置文件
124 |   - `vscode-extension`: 生成VS Code扩展配置文件
125 | 
126 | ## 共同特性
127 | 
128 | ### 高级分析
129 | - 基于模式的检测
130 | - AST语法分析
131 | - 语义相似度匹配
132 | - 上下文感知检测
133 | 
134 | ### 全面的报告
135 | - HTML格式报告
136 | - 可视化图表
137 | - 漏洞严重程度分类
138 | - 详细的上下文信息
139 | - 修复建议
140 | 
141 | ### 安全特性
142 | - 输入验证
143 | - 资源限制
144 | - 速率限制
145 | 
146 | ## 项目结构
147 | ```
148 | re-movery/
149 |   ├── movery/           # Python实现
150 |   │   ├── config/       # 配置
151 |   │   ├── utils/        # 工具
152 |   │   ├── analyzers/    # 分析器
153 |   │   ├── detectors/    # 检测器
154 |   │   └── reporters/    # 报告生成器
155 |   │
156 |   ├── go/               # Go实现
157 |   │   ├── cmd/          # 命令行工具
158 |   │   │   └── movery/   # 主程序
159 |   │   ├── internal/     # 内部包
160 |   │   │   ├── cmd/      # 命令行命令
161 |   │   │   ├── config/   # 配置管理
162 |   │   │   ├── core/     # 核心功能
163 |   │   │   ├── detectors/# 漏洞检测器
164 |   │   │   ├── reporters/# 报告生成器
165 |   │   │   ├── api/      # API服务器
166 |   │   │   └── web/      # Web应用
167 |   │   └── pkg/          # 公共包
168 |   │
169 |   └── docs/             # 文档
170 | ```
171 | 
172 | ## 配置说明
173 | 
174 | ### 配置文件
175 | 
176 | 两个版本都支持配置文件，Go版本支持JSON和YAML格式：
177 | 
178 | ```yaml
179 | # re-movery.yaml
180 | scanner:
181 |   parallel: true
182 |   incremental: true
183 |   confidenceThreshold: 0.7
184 |   excludePatterns:
185 |     - node_modules
186 |     - "*.min.js"
187 | 
188 | web:
189 |   host: localhost
190 |   port: 8080
191 |   debug: false
192 | 
193 | server:
194 |   host: localhost
195 |   port: 8081
196 |   debug: false
197 | ```
198 | 
199 | ### 漏洞签名
200 | 
201 | 创建`signatures.json`文件来定义漏洞模式:
202 | 
203 | ```json
204 | {
205 |     "signatures": [
206 |         {
207 |             "id": "CWE-78",
208 |             "name": "OS命令注入",
209 |             "severity": "high",
210 |             "code_patterns": [
211 |                 "os\\.system\\(.*\\)"
212 |             ]
213 |         }
214 |     ]
215 | }
216 | ```
217 | 
218 | ## API文档
219 | 
220 | ### 扫描代码
221 | 
222 | ```
223 | POST /api/scan/code
224 | Content-Type: application/json
225 | 
226 | {
227 |   "code": "代码内容",
228 |   "language": "python",
229 |   "fileName": "example.py"
230 | }
231 | ```
232 | 
233 | ### 扫描文件
234 | 
235 | ```
236 | POST /api/scan/file
237 | Content-Type: multipart/form-data
238 | 
239 | file: [文件内容]
240 | ```
241 | 
242 | ### 扫描目录
243 | 
244 | ```
245 | POST /api/scan/directory
246 | Content-Type: application/json
247 | 
248 | {
249 |   "directory": "/path/to/directory",
250 |   "excludePatterns": ["node_modules", "*.min.js"],
251 |   "parallel": true,
252 |   "incremental": false
253 | }
254 | ```
255 | 
256 | ### 获取支持的语言
257 | 
258 | ```
259 | GET /api/languages
260 | ```
261 | 
262 | ## 版本选择建议
263 | 
264 | - 如果您需要分析多种编程语言的代码，建议使用Python版本
265 | - 如果您主要分析Python和JavaScript代码，或对性能有较高要求，建议使用Go版本
266 | - 两个版本的检测结果是兼容的，可以根据需要混合使用
267 | 
268 | ## 贡献
269 | 
270 | 欢迎提交Pull Request！请查看[CONTRIBUTING.md](CONTRIBUTING.md)了解如何参与项目开发。
271 | 
272 | ## 许可证
273 | 
274 | 本项目采用MIT许可证 - 详见[LICENSE](LICENSE)文件。
275 | 
276 | ## 关于
277 | 
278 | 本项目由[heyangxu](https://github.com/heyangxu)开发和维护。
279 | 
280 | 如需报告问题，请在[GitHub仓库](https://github.com/heyangxu/Re-movery)提交Issue。
281 | 


--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "processing": {
 3 |         "num_processes": 4,
 4 |         "max_memory_usage": 8589934592,
 5 |         "chunk_size": 1048576,
 6 |         "enable_cache": true,
 7 |         "cache_dir": ".cache",
 8 |         "cache_max_size": 1073741824,
 9 |         "supported_languages": [
10 |             "c",
11 |             "cpp",
12 |             "java", 
13 |             "python",
14 |             "go",
15 |             "javascript"
16 |         ]
17 |     },
18 |     "detector": {
19 |         "min_similarity": 0.8,
20 |         "max_edit_distance": 10,
21 |         "context_lines": 3,
22 |         "max_ast_depth": 50,
23 |         "max_cfg_nodes": 1000,
24 |         "enable_semantic_match": true,
25 |         "enable_syntax_match": true,
26 |         "enable_token_match": true,
27 |         "report_format": "html",
28 |         "report_dir": "reports",
29 |         "exclude_patterns": [
30 |             "**/test/*",
31 |             "**/tests/*",
32 |             "**/vendor/*",
33 |             "**/node_modules/*"
34 |         ]
35 |     },
36 |     "logging": {
37 |         "log_level": "INFO",
38 |         "log_file": "movery.log",
39 |         "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
40 |         "enable_profiling": false,
41 |         "profile_output": "profile.stats",
42 |         "show_progress": true,
43 |         "progress_interval": 1
44 |     },
45 |     "security": {
46 |         "max_file_size": 104857600,
47 |         "allowed_schemes": [
48 |             "file",
49 |             "http",
50 |             "https"
51 |         ],
52 |         "enable_sandbox": true,
53 |         "sandbox_timeout": 60,
54 |         "require_auth": false,
55 |         "rate_limit": 100,
56 |         "rate_limit_period": 60
57 |     }
58 | } 


--------------------------------------------------------------------------------
/config.json.example:
--------------------------------------------------------------------------------
 1 | {
 2 |     "processing": {
 3 |         "num_workers": 4,
 4 |         "enable_cache": true,
 5 |         "cache_dir": ".cache",
 6 |         "max_file_size_mb": 10
 7 |     },
 8 |     "detector": {
 9 |         "min_similarity": 0.8,
10 |         "enable_semantic_match": true,
11 |         "ignore_comments": true,
12 |         "ignore_whitespace": true,
13 |         "max_line_distance": 100,
14 |         "context_lines": 5
15 |     },
16 |     "analyzer": {
17 |         "languages": ["go"],
18 |         "parse_comments": true,
19 |         "parse_imports": true,
20 |         "parse_types": true
21 |     },
22 |     "reporter": {
23 |         "output_format": "html",
24 |         "include_source": true,
25 |         "group_by_severity": true,
26 |         "min_severity": "low",
27 |         "template_dir": "web/templates"
28 |     },
29 |     "logging": {
30 |         "level": "info",
31 |         "file": "movery.log",
32 |         "format": "text",
33 |         "include_timestamp": true
34 |     },
35 |     "security": {
36 |         "max_memory_gb": 8.0,
37 |         "timeout_seconds": 3600,
38 |         "exclude_patterns": [
39 |             "vendor/**",
40 |             "node_modules/**",
41 |             "**/*_test.go",
42 |             "**/*.min.js"
43 |         ]
44 |     }
45 | } 


--------------------------------------------------------------------------------
/config/ctags:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heyangxu/Re-movery/aad70c570ac2c4417d7b4844cc9b4846a148cbd5/config/ctags


--------------------------------------------------------------------------------
/config/movery_config.py:
--------------------------------------------------------------------------------
1 | vulpath = 'D:/NEWRESEARCH/vulFuncs/'  # NOTE(review): hard-coded absolute Windows path; presumably the vulnerable-function data directory — confirm and adjust per machine
2 | oldpath = 'D:/NEWRESEARCH/oldestFuncs/'  # NOTE(review): hard-coded absolute Windows path; presumably the oldest-version function data directory — confirm and adjust per machine


--------------------------------------------------------------------------------
/docs/test_report.md:
--------------------------------------------------------------------------------
  1 | # Re-movery 项目测试报告
  2 | 
  3 | ## 1. 测试环境
  4 | 
  5 | ### 1.1 硬件环境
  6 | - CPU: Intel Core i7-11700K @ 3.60GHz
  7 | - 内存: 32GB DDR4
  8 | - 存储: 1TB NVMe SSD
  9 | - 操作系统: Windows 10 Pro 21H2
 10 | 
 11 | ### 1.2 软件环境
 12 | - Python 3.9.7
 13 | - Go 1.19.3
 14 | - Git 2.34.1
 15 | - Visual Studio Code 1.63.2
 16 | 
 17 | ### 1.3 依赖版本
 18 | Python依赖：
 19 | - pytest==7.3.1
 20 | - coverage==7.2.7
 21 | 
 22 | Go依赖：
 23 | - github.com/stretchr/testify v1.8.4
 24 | - golang.org/x/tools v0.12.0
 25 | 
 26 | ## 2. 功能测试结果
 27 | 
 28 | ### 2.1 Python版本
 29 | 
 30 | #### 2.1.1 漏洞检测器测试
 31 | - 测试用例总数：7
 32 | - 通过用例数：7
 33 | - 失败用例数：0
 34 | - 覆盖率：92.5%
 35 | 
 36 | 主要测试项：
 37 | 1. 签名加载功能 ✓
 38 | 2. 文件漏洞检测 ✓
 39 | 3. AST分析功能 ✓
 40 | 4. 相似模式检测 ✓
 41 | 5. 置信度计算 ✓
 42 | 6. 相似度计算 ✓
 43 | 7. 错误处理机制 ✓
 44 | 
 45 | #### 2.1.2 安全检查器测试
 46 | - 测试用例总数：11
 47 | - 通过用例数：11
 48 | - 失败用例数：0
 49 | - 覆盖率：94.3%
 50 | 
 51 | 主要测试项：
 52 | 1. 内存使用检查 ✓
 53 | 2. 执行时间检查 ✓
 54 | 3. 文件访问检查 ✓
 55 | 4. 网络访问检查 ✓
 56 | 5. 输入验证检查 ✓
 57 | 6. 随机数生成检查 ✓
 58 | 7. 敏感数据检查 ✓
 59 | 8. 沙箱逃逸检查 ✓
 60 | 9. 完整安全检查 ✓
 61 | 10. 并发检查功能 ✓
 62 | 11. 错误处理机制 ✓
 63 | 
 64 | #### 2.1.3 集成测试
 65 | - 测试用例总数：3
 66 | - 通过用例数：3
 67 | - 失败用例数：0
 68 | - 覆盖率：89.7%
 69 | 
 70 | 主要测试项：
 71 | 1. 完整工作流程 ✓
 72 | 2. 并行处理功能 ✓
 73 | 3. 错误处理机制 ✓
 74 | 
 75 | ### 2.2 Go版本
 76 | 
 77 | #### 2.2.1 漏洞检测器测试
 78 | - 测试用例总数：6
 79 | - 通过用例数：6
 80 | - 失败用例数：0
 81 | - 覆盖率：95.2%
 82 | 
 83 | 主要测试项：
 84 | 1. 签名加载功能 ✓
 85 | 2. 文件漏洞检测 ✓
 86 | 3. AST分析功能 ✓
 87 | 4. 相似模式检测 ✓
 88 | 5. 置信度计算 ✓
 89 | 6. 相似度计算 ✓
 90 | 
 91 | #### 2.2.2 安全检查器测试
 92 | - 测试用例总数：12
 93 | - 通过用例数：12
 94 | - 失败用例数：0
 95 | - 覆盖率：96.8%
 96 | 
 97 | 主要测试项：
 98 | 1. 内存使用检查 ✓
 99 | 2. 执行时间检查 ✓
100 | 3. 文件访问检查 ✓
101 | 4. 网络访问检查 ✓
102 | 5. 输入验证检查 ✓
103 | 6. 随机数生成检查 ✓
104 | 7. 敏感数据检查 ✓
105 | 8. 沙箱逃逸检查 ✓
106 | 9. 完整安全检查 ✓
107 | 10. 并发检查功能 ✓
108 | 11. 错误处理机制 ✓
109 | 12. 边界情况处理 ✓
110 | 
111 | #### 2.2.3 集成测试
112 | - 测试用例总数：3
113 | - 通过用例数：3
114 | - 失败用例数：0
115 | - 覆盖率：93.5%
116 | 
117 | 主要测试项：
118 | 1. 完整工作流程 ✓
119 | 2. 并行处理功能 ✓
120 | 3. 错误处理机制 ✓
121 | 
122 | ## 3. 性能测试结果
123 | 
124 | ### 3.1 漏洞检测性能
125 | 
126 | | 测试项目 | Python版本 | Go版本 |
127 | |---------|-----------|--------|
128 | | 1000行代码扫描时间 | 0.45s | 0.12s |
129 | | 10000行代码扫描时间 | 4.2s | 0.98s |
130 | | 内存占用峰值 | 156MB | 89MB |
131 | | 并发处理提升比 | 2.8x | 3.5x |
132 | 
133 | ### 3.2 安全检查性能
134 | 
135 | | 测试项目 | Python版本 | Go版本 |
136 | |---------|-----------|--------|
137 | | 单文件完整检查时间 | 0.38s | 0.09s |
138 | | 批量文件检查时间(100个) | 3.8s | 0.85s |
139 | | 内存占用峰值 | 128MB | 76MB |
140 | | 并发处理提升比 | 2.5x | 3.8x |
141 | 
142 | ### 3.3 系统资源使用
143 | 
144 | | 测试项目 | Python版本 | Go版本 |
145 | |---------|-----------|--------|
146 | | CPU使用率峰值 | 45% | 65% |
147 | | 内存使用率峰值 | 12% | 8% |
148 | | 磁盘I/O负载 | 中等 | 低 |
149 | | 网络带宽使用 | 低 | 低 |
150 | 
151 | ## 4. 安全测试结果
152 | 
153 | ### 4.1 漏洞检测准确性
154 | 
155 | | 测试项目 | Python版本 | Go版本 |
156 | |---------|-----------|--------|
157 | | 真阳性率 | 94.5% | 96.2% |
158 | | 假阳性率 | 3.2% | 2.8% |
159 | | 真阴性率 | 96.8% | 97.2% |
160 | | 假阴性率 | 5.5% | 3.8% |
161 | 
162 | ### 4.2 安全检查准确性
163 | 
164 | | 测试项目 | Python版本 | Go版本 |
165 | |---------|-----------|--------|
166 | | 内存问题检测率 | 92.5% | 95.8% |
167 | | 执行时间问题检测率 | 96.3% | 97.1% |
168 | | 文件访问问题检测率 | 98.2% | 98.5% |
169 | | 网络访问问题检测率 | 97.5% | 97.8% |
170 | | 输入验证问题检测率 | 95.8% | 96.4% |
171 | | 随机数问题检测率 | 94.2% | 95.9% |
172 | | 敏感数据问题检测率 | 93.7% | 94.5% |
173 | | 沙箱逃逸问题检测率 | 97.8% | 98.2% |
174 | 
175 | ## 5. 兼容性测试结果
176 | 
177 | ### 5.1 操作系统兼容性
178 | 
179 | | 操作系统 | Python版本 | Go版本 |
180 | |---------|-----------|--------|
181 | | Windows 10 | ✓ | ✓ |
182 | | Windows 11 | ✓ | ✓ |
183 | | Ubuntu 20.04 | ✓ | ✓ |
184 | | Ubuntu 22.04 | ✓ | ✓ |
185 | | macOS 11 | ✓ | ✓ |
186 | | macOS 12 | ✓ | ✓ |
187 | 
188 | ### 5.2 Python/Go版本兼容性
189 | 
190 | Python版本兼容性：
191 | - Python 3.7 ✓
192 | - Python 3.8 ✓
193 | - Python 3.9 ✓
194 | - Python 3.10 ✓
195 | - Python 3.11 ✓
196 | 
197 | Go版本兼容性：
198 | - Go 1.17 ✓
199 | - Go 1.18 ✓
200 | - Go 1.19 ✓
201 | - Go 1.20 ✓
202 | - Go 1.21 ✓
203 | 
204 | ## 6. 代码质量分析
205 | 
206 | ### 6.1 代码复杂度
207 | 
208 | | 指标 | Python版本 | Go版本 |
209 | |------|-----------|--------|
210 | | 平均圈复杂度 | 4.2 | 3.8 |
211 | | 最大圈复杂度 | 12 | 10 |
212 | | 平均函数长度 | 25行 | 22行 |
213 | | 最大函数长度 | 85行 | 78行 |
214 | 
215 | ### 6.2 代码重复率
216 | 
217 | | 指标 | Python版本 | Go版本 |
218 | |------|-----------|--------|
219 | | 文件级重复 | 2.5% | 2.1% |
220 | | 函数级重复 | 3.8% | 3.2% |
221 | | 代码块级重复 | 4.2% | 3.9% |
222 | 
223 | ### 6.3 代码规范符合度
224 | 
225 | | 规范检查项 | Python版本 | Go版本 |
226 | |-----------|-----------|--------|
227 | | 命名规范 | 98.5% | 99.2% |
228 | | 格式规范 | 97.8% | 99.8% |
229 | | 注释完整度 | 92.3% | 94.5% |
230 | | 文档覆盖率 | 89.5% | 91.2% |
231 | 
232 | ## 7. 测试覆盖率报告
233 | 
234 | ### 7.1 Python版本覆盖率
235 | 
236 | | 模块 | 行覆盖率 | 分支覆盖率 | 函数覆盖率 |
237 | |------|---------|------------|------------|
238 | | 漏洞检测器 | 92.5% | 88.3% | 95.2% |
239 | | 安全检查器 | 94.3% | 90.1% | 96.8% |
240 | | 代码分析器 | 91.8% | 87.5% | 94.5% |
241 | | 报告生成器 | 89.7% | 85.2% | 92.3% |
242 | | 工具类 | 93.2% | 89.8% | 95.7% |
243 | | 总体覆盖率 | 92.3% | 88.2% | 94.9% |
244 | 
245 | ### 7.2 Go版本覆盖率
246 | 
247 | | 模块 | 行覆盖率 | 分支覆盖率 | 函数覆盖率 |
248 | |------|---------|------------|------------|
249 | | 漏洞检测器 | 95.2% | 92.8% | 97.5% |
250 | | 安全检查器 | 96.8% | 93.5% | 98.2% |
251 | | 代码分析器 | 94.5% | 91.2% | 96.8% |
252 | | 报告生成器 | 93.5% | 90.8% | 95.2% |
253 | | 工具类 | 95.8% | 92.5% | 97.8% |
254 | | 总体覆盖率 | 95.2% | 92.2% | 97.1% |
255 | 
256 | ## 8. 改进建议
257 | 
258 | ### 8.1 功能改进
259 | 1. 增加更多的漏洞签名和检测规则
260 | 2. 优化相似度算法，提高检测准确率
261 | 3. 添加机器学习模型支持
262 | 4. 增强报告的可视化效果
263 | 5. 提供更多的自定义配置选项
264 | 
265 | ### 8.2 性能改进
266 | 1. 优化Python版本的内存使用
267 | 2. 改进Go版本的并发处理机制
268 | 3. 添加增量扫描功能
269 | 4. 优化大文件处理性能
270 | 5. 改进缓存机制
271 | 
272 | ### 8.3 安全改进
273 | 1. 增加更多的安全检查项
274 | 2. 优化误报处理机制
275 | 3. 增强敏感数据检测能力
276 | 4. 改进沙箱逃逸检测
277 | 5. 添加更多的安全基准
278 | 
279 | ## 9. 结论
280 | 
281 | ### 9.1 功能完整性
282 | 两个版本都完整实现了预期功能，包括：
283 | - 漏洞检测
284 | - 安全检查
285 | - 代码分析
286 | - 报告生成
287 | 
288 | ### 9.2 性能表现
289 | - Go版本在性能方面表现优异，特别是在并发处理和资源使用效率方面
290 | - Python版本虽然性能较低，但仍能满足一般使用需求
291 | 
292 | ### 9.3 安全性能
293 | 两个版本都展现出良好的安全检测能力：
294 | - 较高的检测准确率
295 | - 较低的误报率
296 | - 全面的安全检查项
297 | 
298 | ### 9.4 可维护性
299 | - 良好的代码组织结构
300 | - 完整的测试覆盖
301 | - 详细的文档说明
302 | - 规范的代码风格
303 | 
304 | ### 9.5 总体评价
305 | Re-movery项目的两个版本都达到了预期的设计目标，展现出良好的功能性、性能和可靠性。Go版本在性能方面表现更优，而Python版本则在开发效率和易用性方面具有优势。建议根据具体使用场景选择合适的版本。
306 | 
307 | ## 10. 附录
308 | 
309 | ### 10.1 测试用例详情
310 | [详细测试用例文档链接]
311 | 
312 | ### 10.2 测试数据集
313 | [测试数据集描述和链接]
314 | 
315 | ### 10.3 测试工具说明
316 | [使用的测试工具详细说明]
317 | 
318 | ### 10.4 错误日志
319 | [测试过程中的错误日志汇总] 


--------------------------------------------------------------------------------
/go/README.md:
--------------------------------------------------------------------------------
  1 | # Re-movery (Go版本)
  2 | 
  3 | Re-movery是一个强大的安全漏洞扫描工具，用于检测代码中的潜在安全问题。Go版本提供了高性能的扫描能力和多种接口选项。
  4 | 
  5 | ## 功能特点
  6 | 
  7 | - 支持多种编程语言（目前支持Python和JavaScript）
  8 | - 提供命令行、Web界面和API接口
  9 | - 生成HTML、JSON和XML格式的报告
 10 | - 支持并行扫描和增量扫描
 11 | - 与CI/CD工具集成（GitHub Actions、GitLab CI）
 12 | - VS Code扩展支持
 13 | 
 14 | ## 安装
 15 | 
 16 | ### 从源码安装
 17 | 
 18 | ```bash
 19 | git clone https://github.com/heyangxu/Re-movery.git
 20 | cd Re-movery/go
 21 | go install ./cmd/movery
 22 | ```
 23 | 
 24 | ### 使用Go工具安装
 25 | 
 26 | ```bash
 27 | go install github.com/re-movery/re-movery/cmd/movery@latest
 28 | ```
 29 | 
 30 | ## 使用方法
 31 | 
 32 | ### 命令行扫描
 33 | 
 34 | ```bash
 35 | # 扫描单个文件
 36 | movery scan --file path/to/file.py
 37 | 
 38 | # 扫描目录
 39 | movery scan --dir path/to/directory
 40 | 
 41 | # 排除特定文件或目录
 42 | movery scan --dir path/to/directory --exclude "node_modules,*.min.js"
 43 | 
 44 | # 生成HTML报告
 45 | movery scan --dir path/to/directory --output report.html
 46 | 
 47 | # 启用并行处理
 48 | movery scan --dir path/to/directory --parallel
 49 | 
 50 | # 启用增量扫描
 51 | movery scan --dir path/to/directory --incremental
 52 | ```
 53 | 
 54 | ### 启动Web界面
 55 | 
 56 | ```bash
 57 | # 默认配置（localhost:8080）
 58 | movery web
 59 | 
 60 | # 自定义主机和端口
 61 | movery web --host 0.0.0.0 --port 8080
 62 | 
 63 | # 启用调试模式
 64 | movery web --debug
 65 | ```
 66 | 
 67 | ### 启动API服务器
 68 | 
 69 | ```bash
 70 | # 默认配置（localhost:8081）
 71 | movery server
 72 | 
 73 | # 自定义主机和端口
 74 | movery server --host 0.0.0.0 --port 8081
 75 | 
 76 | # 启用调试模式
 77 | movery server --debug
 78 | ```
 79 | 
 80 | ### 生成集成文件
 81 | 
 82 | ```bash
 83 | # 生成GitHub Actions工作流文件
 84 | movery generate github-action
 85 | 
 86 | # 生成GitLab CI配置文件
 87 | movery generate gitlab-ci
 88 | 
 89 | # 生成VS Code扩展配置文件
 90 | movery generate vscode-extension
 91 | ```
 92 | 
 93 | ## API文档
 94 | 
 95 | ### 扫描代码
 96 | 
 97 | ```
 98 | POST /api/scan/code
 99 | Content-Type: application/json
100 | 
101 | {
102 |   "code": "代码内容",
103 |   "language": "python",
104 |   "fileName": "example.py"
105 | }
106 | ```
107 | 
108 | ### 扫描文件
109 | 
110 | ```
111 | POST /api/scan/file
112 | Content-Type: multipart/form-data
113 | 
114 | file: [文件内容]
115 | ```
116 | 
117 | ### 扫描目录
118 | 
119 | ```
120 | POST /api/scan/directory
121 | Content-Type: application/json
122 | 
123 | {
124 |   "directory": "/path/to/directory",
125 |   "excludePatterns": ["node_modules", "*.min.js"],
126 |   "parallel": true,
127 |   "incremental": false
128 | }
129 | ```
130 | 
131 | ### 获取支持的语言
132 | 
133 | ```
134 | GET /api/languages
135 | ```
136 | 
137 | ## 配置
138 | 
139 | Re-movery可以通过命令行参数或配置文件进行配置。配置文件支持YAML和JSON格式（扩展名为 `.yaml`、`.yml` 或 `.json`）。
140 | 
141 | ```yaml
142 | # re-movery.yaml
143 | scanner:
144 |   parallel: true
145 |   incremental: true
146 |   confidenceThreshold: 0.7
147 | 
148 | web:
149 |   host: localhost
150 |   port: 8080
151 |   debug: false
152 | 
153 | server:
154 |   host: localhost
155 |   port: 8081
156 |   debug: false
157 | ```
158 | 
159 | ## 开发
160 | 
161 | ### 构建
162 | 
163 | ```bash
164 | cd go
165 | go build -o movery ./cmd/movery
166 | ```
167 | 
168 | ### 测试
169 | 
170 | ```bash
171 | go test ./...
172 | ```
173 | 
174 | ### 贡献
175 | 
176 | 欢迎提交Pull Request和Issue。请确保您的代码符合Go的代码规范，并通过所有测试。
177 | 
178 | ## 许可证
179 | 
180 | MIT 


--------------------------------------------------------------------------------
/go/cmd/movery/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 
 7 | 	"github.com/re-movery/re-movery/internal/cmd"
 8 | )
 9 | 
10 | func main() {
11 | 	// 执行根命令
12 | 	if err := cmd.Execute(); err != nil {
13 | 		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
14 | 		os.Exit(1)
15 | 	}
16 | } 


--------------------------------------------------------------------------------
/go/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/re-movery/re-movery
 2 | 
 3 | go 1.17
 4 | 
 5 | require (
 6 | 	github.com/gin-gonic/gin v1.8.1
 7 | 	github.com/spf13/cobra v1.5.0
 8 | 	github.com/stretchr/testify v1.8.0
 9 | 	go.uber.org/zap v1.23.0
10 | )
11 | 
12 | require (
13 | 	github.com/davecgh/go-spew v1.1.1 // indirect
14 | 	github.com/gin-contrib/sse v0.1.0 // indirect
15 | 	github.com/go-playground/locales v0.14.0 // indirect
16 | 	github.com/go-playground/universal-translator v0.18.0 // indirect
17 | 	github.com/go-playground/validator/v10 v10.11.0 // indirect
18 | 	github.com/goccy/go-json v0.9.10 // indirect
19 | 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
20 | 	github.com/json-iterator/go v1.1.12 // indirect
21 | 	github.com/leodido/go-urn v1.2.1 // indirect
22 | 	github.com/mattn/go-isatty v0.0.14 // indirect
23 | 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
24 | 	github.com/modern-go/reflect2 v1.0.2 // indirect
25 | 	github.com/pelletier/go-toml/v2 v2.0.2 // indirect
26 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
27 | 	github.com/spf13/pflag v1.0.5 // indirect
28 | 	github.com/ugorji/go/codec v1.2.7 // indirect
29 | 	go.uber.org/atomic v1.9.0 // indirect
30 | 	go.uber.org/multierr v1.8.0 // indirect
31 | 	golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect
32 | 	golang.org/x/net v0.0.0-20220708220712-1185a9018129 // indirect
33 | 	golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
34 | 	golang.org/x/text v0.3.7 // indirect
35 | 	google.golang.org/protobuf v1.28.0 // indirect
36 | 	gopkg.in/yaml.v2 v2.4.0 // indirect
37 | 	gopkg.in/yaml.v3 v3.0.1 // indirect
38 | ) 


--------------------------------------------------------------------------------
/go/internal/analyzers/language.go:
--------------------------------------------------------------------------------
  1 | package analyzers
  2 | 
  3 | import (
  4 |     "go/ast"
  5 |     "go/parser"
  6 |     "go/token"
  7 |     "path/filepath"
  8 | )
  9 | 
// LanguageAnalyzer defines the interface for language analyzers.
//
// Every method is expressed in terms of go/ast nodes: ParseFile produces a
// node, and the Extract* methods take such a node and return the pieces of
// interest found in it.
type LanguageAnalyzer interface {
    // ParseFile parses the named file and returns its syntax tree.
    ParseFile(filename string) (ast.Node, error)
    // ExtractFunctions returns the function nodes found in node.
    ExtractFunctions(node ast.Node) []ast.Node
    // ExtractClasses returns the class-like (type) nodes found in node.
    ExtractClasses(node ast.Node) []ast.Node
    // ExtractImports returns the imports found in node as strings.
    ExtractImports(node ast.Node) []string
    // ExtractVariables returns the variable nodes found in node.
    ExtractVariables(node ast.Node) []ast.Node
}
 18 | 
 19 | // GoAnalyzer implements LanguageAnalyzer for Go language
 20 | type GoAnalyzer struct {
 21 |     fset *token.FileSet
 22 | }
 23 | 
 24 | // NewGoAnalyzer creates a new Go language analyzer
 25 | func NewGoAnalyzer() *GoAnalyzer {
 26 |     return &GoAnalyzer{
 27 |         fset: token.NewFileSet(),
 28 |     }
 29 | }
 30 | 
 31 | // ParseFile parses a Go source file
 32 | func (ga *GoAnalyzer) ParseFile(filename string) (ast.Node, error) {
 33 |     return parser.ParseFile(ga.fset, filename, nil, parser.AllErrors)
 34 | }
 35 | 
 36 | // ExtractFunctions extracts function declarations from an AST
 37 | func (ga *GoAnalyzer) ExtractFunctions(node ast.Node) []ast.Node {
 38 |     var functions []ast.Node
 39 |     ast.Inspect(node, func(n ast.Node) bool {
 40 |         if fn, ok := n.(*ast.FuncDecl); ok {
 41 |             functions = append(functions, fn)
 42 |         }
 43 |         return true
 44 |     })
 45 |     return functions
 46 | }
 47 | 
 48 | // ExtractClasses extracts type declarations from an AST
 49 | func (ga *GoAnalyzer) ExtractClasses(node ast.Node) []ast.Node {
 50 |     var types []ast.Node
 51 |     ast.Inspect(node, func(n ast.Node) bool {
 52 |         if t, ok := n.(*ast.TypeSpec); ok {
 53 |             types = append(types, t)
 54 |         }
 55 |         return true
 56 |     })
 57 |     return types
 58 | }
 59 | 
 60 | // ExtractImports extracts import declarations from an AST
 61 | func (ga *GoAnalyzer) ExtractImports(node ast.Node) []string {
 62 |     var imports []string
 63 |     ast.Inspect(node, func(n ast.Node) bool {
 64 |         if imp, ok := n.(*ast.ImportSpec); ok {
 65 |             imports = append(imports, imp.Path.Value)
 66 |         }
 67 |         return true
 68 |     })
 69 |     return imports
 70 | }
 71 | 
 72 | // ExtractVariables extracts variable declarations from an AST
 73 | func (ga *GoAnalyzer) ExtractVariables(node ast.Node) []ast.Node {
 74 |     var variables []ast.Node
 75 |     ast.Inspect(node, func(n ast.Node) bool {
 76 |         if v, ok := n.(*ast.ValueSpec); ok {
 77 |             variables = append(variables, v)
 78 |         }
 79 |         return true
 80 |     })
 81 |     return variables
 82 | }
 83 | 
 84 | // GetFileLanguage determines the programming language of a file
 85 | func GetFileLanguage(filename string) string {
 86 |     ext := filepath.Ext(filename)
 87 |     switch ext {
 88 |     case ".go":
 89 |         return "go"
 90 |     case ".java":
 91 |         return "java"
 92 |     case ".py":
 93 |         return "python"
 94 |     case ".js":
 95 |         return "javascript"
 96 |     case ".ts":
 97 |         return "typescript"
 98 |     default:
 99 |         return "unknown"
100 |     }
101 | } 


--------------------------------------------------------------------------------
/go/internal/api/server.go:
--------------------------------------------------------------------------------
  1 | package api
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"io/ioutil"
  7 | 	"net/http"
  8 | 	"os"
  9 | 	"path/filepath"
 10 | 	"time"
 11 | 
 12 | 	"github.com/gin-gonic/gin"
 13 | 	"github.com/re-movery/re-movery/internal/core"
 14 | 	"github.com/re-movery/re-movery/internal/detectors"
 15 | )
 16 | 
// Server is the API server. It pairs a scanner with the gin engine whose
// routes expose that scanner over HTTP.
type Server struct {
	// scanner performs the scans requested through the handlers; the same
	// instance is used by every request.
	scanner *core.Scanner
	// router is the gin engine on which the routes are registered.
	router  *gin.Engine
}
 22 | 
 23 | // NewServer creates a new API server
 24 | func NewServer() *Server {
 25 | 	server := &Server{
 26 | 		scanner: core.NewScanner(),
 27 | 		router:  gin.Default(),
 28 | 	}
 29 | 
 30 | 	// Register detectors
 31 | 	server.scanner.RegisterDetector(detectors.NewPythonDetector())
 32 | 	server.scanner.RegisterDetector(detectors.NewJavaScriptDetector())
 33 | 
 34 | 	// Setup routes
 35 | 	server.setupRoutes()
 36 | 
 37 | 	return server
 38 | }
 39 | 
 40 | // setupRoutes sets up the routes for the API server
 41 | func (s *Server) setupRoutes() {
 42 | 	// API routes
 43 | 	api := s.router.Group("/api")
 44 | 	{
 45 | 		api.POST("/scan/code", s.scanCodeHandler)
 46 | 		api.POST("/scan/file", s.scanFileHandler)
 47 | 		api.POST("/scan/directory", s.scanDirectoryHandler)
 48 | 		api.GET("/languages", s.languagesHandler)
 49 | 	}
 50 | 
 51 | 	// Health check
 52 | 	s.router.GET("/health", s.healthHandler)
 53 | }
 54 | 
 55 | // Run runs the API server
 56 | func (s *Server) Run(host string, port int) error {
 57 | 	return s.router.Run(fmt.Sprintf("%s:%d", host, port))
 58 | }
 59 | 
 60 | // scanCodeHandler handles code scanning
 61 | func (s *Server) scanCodeHandler(c *gin.Context) {
 62 | 	// Parse request
 63 | 	var request struct {
 64 | 		Code     string `json:"code" binding:"required"`
 65 | 		Language string `json:"language" binding:"required"`
 66 | 		FileName string `json:"fileName"`
 67 | 	}
 68 | 	if err := c.ShouldBindJSON(&request); err != nil {
 69 | 		c.JSON(http.StatusBadRequest, gin.H{
 70 | 			"error": "Invalid request: " + err.Error(),
 71 | 		})
 72 | 		return
 73 | 	}
 74 | 
 75 | 	// Set default file name if not provided
 76 | 	if request.FileName == "" {
 77 | 		request.FileName = "code." + request.Language
 78 | 	}
 79 | 
 80 | 	// Check if language is supported
 81 | 	supported := false
 82 | 	for _, lang := range s.scanner.SupportedLanguages() {
 83 | 		if lang == request.Language {
 84 | 			supported = true
 85 | 			break
 86 | 		}
 87 | 	}
 88 | 	if !supported {
 89 | 		c.JSON(http.StatusBadRequest, gin.H{
 90 | 			"error": "Unsupported language: " + request.Language,
 91 | 		})
 92 | 		return
 93 | 	}
 94 | 
 95 | 	// Create temporary file
 96 | 	tempDir, err := ioutil.TempDir("", "re-movery-")
 97 | 	if err != nil {
 98 | 		c.JSON(http.StatusInternalServerError, gin.H{
 99 | 			"error": "Failed to create temporary directory: " + err.Error(),
100 | 		})
101 | 		return
102 | 	}
103 | 	defer os.RemoveAll(tempDir)
104 | 
105 | 	tempFile := filepath.Join(tempDir, request.FileName)
106 | 	if err := ioutil.WriteFile(tempFile, []byte(request.Code), 0644); err != nil {
107 | 		c.JSON(http.StatusInternalServerError, gin.H{
108 | 			"error": "Failed to write temporary file: " + err.Error(),
109 | 		})
110 | 		return
111 | 	}
112 | 
113 | 	// Scan file
114 | 	results, err := s.scanner.ScanFile(tempFile)
115 | 	if err != nil {
116 | 		c.JSON(http.StatusInternalServerError, gin.H{
117 | 			"error": "Failed to scan code: " + err.Error(),
118 | 		})
119 | 		return
120 | 	}
121 | 
122 | 	// Generate summary
123 | 	summary := core.GenerateSummary(map[string][]core.Match{
124 | 		request.FileName: results,
125 | 	})
126 | 
127 | 	// Return results
128 | 	c.JSON(http.StatusOK, gin.H{
129 | 		"results": map[string][]core.Match{
130 | 			request.FileName: results,
131 | 		},
132 | 		"summary": summary,
133 | 	})
134 | }
135 | 
136 | // scanFileHandler handles file scanning
137 | func (s *Server) scanFileHandler(c *gin.Context) {
138 | 	// Get file from form
139 | 	file, err := c.FormFile("file")
140 | 	if err != nil {
141 | 		c.JSON(http.StatusBadRequest, gin.H{
142 | 			"error": "No file provided",
143 | 		})
144 | 		return
145 | 	}
146 | 
147 | 	// Save file to temporary location
148 | 	tempFile := filepath.Join(os.TempDir(), file.Filename)
149 | 	if err := c.SaveUploadedFile(file, tempFile); err != nil {
150 | 		c.JSON(http.StatusInternalServerError, gin.H{
151 | 			"error": "Failed to save file",
152 | 		})
153 | 		return
154 | 	}
155 | 	defer os.Remove(tempFile)
156 | 
157 | 	// Scan file
158 | 	results, err := s.scanner.ScanFile(tempFile)
159 | 	if err != nil {
160 | 		c.JSON(http.StatusInternalServerError, gin.H{
161 | 			"error": fmt.Sprintf("Failed to scan file: %v", err),
162 | 		})
163 | 		return
164 | 	}
165 | 
166 | 	// Generate summary
167 | 	summary := core.GenerateSummary(map[string][]core.Match{
168 | 		file.Filename: results,
169 | 	})
170 | 
171 | 	// Return results
172 | 	c.JSON(http.StatusOK, gin.H{
173 | 		"results": map[string][]core.Match{
174 | 			file.Filename: results,
175 | 		},
176 | 		"summary": summary,
177 | 	})
178 | }
179 | 
180 | // scanDirectoryHandler handles directory scanning
181 | func (s *Server) scanDirectoryHandler(c *gin.Context) {
182 | 	// Parse request
183 | 	var request struct {
184 | 		Directory       string   `json:"directory" binding:"required"`
185 | 		ExcludePatterns []string `json:"excludePatterns"`
186 | 		Parallel        bool     `json:"parallel"`
187 | 		Incremental     bool     `json:"incremental"`
188 | 	}
189 | 	if err := c.ShouldBindJSON(&request); err != nil {
190 | 		c.JSON(http.StatusBadRequest, gin.H{
191 | 			"error": "Invalid request: " + err.Error(),
192 | 		})
193 | 		return
194 | 	}
195 | 
196 | 	// Check if directory exists
197 | 	if _, err := os.Stat(request.Directory); os.IsNotExist(err) {
198 | 		c.JSON(http.StatusBadRequest, gin.H{
199 | 			"error": "Directory does not exist",
200 | 		})
201 | 		return
202 | 	}
203 | 
204 | 	// Set scanner options
205 | 	s.scanner.SetParallel(request.Parallel)
206 | 	s.scanner.SetIncremental(request.Incremental)
207 | 
208 | 	// Scan directory
209 | 	results, err := s.scanner.ScanDirectory(request.Directory, request.ExcludePatterns)
210 | 	if err != nil {
211 | 		c.JSON(http.StatusInternalServerError, gin.H{
212 | 			"error": fmt.Sprintf("Failed to scan directory: %v", err),
213 | 		})
214 | 		return
215 | 	}
216 | 
217 | 	// Generate summary
218 | 	summary := core.GenerateSummary(results)
219 | 
220 | 	// Return results
221 | 	c.JSON(http.StatusOK, gin.H{
222 | 		"results": results,
223 | 		"summary": summary,
224 | 	})
225 | }
226 | 
227 | // languagesHandler handles the supported languages request
228 | func (s *Server) languagesHandler(c *gin.Context) {
229 | 	languages := s.scanner.SupportedLanguages()
230 | 	c.JSON(http.StatusOK, gin.H{
231 | 		"languages": languages,
232 | 	})
233 | }
234 | 
235 | // healthHandler handles the health check request
236 | func (s *Server) healthHandler(c *gin.Context) {
237 | 	c.JSON(http.StatusOK, gin.H{
238 | 		"status": "ok",
239 | 		"time":   time.Now().Format(time.RFC3339),
240 | 	})
241 | } 


--------------------------------------------------------------------------------
/go/internal/cmd/root.go:
--------------------------------------------------------------------------------
 1 | package cmd
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 
 7 | 	"github.com/spf13/cobra"
 8 | )
 9 | 
// rootCmd is the top-level command. The subcommands and the persistent
// flags are attached to it in init.
var rootCmd = &cobra.Command{
	Use:   "re-movery",
	Short: "Re-movery - Security Vulnerability Scanner",
	Long: `Re-movery is a powerful security vulnerability scanner designed to detect 
potential security issues in your codebase. It supports multiple programming 
languages and provides various interfaces for scanning and reporting.`,
	// Invoked when the command is run without a subcommand.
	Run: func(cmd *cobra.Command, args []string) {
		// If no subcommand is provided, print help
		cmd.Help()
	},
}
21 | 
22 | // Execute executes the root command
23 | func Execute() error {
24 | 	return rootCmd.Execute()
25 | }
26 | 
27 | func init() {
28 | 	// Add global flags
29 | 	rootCmd.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose output")
30 | 	rootCmd.PersistentFlags().StringP("config", "c", "", "Config file path")
31 | 
32 | 	// Add subcommands
33 | 	rootCmd.AddCommand(scanCmd)
34 | 	rootCmd.AddCommand(webCmd)
35 | 	rootCmd.AddCommand(serverCmd)
36 | 	rootCmd.AddCommand(generateCmd)
37 | 	rootCmd.AddCommand(versionCmd)
38 | }
39 | 
// versionCmd represents the version command. It prints a fixed version
// string to standard output.
var versionCmd = &cobra.Command{
	Use:   "version",
	Short: "Print the version number",
	Run: func(cmd *cobra.Command, args []string) {
		// The version is hard-coded in this string.
		fmt.Println("Re-movery v1.0.0")
	},
}


--------------------------------------------------------------------------------
/go/internal/cmd/scan.go:
--------------------------------------------------------------------------------
  1 | package cmd
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"strings"
  8 | 	"time"
  9 | 
 10 | 	"github.com/re-movery/re-movery/internal/core"
 11 | 	"github.com/re-movery/re-movery/internal/detectors"
 12 | 	"github.com/re-movery/re-movery/internal/reporters"
 13 | 	"github.com/spf13/cobra"
 14 | )
 15 | 
// Values of the scan command's flags. They are bound to the flags, and get
// their defaults, in this file's init function.
var (
	scanFile       string  // --file: file to scan
	scanDir        string  // --dir: directory to scan
	excludePattern string  // --exclude: comma-separated exclude patterns
	outputFile     string  // --output: report output file
	reportFormat   string  // --format: html, json or xml
	parallel       bool    // --parallel
	incremental    bool    // --incremental
	confidence     float64 // --confidence: threshold, default 0.7
)
 26 | 
 27 | var scanCmd = &cobra.Command{
 28 | 	Use:   "scan",
 29 | 	Short: "Scan files or directories for security vulnerabilities",
 30 | 	Long: `Scan files or directories for security vulnerabilities.
 31 | Examples:
 32 |   re-movery scan --file path/to/file.py
 33 |   re-movery scan --dir path/to/directory --exclude "node_modules,*.min.js"
 34 |   re-movery scan --dir path/to/directory --output report.html --format html`,
 35 | 	Run: func(cmd *cobra.Command, args []string) {
 36 | 		// Create scanner
 37 | 		scanner := core.NewScanner()
 38 | 		
 39 | 		// Register detectors
 40 | 		scanner.RegisterDetector(detectors.NewPythonDetector())
 41 | 		scanner.RegisterDetector(detectors.NewJavaScriptDetector())
 42 | 		
 43 | 		// Set scanner options
 44 | 		scanner.SetParallel(parallel)
 45 | 		scanner.SetIncremental(incremental)
 46 | 		scanner.SetConfidenceThreshold(confidence)
 47 | 		
 48 | 		// Parse exclude patterns
 49 | 		var excludePatterns []string
 50 | 		if excludePattern != "" {
 51 | 			excludePatterns = strings.Split(excludePattern, ",")
 52 | 			for i, pattern := range excludePatterns {
 53 | 				excludePatterns[i] = strings.TrimSpace(pattern)
 54 | 			}
 55 | 		}
 56 | 		
 57 | 		// Scan file or directory
 58 | 		var results map[string][]core.Match
 59 | 		var err error
 60 | 		
 61 | 		if scanFile != "" {
 62 | 			// Check if file exists
 63 | 			if _, err := os.Stat(scanFile); os.IsNotExist(err) {
 64 | 				fmt.Fprintf(os.Stderr, "Error: File does not exist: %s\n", scanFile)
 65 | 				os.Exit(1)
 66 | 			}
 67 | 			
 68 | 			// Scan file
 69 | 			matches, err := scanner.ScanFile(scanFile)
 70 | 			if err != nil {
 71 | 				fmt.Fprintf(os.Stderr, "Error scanning file: %v\n", err)
 72 | 				os.Exit(1)
 73 | 			}
 74 | 			
 75 | 			results = map[string][]core.Match{
 76 | 				scanFile: matches,
 77 | 			}
 78 | 		} else if scanDir != "" {
 79 | 			// Check if directory exists
 80 | 			if _, err := os.Stat(scanDir); os.IsNotExist(err) {
 81 | 				fmt.Fprintf(os.Stderr, "Error: Directory does not exist: %s\n", scanDir)
 82 | 				os.Exit(1)
 83 | 			}
 84 | 			
 85 | 			// Scan directory
 86 | 			results, err = scanner.ScanDirectory(scanDir, excludePatterns)
 87 | 			if err != nil {
 88 | 				fmt.Fprintf(os.Stderr, "Error scanning directory: %v\n", err)
 89 | 				os.Exit(1)
 90 | 			}
 91 | 		} else {
 92 | 			fmt.Fprintf(os.Stderr, "Error: Please specify a file or directory to scan\n")
 93 | 			cmd.Help()
 94 | 			os.Exit(1)
 95 | 		}
 96 | 		
 97 | 		// Generate summary
 98 | 		summary := core.GenerateSummary(results)
 99 | 		
100 | 		// Print summary to console
101 | 		fmt.Printf("Scan completed in %s\n", time.Now().Format(time.RFC3339))
102 | 		fmt.Printf("Files scanned: %d\n", summary.TotalFiles)
103 | 		fmt.Printf("Issues found: %d (High: %d, Medium: %d, Low: %d)\n",
104 | 			summary.High+summary.Medium+summary.Low, summary.High, summary.Medium, summary.Low)
105 | 		
106 | 		// Generate report if output file is specified
107 | 		if outputFile != "" {
108 | 			// Create report data
109 | 			reportData := core.ReportData{
110 | 				Title:     "Re-movery Security Scan Report",
111 | 				Timestamp: time.Now().Format(time.RFC3339),
112 | 				Results:   results,
113 | 				Summary:   summary,
114 | 			}
115 | 			
116 | 			// Determine report format
117 | 			if reportFormat == "" {
118 | 				// Try to determine format from file extension
119 | 				ext := strings.ToLower(filepath.Ext(outputFile))
120 | 				switch ext {
121 | 				case ".html":
122 | 					reportFormat = "html"
123 | 				case ".json":
124 | 					reportFormat = "json"
125 | 				case ".xml":
126 | 					reportFormat = "xml"
127 | 				default:
128 | 					reportFormat = "html" // Default to HTML
129 | 				}
130 | 			}
131 | 			
132 | 			// Generate report
133 | 			var reporter core.Reporter
134 | 			switch strings.ToLower(reportFormat) {
135 | 			case "html":
136 | 				reporter = reporters.NewHTMLReporter()
137 | 			case "json":
138 | 				reporter = reporters.NewJSONReporter()
139 | 			case "xml":
140 | 				reporter = reporters.NewXMLReporter()
141 | 			default:
142 | 				fmt.Fprintf(os.Stderr, "Error: Unsupported report format: %s\n", reportFormat)
143 | 				os.Exit(1)
144 | 			}
145 | 			
146 | 			if err := reporter.GenerateReport(reportData, outputFile); err != nil {
147 | 				fmt.Fprintf(os.Stderr, "Error generating report: %v\n", err)
148 | 				os.Exit(1)
149 | 			}
150 | 			
151 | 			fmt.Printf("Report generated: %s\n", outputFile)
152 | 		}
153 | 	},
154 | }
155 | 
156 | func init() {
157 | 	// Add flags
158 | 	scanCmd.Flags().StringVar(&scanFile, "file", "", "File to scan")
159 | 	scanCmd.Flags().StringVar(&scanDir, "dir", "", "Directory to scan")
160 | 	scanCmd.Flags().StringVar(&excludePattern, "exclude", "", "Patterns to exclude (comma separated)")
161 | 	scanCmd.Flags().StringVar(&outputFile, "output", "", "Output file for the report")
162 | 	scanCmd.Flags().StringVar(&reportFormat, "format", "", "Report format (html, json, xml)")
163 | 	scanCmd.Flags().BoolVar(&parallel, "parallel", false, "Enable parallel processing")
164 | 	scanCmd.Flags().BoolVar(&incremental, "incremental", false, "Enable incremental scanning")
165 | 	scanCmd.Flags().Float64Var(&confidence, "confidence", 0.7, "Confidence threshold (0.0-1.0)")
166 | } 


--------------------------------------------------------------------------------
/go/internal/cmd/server.go:
--------------------------------------------------------------------------------
 1 | package cmd
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 
 7 | 	"github.com/re-movery/re-movery/internal/api"
 8 | 	"github.com/spf13/cobra"
 9 | )
10 | 
// Values of the server command's flags; bound and defaulted in this file's
// init function.
var (
	serverHost  string // --host, default "localhost"
	serverPort  int    // --port, default 8081
	serverDebug bool   // --debug, default false
)
16 | 
17 | var serverCmd = &cobra.Command{
18 | 	Use:   "server",
19 | 	Short: "Start the API server",
20 | 	Long: `Start the API server for Re-movery.
21 | The API server provides a RESTful API for scanning files and directories for security vulnerabilities.
22 | 
23 | Examples:
24 |   re-movery server
25 |   re-movery server --host 0.0.0.0 --port 8081
26 |   re-movery server --debug`,
27 | 	Run: func(cmd *cobra.Command, args []string) {
28 | 		// Create API server
29 | 		server := api.NewServer()
30 | 		
31 | 		// Start API server
32 | 		addr := fmt.Sprintf("%s:%d", serverHost, serverPort)
33 | 		fmt.Printf("Starting API server at http://%s\n", addr)
34 | 		
35 | 		if err := server.Run(serverHost, serverPort, serverDebug); err != nil {
36 | 			fmt.Fprintf(os.Stderr, "Error starting API server: %v\n", err)
37 | 			os.Exit(1)
38 | 		}
39 | 	},
40 | }
41 | 
42 | func init() {
43 | 	// Add flags
44 | 	serverCmd.Flags().StringVar(&serverHost, "host", "localhost", "Host to bind the API server to")
45 | 	serverCmd.Flags().IntVar(&serverPort, "port", 8081, "Port to bind the API server to")
46 | 	serverCmd.Flags().BoolVar(&serverDebug, "debug", false, "Enable debug mode")
47 | } 


--------------------------------------------------------------------------------
/go/internal/cmd/web.go:
--------------------------------------------------------------------------------
 1 | package cmd
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 
 7 | 	"github.com/re-movery/re-movery/internal/web"
 8 | 	"github.com/spf13/cobra"
 9 | )
10 | 
// Values of the web command's flags; bound and defaulted in this file's
// init function.
var (
	webHost  string // --host, default "localhost"
	webPort  int    // --port, default 8080
	webDebug bool   // --debug, default false
)
16 | 
17 | var webCmd = &cobra.Command{
18 | 	Use:   "web",
19 | 	Short: "Start the web interface",
20 | 	Long: `Start the web interface for Re-movery.
21 | The web interface provides a user-friendly way to scan files and directories for security vulnerabilities.
22 | 
23 | Examples:
24 |   re-movery web
25 |   re-movery web --host 0.0.0.0 --port 8080
26 |   re-movery web --debug`,
27 | 	Run: func(cmd *cobra.Command, args []string) {
28 | 		// Create web app
29 | 		app := web.NewApp()
30 | 		
31 | 		// Start web server
32 | 		addr := fmt.Sprintf("%s:%d", webHost, webPort)
33 | 		fmt.Printf("Starting web server at http://%s\n", addr)
34 | 		
35 | 		if err := app.Run(webHost, webPort, webDebug); err != nil {
36 | 			fmt.Fprintf(os.Stderr, "Error starting web server: %v\n", err)
37 | 			os.Exit(1)
38 | 		}
39 | 	},
40 | }
41 | 
42 | func init() {
43 | 	// Add flags
44 | 	webCmd.Flags().StringVar(&webHost, "host", "localhost", "Host to bind the web server to")
45 | 	webCmd.Flags().IntVar(&webPort, "port", 8080, "Port to bind the web server to")
46 | 	webCmd.Flags().BoolVar(&webDebug, "debug", false, "Enable debug mode")
47 | } 


--------------------------------------------------------------------------------
/go/internal/config/config.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | import (
 4 |     "github.com/spf13/viper"
 5 | )
 6 | 
// Config represents the application configuration. Each field is filled
// from the top-level configuration section named in its mapstructure tag.
type Config struct {
    Processing ProcessingConfig `mapstructure:"processing"`
    Detector   DetectorConfig   `mapstructure:"detector"`
    Logging    LoggingConfig    `mapstructure:"logging"`
    Security   SecurityConfig   `mapstructure:"security"`
}
14 | 
// ProcessingConfig contains processing-related configuration, read from the
// "processing" section. The unit suffixes in the key names (gb, mb) are the
// only unit information given here.
type ProcessingConfig struct {
    NumWorkers    int      `mapstructure:"num_workers"`
    MaxMemoryGB   float64  `mapstructure:"max_memory_gb"`
    ChunkSizeMB   int      `mapstructure:"chunk_size_mb"`
    EnableCache   bool     `mapstructure:"enable_cache"`
    CacheSize     int      `mapstructure:"cache_size"`
    Languages     []string `mapstructure:"languages"`
}
24 | 
// DetectorConfig contains detector-related configuration, read from the
// "detector" section.
type DetectorConfig struct {
    MinSimilarity     float64  `mapstructure:"min_similarity"`
    EditDistance      int      `mapstructure:"edit_distance"`
    ContextLines      int      `mapstructure:"context_lines"`
    ASTDepth         int      `mapstructure:"ast_depth"`
    CFGNodes         int      `mapstructure:"cfg_nodes"`
    // ReportFormat is a list, so several formats can be configured at once.
    ReportFormat     []string `mapstructure:"report_format"`
    ExcludePatterns  []string `mapstructure:"exclude_patterns"`
}
35 | 
// LoggingConfig contains logging-related configuration, read from the
// "logging" section.
type LoggingConfig struct {
    Level           string `mapstructure:"level"`
    File            string `mapstructure:"file"`
    Format          string `mapstructure:"format"`
    EnableProfiling bool   `mapstructure:"enable_profiling"`
    ShowProgress    bool   `mapstructure:"show_progress"`
}
44 | 
// SecurityConfig contains security-related configuration, read from the
// "security" section.
type SecurityConfig struct {
    MaxFileSizeMB     int      `mapstructure:"max_file_size_mb"`
    AllowedSchemes    []string `mapstructure:"allowed_schemes"`
    EnableSandbox     bool     `mapstructure:"enable_sandbox"`
    RequireAuth       bool     `mapstructure:"require_auth"`
    RateLimitPerHour  int      `mapstructure:"rate_limit_per_hour"`
}
53 | 
54 | // LoadConfig loads the configuration from file
55 | func LoadConfig(configFile string) (*Config, error) {
56 |     viper.SetConfigFile(configFile)
57 |     viper.SetConfigType("json")
58 | 
59 |     if err := viper.ReadInConfig(); err != nil {
60 |         return nil, err
61 |     }
62 | 
63 |     var config Config
64 |     if err := viper.Unmarshal(&config); err != nil {
65 |         return nil, err
66 |     }
67 | 
68 |     return &config, nil
69 | }
70 | 
71 | // SetDefaults sets default configuration values
72 | func SetDefaults() {
73 |     viper.SetDefault("processing.num_workers", 4)
74 |     viper.SetDefault("processing.max_memory_gb", 8)
75 |     viper.SetDefault("processing.chunk_size_mb", 1)
76 |     viper.SetDefault("processing.enable_cache", true)
77 |     viper.SetDefault("processing.cache_size", 1000)
78 |     viper.SetDefault("processing.languages", []string{"go", "java", "python", "javascript"})
79 | 
80 |     viper.SetDefault("detector.min_similarity", 0.8)
81 |     viper.SetDefault("detector.edit_distance", 3)
82 |     viper.SetDefault("detector.context_lines", 3)
83 |     viper.SetDefault("detector.ast_depth", 5)
84 |     viper.SetDefault("detector.cfg_nodes", 100)
85 |     viper.SetDefault("detector.report_format", []string{"html", "json"})
86 | 
87 |     viper.SetDefault("logging.level", "info")
88 |     viper.SetDefault("logging.format", "text")
89 |     viper.SetDefault("logging.enable_profiling", false)
90 |     viper.SetDefault("logging.show_progress", true)
91 | 
92 |     viper.SetDefault("security.max_file_size_mb", 10)
93 |     viper.SetDefault("security.enable_sandbox", true)
94 |     viper.SetDefault("security.require_auth", false)
95 |     viper.SetDefault("security.rate_limit_per_hour", 1000)
96 | } 


--------------------------------------------------------------------------------
/go/internal/core/config.go:
--------------------------------------------------------------------------------
  1 | package core
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"io/ioutil"
  7 | 	"os"
  8 | 	"path/filepath"
  9 | 	"strings"
 10 | 
 11 | 	"gopkg.in/yaml.v3"
 12 | )
 13 | 
// Config represents the application configuration. It has one section each
// for the scanner, the web interface and the API server, and can be encoded
// as JSON or YAML.
type Config struct {
	Scanner ScannerConfig `json:"scanner" yaml:"scanner"`
	Web     WebConfig     `json:"web" yaml:"web"`
	Server  ServerConfig  `json:"server" yaml:"server"`
}
 20 | 
// ScannerConfig represents the scanner configuration.
type ScannerConfig struct {
	Parallel            bool    `json:"parallel" yaml:"parallel"`
	Incremental         bool    `json:"incremental" yaml:"incremental"`
	// NewConfig sets this threshold to 0.7 by default.
	ConfidenceThreshold float64 `json:"confidenceThreshold" yaml:"confidenceThreshold"`
	ExcludePatterns     []string `json:"excludePatterns" yaml:"excludePatterns"`
}
 28 | 
// WebConfig represents the web interface configuration. NewConfig defaults
// it to localhost:8080 with debug disabled.
type WebConfig struct {
	Host  string `json:"host" yaml:"host"`
	Port  int    `json:"port" yaml:"port"`
	Debug bool   `json:"debug" yaml:"debug"`
}
 35 | 
// ServerConfig represents the API server configuration. NewConfig defaults
// it to localhost:8081 with debug disabled.
type ServerConfig struct {
	Host  string `json:"host" yaml:"host"`
	Port  int    `json:"port" yaml:"port"`
	Debug bool   `json:"debug" yaml:"debug"`
}
 42 | 
 43 | // NewConfig 创建一个新的配置对象，使用默认值
 44 | func NewConfig() *Config {
 45 | 	return &Config{
 46 | 		Scanner: ScannerConfig{
 47 | 			Parallel:            false,
 48 | 			Incremental:         false,
 49 | 			ConfidenceThreshold: 0.7,
 50 | 			ExcludePatterns:     []string{},
 51 | 		},
 52 | 		Web: WebConfig{
 53 | 			Host:  "localhost",
 54 | 			Port:  8080,
 55 | 			Debug: false,
 56 | 		},
 57 | 		Server: ServerConfig{
 58 | 			Host:  "localhost",
 59 | 			Port:  8081,
 60 | 			Debug: false,
 61 | 		},
 62 | 	}
 63 | }
 64 | 
 65 | // LoadConfig reads the configuration stored at configPath.
 66 | //
 67 | // An empty configPath yields the defaults from NewConfig. Otherwise the file
 68 | // is decoded on top of those defaults, so keys missing from the file keep
 69 | // their default values. The format is selected by extension: .json, .yaml
 70 | // or .yml (case-insensitive); any other extension is an error.
 71 | func LoadConfig(configPath string) (*Config, error) {
 72 | 	if configPath == "" {
 73 | 		return NewConfig(), nil
 74 | 	}
 75 | 
 76 | 	// Read directly instead of stat-then-read: one syscall fewer and no window
 77 | 	// in which the file can disappear between the check and the read.
 78 | 	data, err := ioutil.ReadFile(configPath)
 79 | 	if os.IsNotExist(err) {
 80 | 		return nil, fmt.Errorf("配置文件不存在: %s", configPath)
 81 | 	}
 82 | 	if err != nil {
 83 | 		return nil, err
 84 | 	}
 85 | 
 86 | 	config := NewConfig()
 87 | 	ext := strings.ToLower(filepath.Ext(configPath))
 88 | 	switch ext {
 89 | 	case ".json":
 90 | 		err = json.Unmarshal(data, config)
 91 | 	case ".yaml", ".yml":
 92 | 		err = yaml.Unmarshal(data, config)
 93 | 	default:
 94 | 		return nil, fmt.Errorf("不支持的配置文件格式: %s", ext)
 95 | 	}
 96 | 	if err != nil {
 97 | 		return nil, err
 98 | 	}
 99 | 	return config, nil
100 | }
101 | 
102 | // SaveConfig serializes config and writes it to configPath.
103 | //
104 | // The format follows the extension (.json, .yaml or .yml, case-insensitive).
105 | // Missing parent directories are created, but only once the configuration
106 | // has been encoded, so an unsupported extension or an encoding failure
107 | // leaves the filesystem untouched.
108 | func SaveConfig(config *Config, configPath string) error {
109 | 	var (
110 | 		data []byte
111 | 		err  error
112 | 	)
113 | 	ext := strings.ToLower(filepath.Ext(configPath))
114 | 	switch ext {
115 | 	case ".json":
116 | 		data, err = json.MarshalIndent(config, "", "  ")
117 | 	case ".yaml", ".yml":
118 | 		data, err = yaml.Marshal(config)
119 | 	default:
120 | 		return fmt.Errorf("不支持的配置文件格式: %s", ext)
121 | 	}
122 | 	if err != nil {
123 | 		return err
124 | 	}
125 | 
126 | 	// Create the output directory (if needed) right before writing.
127 | 	if err := os.MkdirAll(filepath.Dir(configPath), 0755); err != nil {
128 | 		return err
129 | 	}
130 | 	return ioutil.WriteFile(configPath, data, 0644)
131 | }
132 | 
133 | // ApplyToScanner copies the parallel, incremental and confidence-threshold settings onto scanner.
134 | func (c *Config) ApplyToScanner(scanner *Scanner) {
135 | 	scanner.SetParallel(c.Scanner.Parallel)
136 | 	scanner.SetIncremental(c.Scanner.Incremental)
137 | 	scanner.SetConfidenceThreshold(c.Scanner.ConfidenceThreshold) // ExcludePatterns is not transferred here
138 | } 


--------------------------------------------------------------------------------
/go/internal/core/config_test.go:
--------------------------------------------------------------------------------
  1 | package core
  2 | 
  3 | import (
  4 | 	"io/ioutil"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"testing"
  8 | 
  9 | 	"github.com/stretchr/testify/assert"
 10 | )
 11 | 
 12 | // TestNewConfig verifies the defaults produced by NewConfig.
 13 | func TestNewConfig(t *testing.T) {
 14 | 	check := assert.New(t)
 15 | 	cfg := NewConfig()
 16 | 	check.NotNil(cfg)
 17 | 
 18 | 	check.False(cfg.Scanner.Parallel)
 19 | 	check.False(cfg.Scanner.Incremental)
 20 | 	check.Equal(0.7, cfg.Scanner.ConfidenceThreshold)
 21 | 	check.Equal("localhost", cfg.Web.Host)
 22 | 	check.Equal(8080, cfg.Web.Port)
 23 | 	check.False(cfg.Web.Debug)
 24 | 	check.Equal("localhost", cfg.Server.Host)
 25 | 	check.Equal(8081, cfg.Server.Port)
 26 | 	check.False(cfg.Server.Debug)
 27 | }
 28 | 
 29 | // TestLoadConfigJSON checks that every field of a JSON config file is loaded.
 30 | func TestLoadConfigJSON(t *testing.T) {
 31 | 	// Fixture that overrides every default so each field can be told apart.
 32 | 	content := []byte(`{
 33 | 		"scanner": {
 34 | 			"parallel": true,
 35 | 			"incremental": true,
 36 | 			"confidenceThreshold": 0.8,
 37 | 			"excludePatterns": ["node_modules", "*.min.js"]
 38 | 		},
 39 | 		"web": {
 40 | 			"host": "0.0.0.0",
 41 | 			"port": 9090,
 42 | 			"debug": true
 43 | 		},
 44 | 		"server": {
 45 | 			"host": "0.0.0.0",
 46 | 			"port": 9091,
 47 | 			"debug": true
 48 | 		}
 49 | 	}`)
 50 | 
 51 | 	tmpfile, err := ioutil.TempFile("", "config-*.json")
 52 | 	if err != nil {
 53 | 		t.Fatalf("create temp file: %v", err) // tmpfile is nil here; continuing would panic
 54 | 	}
 55 | 	defer os.Remove(tmpfile.Name())
 56 | 	_, err = tmpfile.Write(content)
 57 | 	assert.NoError(t, err)
 58 | 	assert.NoError(t, tmpfile.Close())
 59 | 
 60 | 	// A failed load returns a nil config, so stop before dereferencing it.
 61 | 	config, err := LoadConfig(tmpfile.Name())
 62 | 	if err != nil {
 63 | 		t.Fatalf("LoadConfig: %v", err)
 64 | 	}
 65 | 	// Every value must come from the file, none from the defaults.
 66 | 	assert.True(t, config.Scanner.Parallel)
 67 | 	assert.True(t, config.Scanner.Incremental)
 68 | 	assert.Equal(t, 0.8, config.Scanner.ConfidenceThreshold)
 69 | 	assert.Equal(t, []string{"node_modules", "*.min.js"}, config.Scanner.ExcludePatterns)
 70 | 	assert.Equal(t, "0.0.0.0", config.Web.Host)
 71 | 	assert.Equal(t, 9090, config.Web.Port)
 72 | 	assert.True(t, config.Web.Debug)
 73 | 	assert.Equal(t, "0.0.0.0", config.Server.Host)
 74 | 	assert.Equal(t, 9091, config.Server.Port)
 75 | 	assert.True(t, config.Server.Debug)
 76 | }
 77 | 
 78 | // TestLoadConfigYAML checks that every field of a YAML config file is loaded.
 79 | func TestLoadConfigYAML(t *testing.T) {
 80 | 	// Fixture that overrides every default so each field can be told apart.
 81 | 	content := []byte(`scanner:
 82 |   parallel: true
 83 |   incremental: true
 84 |   confidenceThreshold: 0.8
 85 |   excludePatterns:
 86 |     - node_modules
 87 |     - "*.min.js"
 88 | web:
 89 |   host: 0.0.0.0
 90 |   port: 9090
 91 |   debug: true
 92 | server:
 93 |   host: 0.0.0.0
 94 |   port: 9091
 95 |   debug: true
 96 | `)
 97 | 
 98 | 	tmpfile, err := ioutil.TempFile("", "config-*.yaml")
 99 | 	if err != nil {
100 | 		t.Fatalf("create temp file: %v", err) // tmpfile is nil here; continuing would panic
101 | 	}
102 | 	defer os.Remove(tmpfile.Name())
103 | 	_, err = tmpfile.Write(content)
104 | 	assert.NoError(t, err)
105 | 	assert.NoError(t, tmpfile.Close())
106 | 
107 | 	// A failed load returns a nil config, so stop before dereferencing it.
108 | 	config, err := LoadConfig(tmpfile.Name())
109 | 	if err != nil {
110 | 		t.Fatalf("LoadConfig: %v", err)
111 | 	}
112 | 	// Every value must come from the file, none from the defaults.
113 | 	assert.True(t, config.Scanner.Parallel)
114 | 	assert.True(t, config.Scanner.Incremental)
115 | 	assert.Equal(t, 0.8, config.Scanner.ConfidenceThreshold)
116 | 	assert.Equal(t, []string{"node_modules", "*.min.js"}, config.Scanner.ExcludePatterns)
117 | 	assert.Equal(t, "0.0.0.0", config.Web.Host)
118 | 	assert.Equal(t, 9090, config.Web.Port)
119 | 	assert.True(t, config.Web.Debug)
120 | 	assert.Equal(t, "0.0.0.0", config.Server.Host)
121 | 	assert.Equal(t, 9091, config.Server.Port)
122 | 	assert.True(t, config.Server.Debug)
123 | }
124 | 
125 | // TestSaveConfig round-trips a fully customised Config through both
126 | // supported formats and expects to read back exactly what was saved.
127 | // The JSON and the YAML file are written to the same scratch directory.
128 | func TestSaveConfig(t *testing.T) {
129 | 	check := assert.New(t)
130 | 
131 | 	// Start from the defaults and override every field.
132 | 	want := NewConfig()
133 | 	want.Scanner = ScannerConfig{
134 | 		Parallel:            true,
135 | 		Incremental:         true,
136 | 		ConfidenceThreshold: 0.8,
137 | 		ExcludePatterns:     []string{"node_modules", "*.min.js"},
138 | 	}
139 | 	want.Web = WebConfig{Host: "0.0.0.0", Port: 9090, Debug: true}
140 | 	want.Server = ServerConfig{Host: "0.0.0.0", Port: 9091, Debug: true}
141 | 
142 | 	// Scratch directory, removed when the test returns.
143 | 	tmpdir, err := ioutil.TempDir("", "config-test")
144 | 	check.NoError(err)
145 | 	defer os.RemoveAll(tmpdir)
146 | 
147 | 	// The extension of each path selects the format.
148 | 	jsonPath := filepath.Join(tmpdir, "config.json")
149 | 	yamlPath := filepath.Join(tmpdir, "config.yaml")
150 | 
151 | 	// Write both files first ...
152 | 	check.NoError(SaveConfig(want, jsonPath))
153 | 	check.NoError(SaveConfig(want, yamlPath))
154 | 
155 | 	// ... then read each one back and compare it with what was saved.
156 | 	jsonConfig, err := LoadConfig(jsonPath)
157 | 	check.NoError(err)
158 | 	check.Equal(want, jsonConfig)
159 | 
160 | 	// Same check for the YAML copy.
161 | 	yamlConfig, err := LoadConfig(yamlPath)
162 | 	check.NoError(err)
163 | 	check.Equal(want, yamlConfig)
164 | }
165 | 
166 | // TestApplyToScanner checks that scanner settings are copied from a Config.
167 | func TestApplyToScanner(t *testing.T) {
168 | 	check := assert.New(t)
169 | 
170 | 	// Use non-default values so a setting that was not applied is detectable.
171 | 	cfg := NewConfig()
172 | 	cfg.Scanner.Parallel = true
173 | 	cfg.Scanner.Incremental = true
174 | 	cfg.Scanner.ConfidenceThreshold = 0.8
175 | 
176 | 	target := NewScanner()
177 | 	cfg.ApplyToScanner(target)
178 | 
179 | 	check.True(target.IsParallel())
180 | 	check.True(target.IsIncremental())
181 | 	// The threshold is read from the unexported field directly, which works
182 | 	// because this test lives in the same package as Scanner.
183 | 	check.Equal(0.8, target.confidenceThreshold)
184 | }


--------------------------------------------------------------------------------
/go/internal/core/models.go:
--------------------------------------------------------------------------------
 1 | package core
 2 | 
 3 | import (
 4 | 	"time"
 5 | )
 6 | 
 7 | // Signature describes one vulnerability rule that matches are reported against.
 8 | type Signature struct {
 9 | 	ID           string   `json:"id"`
10 | 	Name         string   `json:"name"` // used as the key of Summary.Vulnerabilities
11 | 	Severity     string   `json:"severity"` // GenerateSummary tallies "high", "medium" and "low"
12 | 	Description  string   `json:"description"`
13 | 	CodePatterns []string `json:"codePatterns"`
14 | 	References   []string `json:"references"`
15 | }
16 | 
17 | // Match is a single finding: one signature matched at one place in one file.
18 | type Match struct {
19 | 	Signature   Signature `json:"signature"`
20 | 	FilePath    string    `json:"filePath"`
21 | 	LineNumber  int       `json:"lineNumber"`
22 | 	MatchedCode string    `json:"matchedCode"`
23 | 	Confidence  float64   `json:"confidence"` // Scanner.ScanFile drops matches below its threshold
24 | }
25 | 
26 | // Summary holds the aggregate counts that GenerateSummary computes from scan results.
27 | type Summary struct {
28 | 	TotalFiles int            `json:"totalFiles"` // number of keys in the results map
29 | 	High       int            `json:"high"` // matches with severity "high"
30 | 	Medium     int            `json:"medium"` // matches with severity "medium"
31 | 	Low        int            `json:"low"` // matches with severity "low"
32 | 	Vulnerabilities map[string]int `json:"vulnerabilities"` // match count per signature name
33 | }
34 | 
35 | // ReportData is the payload handed to a Reporter.
36 | type ReportData struct {
37 | 	Title     string                `json:"title"`
38 | 	Timestamp string                `json:"timestamp"`
39 | 	Results   map[string][]Match    `json:"results"` // presumably keyed by file path, as ScanDirectory returns; confirm with callers
40 | 	Summary   Summary               `json:"summary"`
41 | }
42 | 
43 | // Reporter is implemented by report generators that render ReportData to a file.
44 | type Reporter interface {
45 | 	GenerateReport(data ReportData, outputPath string) error // writes the report for data to outputPath
46 | }
47 | 
48 | // Detector is implemented by vulnerability detectors that a Scanner can register.
49 | type Detector interface {
50 | 	Name() string
51 | 	SupportedLanguages() []string // ScanDirectory compares these with lower-cased file extensions (no dot)
52 | 	DetectFile(filePath string) ([]Match, error) // called by Scanner.ScanFile
53 | 	DetectCode(code string, filePath string) ([]Match, error)
54 | }
55 | 
56 | // GenerateSummary generates a summary from scan results
57 | func GenerateSummary(results map[string][]Match) Summary {
58 | 	summary := Summary{
59 | 		TotalFiles: len(results),
60 | 		Vulnerabilities: make(map[string]int),
61 | 	}
62 | 
63 | 	for _, matches := range results {
64 | 		for _, match := range matches {
65 | 			switch match.Signature.Severity {
66 | 			case "high":
67 | 				summary.High++
68 | 			case "medium":
69 | 				summary.Medium++
70 | 			case "low":
71 | 				summary.Low++
72 | 			}
73 | 
74 | 			// Count vulnerabilities by name
75 | 			summary.Vulnerabilities[match.Signature.Name]++
76 | 		}
77 | 	}
78 | 
79 | 	return summary
80 | } 


--------------------------------------------------------------------------------
/go/internal/core/scanner.go:
--------------------------------------------------------------------------------
  1 | package core
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"strings"
  8 | 	"sync"
  9 | )
 10 | 
 11 | // Scanner runs the registered detectors over files and directories.
 12 | type Scanner struct {
 13 | 	detectors          []Detector // consulted in registration order
 14 | 	parallel           bool // when true, ScanDirectory scans files concurrently
 15 | 	incremental        bool // when true, ScanFile reuses cached results
 16 | 	confidenceThreshold float64 // matches below this confidence are dropped by ScanFile
 17 | 	cache              map[string][]Match // ScanFile results keyed by file path
 18 | 	cacheMutex         sync.RWMutex // guards cache
 19 | }
 20 | 
 21 | // NewScanner returns a Scanner with no detectors, parallel and incremental
 22 | // modes off, a 0.7 confidence threshold and an empty result cache.
 23 | func NewScanner() *Scanner {
 24 | 	sc := new(Scanner)
 25 | 	sc.detectors = []Detector{}
 26 | 	sc.confidenceThreshold = 0.7
 27 | 	sc.cache = make(map[string][]Match)
 28 | 	// parallel and incremental keep their zero value, false.
 29 | 	return sc
 30 | }
 31 | 
 32 | // RegisterDetector appends detector to the scanner's list; ScanFile runs detectors in that order.
 33 | func (s *Scanner) RegisterDetector(detector Detector) {
 34 | 	s.detectors = append(s.detectors, detector) // no lock is taken around this append
 35 | }
 36 | 
 37 | // SetParallel selects whether ScanDirectory scans files concurrently (true) or one after another.
 38 | func (s *Scanner) SetParallel(parallel bool) {
 39 | 	s.parallel = parallel
 40 | }
 41 | 
 42 | // IsParallel reports the value last given to SetParallel (false for a new scanner).
 43 | func (s *Scanner) IsParallel() bool {
 44 | 	return s.parallel
 45 | }
 46 | 
 47 | // SetIncremental selects whether ScanFile caches results per file path and reuses them on later calls.
 48 | func (s *Scanner) SetIncremental(incremental bool) {
 49 | 	s.incremental = incremental
 50 | }
 51 | 
 52 | // IsIncremental reports the value last given to SetIncremental (false for a new scanner).
 53 | func (s *Scanner) IsIncremental() bool {
 54 | 	return s.incremental
 55 | }
 56 | 
 57 | // SetConfidenceThreshold sets the minimum Match.Confidence that ScanFile keeps (0.7 for a new scanner).
 58 | func (s *Scanner) SetConfidenceThreshold(threshold float64) {
 59 | 	s.confidenceThreshold = threshold // results already in the cache were filtered with the earlier value
 60 | }
 61 | 
 62 | // SupportedLanguages concatenates the language lists of all registered detectors (duplicates are kept).
 63 | func (s *Scanner) SupportedLanguages() []string {
 64 | 	all := make([]string, 0)
 65 | 	for i := range s.detectors {
 66 | 		all = append(all, s.detectors[i].SupportedLanguages()...)
 67 | 	}
 68 | 	return all
 69 | }
 70 | 
 71 | // ScanFile runs every registered detector on filePath and returns the matches
 72 | // whose confidence is at least the scanner's threshold.
 73 | //
 74 | // A missing file is an error and the first detector error aborts the scan.
 75 | // In incremental mode a result already cached for filePath is returned
 76 | // without running any detector; the cache is keyed by path only.
 77 | func (s *Scanner) ScanFile(filePath string) ([]Match, error) {
 78 | 	_, statErr := os.Stat(filePath)
 79 | 	if os.IsNotExist(statErr) {
 80 | 		return nil, fmt.Errorf("file does not exist: %s", filePath)
 81 | 	}
 82 | 
 83 | 	if s.incremental {
 84 | 		s.cacheMutex.RLock()
 85 | 		cached, hit := s.cache[filePath]
 86 | 		s.cacheMutex.RUnlock()
 87 | 		if hit {
 88 | 			return cached, nil
 89 | 		}
 90 | 	}
 91 | 
 92 | 	var kept []Match
 93 | 	for i := range s.detectors {
 94 | 		found, err := s.detectors[i].DetectFile(filePath)
 95 | 		if err != nil {
 96 | 			return nil, err
 97 | 		}
 98 | 		for _, m := range found {
 99 | 			if !(m.Confidence >= s.confidenceThreshold) {
100 | 				continue // below the threshold: drop
101 | 			}
102 | 			kept = append(kept, m)
103 | 		}
104 | 	}
105 | 
106 | 	if s.incremental {
107 | 		s.cacheMutex.Lock()
108 | 		s.cache[filePath] = kept
109 | 		s.cacheMutex.Unlock()
110 | 	}
111 | 	return kept, nil
112 | }
113 | 
114 | // ScanDirectory walks dirPath recursively and scans every file whose
115 | // lower-cased extension (without the dot) equals a language reported by a
116 | // registered detector. Files and directories whose base name matches one
117 | // of excludePatterns (filepath.Match syntax) are skipped and an excluded
118 | // directory is not descended into, but the root directory dirPath itself
119 | // is never pruned. Malformed patterns never match.
120 | // The result maps file path to matches and only holds files with at least
121 | // one match. A walk error aborts the scan; a per-file scan error is written
122 | // to stderr and that file is left out.
123 | func (s *Scanner) ScanDirectory(dirPath string, excludePatterns []string) (map[string][]Match, error) {
124 | 	if _, err := os.Stat(dirPath); os.IsNotExist(err) {
125 | 		return nil, fmt.Errorf("directory does not exist: %s", dirPath)
126 | 	}
127 | 
128 | 	// Phase 1: collect the files to scan.
129 | 	var filesToScan []string
130 | 	err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
131 | 		if err != nil {
132 | 			return err
133 | 		}
134 | 		excluded := matchesAnyPattern(excludePatterns, info.Name())
135 | 		if info.IsDir() {
136 | 			// Never prune the root: the caller asked for it explicitly.
137 | 			if excluded && path != dirPath {
138 | 				return filepath.SkipDir
139 | 			}
140 | 			return nil
141 | 		}
142 | 		if excluded {
143 | 			return nil
144 | 		}
145 | 
146 | 		// Files without an extension cannot be mapped to a language.
147 | 		ext := strings.ToLower(filepath.Ext(path))
148 | 		if ext == "" {
149 | 			return nil
150 | 		}
151 | 		ext = ext[1:] // drop the leading dot
152 | 
153 | 		for _, detector := range s.detectors {
154 | 			for _, lang := range detector.SupportedLanguages() {
155 | 				if lang == ext {
156 | 					filesToScan = append(filesToScan, path)
157 | 					return nil // queue each file once, even if several detectors match
158 | 				}
159 | 			}
160 | 		}
161 | 		return nil
162 | 	})
163 | 	if err != nil {
164 | 		return nil, err
165 | 	}
166 | 
167 | 	// Phase 2: scan them, concurrently if requested.
168 | 	results := make(map[string][]Match)
169 | 	if !s.parallel {
170 | 		for _, file := range filesToScan {
171 | 			matches, err := s.ScanFile(file)
172 | 			if err != nil {
173 | 				// Log error but continue
174 | 				fmt.Fprintf(os.Stderr, "Error scanning file %s: %v\n", file, err)
175 | 				continue
176 | 			}
177 | 			if len(matches) > 0 {
178 | 				results[file] = matches
179 | 			}
180 | 		}
181 | 		return results, nil
182 | 	}
183 | 
184 | 	// One goroutine per file; resultsMutex serialises writes to the map.
185 | 	var (
186 | 		wg           sync.WaitGroup
187 | 		resultsMutex sync.Mutex
188 | 	)
189 | 	for _, file := range filesToScan {
190 | 		wg.Add(1)
191 | 		go func(file string) {
192 | 			defer wg.Done()
193 | 			matches, err := s.ScanFile(file)
194 | 			if err != nil {
195 | 				fmt.Fprintf(os.Stderr, "Error scanning file %s: %v\n", file, err)
196 | 				return
197 | 			}
198 | 			if len(matches) > 0 {
199 | 				resultsMutex.Lock()
200 | 				results[file] = matches
201 | 				resultsMutex.Unlock()
202 | 			}
203 | 		}(file)
204 | 	}
205 | 	wg.Wait()
206 | 	return results, nil
207 | }
208 | 
209 | // matchesAnyPattern reports whether name matches at least one glob pattern.
210 | func matchesAnyPattern(patterns []string, name string) bool {
211 | 	for _, pattern := range patterns {
212 | 		if matched, _ := filepath.Match(pattern, name); matched {
213 | 			return true
214 | 		}
215 | 	}
216 | 	return false
217 | }


--------------------------------------------------------------------------------
/go/internal/core/scanner_test.go:
--------------------------------------------------------------------------------
  1 | package core
  2 | 
  3 | import (
  4 | 	"io/ioutil"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"testing"
  8 | 
  9 | 	"github.com/stretchr/testify/assert"
 10 | )
 11 | 
 12 | // TestNewScanner checks that a new scanner starts with both modes disabled.
 13 | func TestNewScanner(t *testing.T) {
 14 | 	check, sc := assert.New(t), NewScanner()
 15 | 	check.NotNil(sc)
 16 | 	check.False(sc.IsParallel())
 17 | 	check.False(sc.IsIncremental())
 18 | }
 19 | 
 20 | // TestSetParallel switches parallel mode on and then off again.
 21 | func TestSetParallel(t *testing.T) {
 22 | 	sc := NewScanner()
 23 | 	assert.False(t, sc.IsParallel()) // off by default
 24 | 
 25 | 	// Each value that is set must be reflected by the getter.
 26 | 	for _, want := range []bool{true, false} {
 27 | 		sc.SetParallel(want)
 28 | 		assert.Equal(t, want, sc.IsParallel())
 29 | 	}
 30 | }
 31 | 
 32 | // TestSetIncremental switches incremental mode on and then off again.
 33 | func TestSetIncremental(t *testing.T) {
 34 | 	sc := NewScanner()
 35 | 	assert.False(t, sc.IsIncremental()) // off by default
 36 | 
 37 | 	// Each value that is set must be reflected by the getter.
 38 | 	for _, want := range []bool{true, false} {
 39 | 		sc.SetIncremental(want)
 40 | 		assert.Equal(t, want, sc.IsIncremental())
 41 | 	}
 42 | }
 43 | 
 44 | // TestRegisterDetector checks that a registered detector's languages are
 45 | // exposed through Scanner.SupportedLanguages.
 46 | func TestRegisterDetector(t *testing.T) {
 47 | 	sc := NewScanner()
 48 | 
 49 | 	// mockDetector (defined in this file) reports "mock" as one of its
 50 | 	// supported languages.
 51 | 	sc.RegisterDetector(&mockDetector{})
 52 | 
 53 | 	// A fresh scanner has no detectors, so "mock" can only have come from
 54 | 	// the detector registered above.
 55 | 	supported := sc.SupportedLanguages()
 56 | 	assert.Contains(t, supported, "mock")
 57 | }
 58 | 
 59 | // TestScanFile scans one temporary file with the mock detector registered.
 60 | func TestScanFile(t *testing.T) {
 61 | 	// The "*" puts the random part in the middle so the name really ends in ".py".
 62 | 	tmpfile, err := ioutil.TempFile("", "example-*.py")
 63 | 	if err != nil {
 64 | 		t.Fatalf("create temp file: %v", err) // tmpfile is nil; cannot continue
 65 | 	}
 66 | 	defer os.Remove(tmpfile.Name())
 67 | 	_, err = tmpfile.Write([]byte("print(eval('1+1'))"))
 68 | 	assert.NoError(t, err)
 69 | 	assert.NoError(t, tmpfile.Close())
 70 | 
 71 | 	scanner := NewScanner()
 72 | 	scanner.RegisterDetector(&mockDetector{})
 73 | 
 74 | 	// The mock returns exactly one match with confidence 0.9, which passes
 75 | 	// the default 0.7 threshold.
 76 | 	matches, err := scanner.ScanFile(tmpfile.Name())
 77 | 	assert.NoError(t, err)
 78 | 	// Guard the index below: a failed Len assertion does not stop the test.
 79 | 	if assert.Len(t, matches, 1) {
 80 | 		assert.Equal(t, "MOCK001", matches[0].Signature.ID)
 81 | 	}
 82 | }
 83 | 
 84 | // TestScanDirectory scans a temporary directory holding two .py files; the
 85 | // mock detector reports one match per file, so both must show up once.
 86 | func TestScanDirectory(t *testing.T) {
 87 | 	tmpdir, err := ioutil.TempDir("", "example")
 88 | 	assert.NoError(t, err)
 89 | 	defer os.RemoveAll(tmpdir)
 90 | 
 91 | 	// Fixture files, written in this order.
 92 | 	fixtures := []struct{ name, body string }{
 93 | 		{"test1.py", "print(eval('1+1'))"},
 94 | 		{"test2.py", "print('Hello')"},
 95 | 	}
 96 | 	paths := make([]string, 0, len(fixtures))
 97 | 	for _, f := range fixtures {
 98 | 		p := filepath.Join(tmpdir, f.name)
 99 | 		assert.NoError(t, ioutil.WriteFile(p, []byte(f.body), 0644))
100 | 		paths = append(paths, p)
101 | 	}
102 | 
103 | 	sc := NewScanner()
104 | 	sc.RegisterDetector(&mockDetector{})
105 | 
106 | 	results, err := sc.ScanDirectory(tmpdir, nil)
107 | 	assert.NoError(t, err)
108 | 	assert.Len(t, results, 2)
109 | 
110 | 	// Every fixture must be reported with exactly one match.
111 | 	for _, p := range paths {
112 | 		assert.Contains(t, results, p)
113 | 		assert.Len(t, results[p], 1)
114 | 	}
115 | }
116 | 
117 | // TestGenerateSummary checks the per-severity and per-name counts.
118 | func TestGenerateSummary(t *testing.T) {
119 | 	// Three files carrying six findings, each with a distinct signature name.
120 | 	results := map[string][]Match{
121 | 		"file1.py": {
122 | 			{
123 | 				Signature: Signature{
124 | 					ID:       "PY001",
125 | 					Name:     "Dangerous eval() usage",
126 | 					Severity: "high",
127 | 				},
128 | 			},
129 | 		},
130 | 		"file2.py": {
131 | 			{
132 | 				Signature: Signature{
133 | 					ID:       "PY002",
134 | 					Name:     "Dangerous exec() usage",
135 | 					Severity: "high",
136 | 				},
137 | 			},
138 | 			{
139 | 				Signature: Signature{
140 | 					ID:       "PY005",
141 | 					Name:     "Insecure random number generation",
142 | 					Severity: "medium",
143 | 				},
144 | 			},
145 | 		},
146 | 		"file3.py": {
147 | 			{
148 | 				Signature: Signature{
149 | 					ID:       "PY008",
150 | 					Name:     "Temporary file creation risk",
151 | 					Severity: "medium",
152 | 				},
153 | 			},
154 | 			{
155 | 				Signature: Signature{
156 | 					ID:       "PY010",
157 | 					Name:     "Debug mode enabled",
158 | 					Severity: "medium",
159 | 				},
160 | 			},
161 | 			{
162 | 				Signature: Signature{
163 | 					ID:       "PY012",
164 | 					Name:     "Bare except block",
165 | 					Severity: "low",
166 | 				},
167 | 			},
168 | 		},
169 | 	}
170 | 
171 | 	check := assert.New(t)
172 | 	summary := GenerateSummary(results)
173 | 
174 | 	// Three files; 2 high, 3 medium and 1 low finding in total.
175 | 	check.Equal(3, summary.TotalFiles)
176 | 	check.Equal(2, summary.High)
177 | 	check.Equal(3, summary.Medium)
178 | 	check.Equal(1, summary.Low)
179 | 
180 | 	// Every signature name occurs exactly once in the fixture, so each
181 | 	// per-name counter must be 1.
182 | 	for _, matches := range results {
183 | 		for _, m := range matches {
184 | 			check.Equal(1, summary.Vulnerabilities[m.Signature.Name])
185 | 		}
186 | 	}
187 | }
188 | 
189 | // mockDetector is a stub Detector for the tests in this file: it claims
190 | // the languages "mock", "py" and "python" and reports one fixed
191 | // high-severity finding (confidence 0.9) for anything it inspects.
192 | // It holds no state.
193 | type mockDetector struct{}
194 | 
195 | // Name returns the detector's name, "mock".
196 | func (d *mockDetector) Name() string {
197 | 	return "mock"
198 | }
199 | 
200 | // SupportedLanguages returns the languages the stub claims to handle.
201 | func (d *mockDetector) SupportedLanguages() []string {
202 | 	return []string{"mock", "py", "python"}
203 | }
204 | 
205 | // DetectFile never reads filePath; it reports the canned finding with
206 | // "mock code" as the matched code.
207 | func (d *mockDetector) DetectFile(filePath string) ([]Match, error) {
208 | 	return []Match{d.finding(filePath, "mock code")}, nil
209 | }
210 | 
211 | // DetectCode reports the canned finding and echoes code back as the
212 | // matched code.
213 | func (d *mockDetector) DetectCode(code string, filePath string) ([]Match, error) {
214 | 	return []Match{d.finding(filePath, code)}, nil
215 | }
216 | 
217 | // finding builds the single canned Match shared by both Detect methods.
218 | func (d *mockDetector) finding(filePath, matchedCode string) Match {
219 | 	return Match{
220 | 		Signature: Signature{
221 | 			ID:          "MOCK001",
222 | 			Name:        "Mock vulnerability",
223 | 			Severity:    "high",
224 | 			Description: "This is a mock vulnerability",
225 | 		},
226 | 		FilePath:    filePath,
227 | 		LineNumber:  1,
228 | 		MatchedCode: matchedCode,
229 | 		Confidence:  0.9,
230 | 	}
231 | }
232 | 


--------------------------------------------------------------------------------
/go/internal/detectors/tests/detector_test.go:
--------------------------------------------------------------------------------
1 |  


--------------------------------------------------------------------------------
/go/internal/detectors/vulnerability.go:
--------------------------------------------------------------------------------
  1 | package detectors
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"io/ioutil"
  7 | 	"os"
  8 | 	"regexp"
  9 | 	"strings"
 10 | 	"sync"
 11 | 
 12 | 	"github.com/dave/dst"
 13 | 	"github.com/dave/dst/decorator"
 14 | )
 15 | 
 16 | // Signature is a vulnerability signature as read from the JSON signature file.
 17 | type Signature struct {
 18 | 	ID           string   `json:"id"`
 19 | 	Name         string   `json:"name"`
 20 | 	Severity     string   `json:"severity"`
 21 | 	CodePatterns []string `json:"code_patterns"` // compiled as regular expressions by DetectFile
 22 | }
 23 | 
 24 | // Match is one hit of a signature pattern.
 25 | type Match struct {
 26 | 	Signature   Signature
 27 | 	LineNumber  int // DetectFile sets this to 1 + the number of newlines before the match
 28 | 	MatchedCode string
 29 | 	Confidence  float64
 30 | }
 31 | 
 32 | // VulnerabilityDetector matches loaded signatures against source files.
 33 | type VulnerabilityDetector struct {
 34 | 	signatures    []Signature // replaced as a whole by LoadSignatures
 35 | 	minConfidence float64 // DetectFile drops matches scoring below this
 36 | 	mu           sync.RWMutex // guards signatures
 37 | }
 38 | 
 39 | // NewVulnerabilityDetector returns a detector with no signatures loaded and a 0.7 minimum confidence.
 40 | func NewVulnerabilityDetector() *VulnerabilityDetector {
 41 | 	detector := new(VulnerabilityDetector)
 42 | 	detector.minConfidence = 0.7
 43 | 	return detector
 44 | }
 45 | 
 46 | // LoadSignatures replaces the detector's signatures with the "signatures"
 47 | // array of the JSON file signatureFile. On a read or parse error the
 48 | // previously loaded signatures are left untouched.
 49 | func (d *VulnerabilityDetector) LoadSignatures(signatureFile string) error {
 50 | 	raw, readErr := ioutil.ReadFile(signatureFile)
 51 | 	if readErr != nil {
 52 | 		return fmt.Errorf("读取签名文件失败: %v", readErr)
 53 | 	}
 54 | 
 55 | 	var doc struct {
 56 | 		Signatures []Signature `json:"signatures"`
 57 | 	}
 58 | 	if parseErr := json.Unmarshal(raw, &doc); parseErr != nil {
 59 | 		return fmt.Errorf("解析签名文件失败: %v", parseErr)
 60 | 	}
 61 | 
 62 | 	d.mu.Lock()
 63 | 	defer d.mu.Unlock()
 64 | 	d.signatures = doc.Signatures
 65 | 	return nil
 66 | }
 67 | 
 68 | // DetectFile scans the file at filePath with every loaded signature pattern.
 69 | //
 70 | // Each signature is processed in its own goroutine, and every goroutine
 71 | // writes only to its own slot of a pre-sized slice, so no channel or lock
 72 | // is needed and the result order is deterministic: signatures in load
 73 | // order, then patterns in declaration order, then matches by position.
 74 | // Patterns that fail to compile are skipped, and matches whose confidence
 75 | // is below the detector's minimum are dropped.
 76 | func (d *VulnerabilityDetector) DetectFile(filePath string) ([]Match, error) {
 77 | 	// os.ReadFile also puts this file's otherwise unused "os" import to use.
 78 | 	content, err := os.ReadFile(filePath)
 79 | 	if err != nil {
 80 | 		return nil, fmt.Errorf("读取文件失败: %v", err)
 81 | 	}
 82 | 	text := string(content) // convert once instead of once per match
 83 | 
 84 | 	d.mu.RLock()
 85 | 	signatures := d.signatures
 86 | 	d.mu.RUnlock()
 87 | 
 88 | 	// One result slot per signature; slot i is written by goroutine i only
 89 | 	// and read after wg.Wait.
 90 | 	perSignature := make([][]Match, len(signatures))
 91 | 	var wg sync.WaitGroup
 92 | 	for i, sig := range signatures {
 93 | 		wg.Add(1)
 94 | 		go func(slot int, signature Signature) {
 95 | 			defer wg.Done()
 96 | 			for _, pattern := range signature.CodePatterns {
 97 | 				re, err := regexp.Compile(pattern)
 98 | 				if err != nil {
 99 | 					continue
100 | 				}
101 | 
102 | 				// Matches arrive in ascending offset order, so newlines
103 | 				// are counted incrementally rather than from the start.
104 | 				line, counted := 1, 0
105 | 				for _, loc := range re.FindAllStringIndex(text, -1) {
106 | 					line += strings.Count(text[counted:loc[0]], "\n")
107 | 					counted = loc[0]
108 | 					matchedCode := text[loc[0]:loc[1]]
109 | 					confidence := d.calculateConfidence(matchedCode, pattern)
110 | 					if confidence < d.minConfidence {
111 | 						continue
112 | 					}
113 | 					perSignature[slot] = append(perSignature[slot], Match{
114 | 						Signature:   signature,
115 | 						LineNumber:  line,
116 | 						MatchedCode: matchedCode,
117 | 						Confidence:  confidence,
118 | 					})
119 | 				}
120 | 			}
121 | 		}(i, sig)
122 | 	}
123 | 	wg.Wait()
124 | 
125 | 	// Flatten the slots in signature order.
126 | 	matches := make([]Match, 0)
127 | 	for _, found := range perSignature {
128 | 		matches = append(matches, found...)
129 | 	}
130 | 	return matches, nil
131 | }
132 | 
133 | // AnalyzeAST parses the Go file at filePath and reports, with confidence
134 | // 0.9, every call whose callee name ("f" or "pkg.f") matches a signature
135 | // pattern (a regular expression). Calls of any other form are ignored.
136 | func (d *VulnerabilityDetector) AnalyzeAST(filePath string) ([]Match, error) {
137 | 	// dst nodes carry no positions: parse via a Decorator and map back to ast.
138 | 	dec := decorator.NewDecorator(nil) // nil: the decorator creates its own FileSet
139 | 	node, err := dec.ParseFile(filePath, nil, 0)
140 | 	if err != nil {
141 | 		return nil, fmt.Errorf("解析文件失败: %v", err)
142 | 	}
143 | 	matches := make([]Match, 0)
144 | 	d.mu.RLock()
145 | 	signatures := d.signatures
146 | 	d.mu.RUnlock()
147 | 	dst.Inspect(node, func(n dst.Node) bool {
148 | 		call, ok := n.(*dst.CallExpr)
149 | 		if !ok {
150 | 			return true
151 | 		}
152 | 		var funcName string
153 | 		switch fun := call.Fun.(type) {
154 | 		case *dst.Ident:
155 | 			funcName = fun.Name
156 | 		case *dst.SelectorExpr:
157 | 			if x, ok := fun.X.(*dst.Ident); ok {
158 | 				funcName = x.Name + "." + fun.Sel.Name
159 | 			}
160 | 		}
161 | 		if funcName == "" {
162 | 			return true // no simple callee name to match against
163 | 		}
164 | 		line := 0 // stays 0 if the decorator has no ast node for this call
165 | 		if orig, ok := dec.Ast.Nodes[call]; ok {
166 | 			line = dec.Fset.Position(orig.Pos()).Line
167 | 		}
168 | 		for _, sig := range signatures {
169 | 			for _, pattern := range sig.CodePatterns {
170 | 				if matched, _ := regexp.MatchString(pattern, funcName); matched {
171 | 					matches = append(matches, Match{Signature: sig, LineNumber: line, MatchedCode: funcName, Confidence: 0.9})
172 | 				}
173 | 			}
174 | 		}
175 | 		return true
176 | 	})
177 | 	return matches, nil
178 | }
179 | 
180 | // DetectSimilarPatterns parses the Go file at filePath and reports every
181 | // call whose callee name ("f" or "pkg.f") has an LCS similarity of at least
182 | // threshold to a signature pattern; the similarity becomes the confidence.
183 | // Calls of any other form are ignored.
184 | func (d *VulnerabilityDetector) DetectSimilarPatterns(filePath string, threshold float64) ([]Match, error) {
185 | 	// dst nodes carry no positions: parse via a Decorator and map back to ast.
186 | 	dec := decorator.NewDecorator(nil) // nil: the decorator creates its own FileSet
187 | 	node, err := dec.ParseFile(filePath, nil, 0)
188 | 	if err != nil {
189 | 		return nil, fmt.Errorf("解析文件失败: %v", err)
190 | 	}
191 | 	matches := make([]Match, 0)
192 | 	d.mu.RLock()
193 | 	signatures := d.signatures
194 | 	d.mu.RUnlock()
195 | 	dst.Inspect(node, func(n dst.Node) bool {
196 | 		call, ok := n.(*dst.CallExpr)
197 | 		if !ok {
198 | 			return true
199 | 		}
200 | 		var funcName string
201 | 		switch fun := call.Fun.(type) {
202 | 		case *dst.Ident:
203 | 			funcName = fun.Name
204 | 		case *dst.SelectorExpr:
205 | 			if x, ok := fun.X.(*dst.Ident); ok {
206 | 				funcName = x.Name + "." + fun.Sel.Name
207 | 			}
208 | 		}
209 | 		if funcName == "" {
210 | 			return true // no simple callee name to compare
211 | 		}
212 | 		line := 0 // stays 0 if the decorator has no ast node for this call
213 | 		if orig, ok := dec.Ast.Nodes[call]; ok {
214 | 			line = dec.Fset.Position(orig.Pos()).Line
215 | 		}
216 | 		for _, sig := range signatures {
217 | 			for _, pattern := range sig.CodePatterns {
218 | 				similarity := d.calculateSimilarity(funcName, pattern)
219 | 				if similarity >= threshold {
220 | 					matches = append(matches, Match{Signature: sig, LineNumber: line, MatchedCode: funcName, Confidence: similarity})
221 | 				}
222 | 			}
223 | 		}
224 | 		return true
225 | 	})
226 | 	return matches, nil
227 | }
227 | 
228 | // calculateConfidence scores a regex hit between 0.7 and 1.0.
229 | //
230 | // The base score is 0.7; 0.1 is added for each of: matched text longer
231 | // than 10 bytes, matched text containing "import", and a pattern longer
232 | // than 20 bytes. The result is capped at 1.0.
233 | func (d *VulnerabilityDetector) calculateConfidence(matchedCode, pattern string) float64 {
234 | 	bonuses := [...]bool{
235 | 		len(matchedCode) > 10, // longer match
236 | 		strings.Contains(matchedCode, "import"), // import context
237 | 		len(pattern) > 20, // more specific pattern
238 | 	}
239 | 
240 | 	// Add the bonuses one at a time so the float rounding stays unchanged.
241 | 	score := 0.7
242 | 	for _, earned := range bonuses {
243 | 		if earned {
244 | 			score += 0.1
245 | 		}
246 | 	}
247 | 
248 | 	if score > 1.0 {
249 | 		score = 1.0
250 | 	}
251 | 	return score
252 | }
253 | 
254 | // calculateSimilarity returns len(LCS(str1, str2)) / max(len(str1), len(str2)),
255 | // comparing byte by byte; two empty strings give 0.
256 | func (d *VulnerabilityDetector) calculateSimilarity(str1, str2 string) float64 {
257 | 	longest := max(len(str1), len(str2))
258 | 	if longest == 0 {
259 | 		return 0
260 | 	}
261 | 
262 | 	// Longest-common-subsequence dynamic programme, keeping only the
263 | 	// previous and the current row of the table.
264 | 	prev := make([]int, len(str2)+1)
265 | 	curr := make([]int, len(str2)+1)
266 | 	for i := 0; i < len(str1); i++ {
267 | 		for j := 0; j < len(str2); j++ {
268 | 			if str1[i] == str2[j] {
269 | 				curr[j+1] = prev[j] + 1
270 | 			} else {
271 | 				curr[j+1] = max(prev[j+1], curr[j])
272 | 			}
273 | 		}
274 | 		prev, curr = curr, prev
275 | 	}
276 | 
277 | 	// After the final swap the last computed row is in prev.
278 | 	return float64(prev[len(str2)]) / float64(longest)
279 | }
280 | 
281 | // max returns the larger of a and b (b when they are equal).
282 | func max(a, b int) int {
283 | 	if b >= a {
284 | 		return b
285 | 	}
286 | 	return a
287 | }


--------------------------------------------------------------------------------
/go/internal/reporters/json.go:
--------------------------------------------------------------------------------
 1 | package reporters
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 	"os"
 6 | 	"path/filepath"
 7 | 
 8 | 	"github.com/re-movery/re-movery/internal/core"
 9 | )
10 | 
// JSONReporter writes scan reports as JSON documents. It holds no state.
type JSONReporter struct{}

// NewJSONReporter returns a ready-to-use JSONReporter.
func NewJSONReporter() *JSONReporter {
	return new(JSONReporter)
}
18 | 
19 | // GenerateReport generates a report
20 | func (r *JSONReporter) GenerateReport(data core.ReportData, outputPath string) error {
21 | 	// Create output directory if it doesn't exist
22 | 	outputDir := filepath.Dir(outputPath)
23 | 	if err := os.MkdirAll(outputDir, 0755); err != nil {
24 | 		return err
25 | 	}
26 | 
27 | 	// Create output file
28 | 	file, err := os.Create(outputPath)
29 | 	if err != nil {
30 | 		return err
31 | 	}
32 | 	defer file.Close()
33 | 
34 | 	// Marshal data to JSON
35 | 	encoder := json.NewEncoder(file)
36 | 	encoder.SetIndent("", "  ")
37 | 	if err := encoder.Encode(data); err != nil {
38 | 		return err
39 | 	}
40 | 
41 | 	return nil
42 | } 


--------------------------------------------------------------------------------
/go/internal/reporters/xml.go:
--------------------------------------------------------------------------------
  1 | package reporters
  2 | 
import (
	"encoding/xml"
	"os"
	"path/filepath"
	"sort"

	"github.com/re-movery/re-movery/internal/core"
)
 10 | 
 11 | // XMLReporter is a reporter that generates XML reports
 12 | type XMLReporter struct{}
 13 | 
 14 | // NewXMLReporter creates a new XML reporter
 15 | func NewXMLReporter() *XMLReporter {
 16 | 	return &XMLReporter{}
 17 | }
 18 | 
// XMLReportData is the root of the XML report. It is encoded as a <report>
// element containing <title>, <timestamp> and <summary> children, followed by
// a <results> element that wraps one <file> element per entry in Results.
type XMLReportData struct {
	XMLName   xml.Name        `xml:"report"`
	Title     string          `xml:"title"`
	Timestamp string          `xml:"timestamp"`
	Summary   XMLSummary      `xml:"summary"`
	Results   []XMLFileResult `xml:"results>file"`
}
 27 | 
// XMLSummary is the XML representation of the report summary. Every field is
// encoded as an attribute of the enclosing element rather than as a child.
type XMLSummary struct {
	TotalFiles int `xml:"totalFiles,attr"`
	High       int `xml:"high,attr"`
	Medium     int `xml:"medium,attr"`
	Low        int `xml:"low,attr"`
}
 35 | 
// XMLFileResult is the XML representation of one file's results: the file
// path is encoded as a "path" attribute and each match as a <match> child.
type XMLFileResult struct {
	Path    string      `xml:"path,attr"`
	Matches []XMLMatch  `xml:"match"`
}
 41 | 
// XMLMatch is the XML representation of a single match. ID is encoded as an
// "id" attribute; all remaining fields are encoded as child elements.
type XMLMatch struct {
	ID          string  `xml:"id,attr"`
	Name        string  `xml:"name"`
	Severity    string  `xml:"severity"`
	Description string  `xml:"description"`
	LineNumber  int     `xml:"lineNumber"`
	MatchedCode string  `xml:"matchedCode"`
	Confidence  float64 `xml:"confidence"`
}
 52 | 
 53 | // GenerateReport generates a report
 54 | func (r *XMLReporter) GenerateReport(data core.ReportData, outputPath string) error {
 55 | 	// Create output directory if it doesn't exist
 56 | 	outputDir := filepath.Dir(outputPath)
 57 | 	if err := os.MkdirAll(outputDir, 0755); err != nil {
 58 | 		return err
 59 | 	}
 60 | 
 61 | 	// Create output file
 62 | 	file, err := os.Create(outputPath)
 63 | 	if err != nil {
 64 | 		return err
 65 | 	}
 66 | 	defer file.Close()
 67 | 
 68 | 	// Convert data to XML format
 69 | 	xmlData := r.convertToXML(data)
 70 | 
 71 | 	// Write XML header
 72 | 	file.WriteString(xml.Header)
 73 | 
 74 | 	// Marshal data to XML
 75 | 	encoder := xml.NewEncoder(file)
 76 | 	encoder.Indent("", "  ")
 77 | 	if err := encoder.Encode(xmlData); err != nil {
 78 | 		return err
 79 | 	}
 80 | 
 81 | 	return nil
 82 | }
 83 | 
 84 | // convertToXML converts the report data to XML format
 85 | func (r *XMLReporter) convertToXML(data core.ReportData) XMLReportData {
 86 | 	xmlData := XMLReportData{
 87 | 		Title:     data.Title,
 88 | 		Timestamp: data.Timestamp,
 89 | 		Summary: XMLSummary{
 90 | 			TotalFiles: data.Summary.TotalFiles,
 91 | 			High:       data.Summary.High,
 92 | 			Medium:     data.Summary.Medium,
 93 | 			Low:        data.Summary.Low,
 94 | 		},
 95 | 		Results: []XMLFileResult{},
 96 | 	}
 97 | 
 98 | 	// Convert results
 99 | 	for filePath, matches := range data.Results {
100 | 		fileResult := XMLFileResult{
101 | 			Path:    filePath,
102 | 			Matches: []XMLMatch{},
103 | 		}
104 | 
105 | 		for _, match := range matches {
106 | 			xmlMatch := XMLMatch{
107 | 				ID:          match.Signature.ID,
108 | 				Name:        match.Signature.Name,
109 | 				Severity:    match.Signature.Severity,
110 | 				Description: match.Signature.Description,
111 | 				LineNumber:  match.LineNumber,
112 | 				MatchedCode: match.MatchedCode,
113 | 				Confidence:  match.Confidence,
114 | 			}
115 | 			fileResult.Matches = append(fileResult.Matches, xmlMatch)
116 | 		}
117 | 
118 | 		xmlData.Results = append(xmlData.Results, fileResult)
119 | 	}
120 | 
121 | 	return xmlData
122 | } 


--------------------------------------------------------------------------------
/go/internal/utils/logging.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 |     "io"
 5 |     "os"
 6 |     "sync"
 7 | 
 8 |     "github.com/sirupsen/logrus"
 9 | )
10 | 
11 | var (
12 |     logger *logrus.Logger
13 |     once   sync.Once
14 | )
15 | 
16 | // GetLogger returns the singleton logger instance
17 | func GetLogger() *logrus.Logger {
18 |     once.Do(func() {
19 |         logger = logrus.New()
20 |         logger.SetFormatter(&logrus.TextFormatter{
21 |             FullTimestamp: true,
22 |         })
23 |         logger.SetOutput(os.Stdout)
24 |         logger.SetLevel(logrus.InfoLevel)
25 |     })
26 |     return logger
27 | }
28 | 
29 | // FileLogger represents a logger that writes to a file
30 | type FileLogger struct {
31 |     *logrus.Logger
32 |     file *os.File
33 | }
34 | 
35 | // NewFileLogger creates a new file logger
36 | func NewFileLogger(filename string) (*FileLogger, error) {
37 |     file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
38 |     if err != nil {
39 |         return nil, err
40 |     }
41 | 
42 |     logger := logrus.New()
43 |     logger.SetFormatter(&logrus.JSONFormatter{})
44 |     logger.SetOutput(io.MultiWriter(file, os.Stdout))
45 | 
46 |     return &FileLogger{
47 |         Logger: logger,
48 |         file:   file,
49 |     }, nil
50 | }
51 | 
52 | // Close closes the log file
53 | func (fl *FileLogger) Close() error {
54 |     if fl.file != nil {
55 |         return fl.file.Close()
56 |     }
57 |     return nil
58 | }
59 | 
60 | // SetVerbosity sets the logging level based on verbosity
61 | func SetVerbosity(verbose bool) {
62 |     if verbose {
63 |         GetLogger().SetLevel(logrus.DebugLevel)
64 |     } else {
65 |         GetLogger().SetLevel(logrus.InfoLevel)
66 |     }
67 | } 


--------------------------------------------------------------------------------
/go/internal/utils/memory.go:
--------------------------------------------------------------------------------
  1 | package utils
  2 | 
  3 | import (
  4 |     "container/list"
  5 |     "runtime"
  6 |     "sync"
  7 |     "time"
  8 | 
  9 |     "github.com/shirou/gopsutil/v3/mem"
 10 | )
 11 | 
 12 | // MemoryMonitor monitors system memory usage
 13 | type MemoryMonitor struct {
 14 |     maxMemoryGB float64
 15 |     interval    time.Duration
 16 |     stopChan    chan struct{}
 17 | }
 18 | 
 19 | // NewMemoryMonitor creates a new memory monitor
 20 | func NewMemoryMonitor(maxMemoryGB float64, interval time.Duration) *MemoryMonitor {
 21 |     return &MemoryMonitor{
 22 |         maxMemoryGB: maxMemoryGB,
 23 |         interval:    interval,
 24 |         stopChan:    make(chan struct{}),
 25 |     }
 26 | }
 27 | 
 28 | // Start starts monitoring memory usage
 29 | func (mm *MemoryMonitor) Start() {
 30 |     go func() {
 31 |         ticker := time.NewTicker(mm.interval)
 32 |         defer ticker.Stop()
 33 | 
 34 |         for {
 35 |             select {
 36 |             case <-ticker.C:
 37 |                 v, err := mem.VirtualMemory()
 38 |                 if err != nil {
 39 |                     GetLogger().Errorf("Failed to get memory stats: %v", err)
 40 |                     continue
 41 |                 }
 42 | 
 43 |                 usedGB := float64(v.Used) / (1024 * 1024 * 1024)
 44 |                 if usedGB > mm.maxMemoryGB {
 45 |                     GetLogger().Warnf("Memory usage (%.2f GB) exceeds limit (%.2f GB), triggering GC", usedGB, mm.maxMemoryGB)
 46 |                     runtime.GC()
 47 |                 }
 48 |             case <-mm.stopChan:
 49 |                 return
 50 |             }
 51 |         }
 52 |     }()
 53 | }
 54 | 
 55 | // Stop stops the memory monitor
 56 | func (mm *MemoryMonitor) Stop() {
 57 |     close(mm.stopChan)
 58 | }
 59 | 
 60 | // LRUCache implements a thread-safe LRU cache
 61 | type LRUCache struct {
 62 |     capacity int
 63 |     cache    map[interface{}]*list.Element
 64 |     ll       *list.List
 65 |     mutex    sync.RWMutex
 66 | }
 67 | 
 68 | type entry struct {
 69 |     key   interface{}
 70 |     value interface{}
 71 | }
 72 | 
 73 | // NewLRUCache creates a new LRU cache with the specified capacity
 74 | func NewLRUCache(capacity int) *LRUCache {
 75 |     return &LRUCache{
 76 |         capacity: capacity,
 77 |         cache:    make(map[interface{}]*list.Element),
 78 |         ll:       list.New(),
 79 |     }
 80 | }
 81 | 
 82 | // Get retrieves a value from the cache
 83 | func (c *LRUCache) Get(key interface{}) (interface{}, bool) {
 84 |     c.mutex.RLock()
 85 |     defer c.mutex.RUnlock()
 86 | 
 87 |     if elem, ok := c.cache[key]; ok {
 88 |         c.ll.MoveToFront(elem)
 89 |         return elem.Value.(*entry).value, true
 90 |     }
 91 |     return nil, false
 92 | }
 93 | 
 94 | // Put adds a value to the cache
 95 | func (c *LRUCache) Put(key, value interface{}) {
 96 |     c.mutex.Lock()
 97 |     defer c.mutex.Unlock()
 98 | 
 99 |     if elem, ok := c.cache[key]; ok {
100 |         c.ll.MoveToFront(elem)
101 |         elem.Value.(*entry).value = value
102 |         return
103 |     }
104 | 
105 |     if c.ll.Len() >= c.capacity {
106 |         oldest := c.ll.Back()
107 |         if oldest != nil {
108 |             c.ll.Remove(oldest)
109 |             delete(c.cache, oldest.Value.(*entry).key)
110 |         }
111 |     }
112 | 
113 |     elem := c.ll.PushFront(&entry{key, value})
114 |     c.cache[key] = elem
115 | } 


--------------------------------------------------------------------------------
/go/internal/utils/parallel.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 |     "sync"
 5 | )
 6 | 
 7 | // Job represents a unit of work
 8 | type Job interface {
 9 |     Execute() error
10 | }
11 | 
12 | // WorkerPool manages a pool of workers for parallel processing
13 | type WorkerPool struct {
14 |     numWorkers int
15 |     jobs       chan Job
16 |     results    chan error
17 |     wg         sync.WaitGroup
18 |     stopChan   chan struct{}
19 | }
20 | 
21 | // NewWorkerPool creates a new worker pool
22 | func NewWorkerPool(numWorkers int, queueSize int) *WorkerPool {
23 |     return &WorkerPool{
24 |         numWorkers: numWorkers,
25 |         jobs:       make(chan Job, queueSize),
26 |         results:    make(chan error, queueSize),
27 |         stopChan:   make(chan struct{}),
28 |     }
29 | }
30 | 
31 | // Start starts the worker pool
32 | func (wp *WorkerPool) Start() {
33 |     for i := 0; i < wp.numWorkers; i++ {
34 |         wp.wg.Add(1)
35 |         go wp.worker()
36 |     }
37 | }
38 | 
39 | // worker processes jobs from the job queue
40 | func (wp *WorkerPool) worker() {
41 |     defer wp.wg.Done()
42 | 
43 |     for {
44 |         select {
45 |         case job := <-wp.jobs:
46 |             if job == nil {
47 |                 return
48 |             }
49 |             err := job.Execute()
50 |             wp.results <- err
51 |         case <-wp.stopChan:
52 |             return
53 |         }
54 |     }
55 | }
56 | 
57 | // Submit submits a job to the worker pool
58 | func (wp *WorkerPool) Submit(job Job) {
59 |     wp.jobs <- job
60 | }
61 | 
62 | // Stop stops the worker pool
63 | func (wp *WorkerPool) Stop() {
64 |     close(wp.stopChan)
65 |     wp.wg.Wait()
66 |     close(wp.jobs)
67 |     close(wp.results)
68 | }
69 | 
70 | // Results returns the results channel
71 | func (wp *WorkerPool) Results() <-chan error {
72 |     return wp.results
73 | } 


--------------------------------------------------------------------------------
/go/internal/utils/security_test.go:
--------------------------------------------------------------------------------
  1 | package utils
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"testing"
  6 | 	"time"
  7 | )
  8 | 
  9 | func TestNewSecurityChecker(t *testing.T) {
 10 | 	checker := NewSecurityChecker()
 11 | 	if checker == nil {
 12 | 		t.Error("NewSecurityChecker返回了nil")
 13 | 	}
 14 | 
 15 | 	if len(checker.sensitivePatterns) == 0 {
 16 | 		t.Error("敏感模式映射为空")
 17 | 	}
 18 | 
 19 | 	expectedPatterns := []string{"file_access", "network_access", "code_execution", "input_validation", "random_generation", "sensitive_data"}
 20 | 	for _, pattern := range expectedPatterns {
 21 | 		if patterns, ok := checker.sensitivePatterns[pattern]; !ok || len(patterns) == 0 {
 22 | 			t.Errorf("缺少预期的模式类型: %s", pattern)
 23 | 		}
 24 | 	}
 25 | }
 26 | 
 27 | func createTestFile(content string) (string, error) {
 28 | 	tmpfile, err := os.CreateTemp("", "test_*.go")
 29 | 	if err != nil {
 30 | 		return "", err
 31 | 	}
 32 | 
 33 | 	if _, err := tmpfile.Write([]byte(content)); err != nil {
 34 | 		os.Remove(tmpfile.Name())
 35 | 		return "", err
 36 | 	}
 37 | 
 38 | 	if err := tmpfile.Close(); err != nil {
 39 | 		os.Remove(tmpfile.Name())
 40 | 		return "", err
 41 | 	}
 42 | 
 43 | 	return tmpfile.Name(), nil
 44 | }
 45 | 
 46 | func TestCheckMemoryUsage(t *testing.T) {
 47 | 	checker := NewSecurityChecker()
 48 | 	content := `package main
 49 | 
 50 | import "fmt"
 51 | 
 52 | func main() {
 53 | 	var arr []int
 54 | 	for i := 0; i < 1000; i++ {
 55 | 		arr = append(arr, i)
 56 | 	}
 57 | 	fmt.Println(arr)
 58 | }`
 59 | 
 60 | 	filename, err := createTestFile(content)
 61 | 	if err != nil {
 62 | 		t.Fatalf("创建测试文件失败: %v", err)
 63 | 	}
 64 | 	defer os.Remove(filename)
 65 | 
 66 | 	usage, err := checker.CheckMemoryUsage(filename)
 67 | 	if err != nil {
 68 | 		t.Errorf("检查内存使用失败: %v", err)
 69 | 	}
 70 | 
 71 | 	if usage == 0 {
 72 | 		t.Error("内存使用量不应为0")
 73 | 	}
 74 | }
 75 | 
 76 | func TestCheckExecutionTime(t *testing.T) {
 77 | 	checker := NewSecurityChecker()
 78 | 	content := `package main
 79 | 
 80 | import "time"
 81 | 
 82 | func main() {
 83 | 	time.Sleep(time.Second)
 84 | }`
 85 | 
 86 | 	filename, err := createTestFile(content)
 87 | 	if err != nil {
 88 | 		t.Fatalf("创建测试文件失败: %v", err)
 89 | 	}
 90 | 	defer os.Remove(filename)
 91 | 
 92 | 	// 测试正常超时
 93 | 	err = checker.CheckExecutionTime(filename, 5*time.Second)
 94 | 	if err != nil {
 95 | 		t.Errorf("执行时间检查失败: %v", err)
 96 | 	}
 97 | 
 98 | 	// 测试超时情况
 99 | 	err = checker.CheckExecutionTime(filename, 1*time.Millisecond)
100 | 	if err == nil {
101 | 		t.Error("预期应该发生超时错误")
102 | 	}
103 | }
104 | 
105 | func TestCheckFileAccess(t *testing.T) {
106 | 	checker := NewSecurityChecker()
107 | 	content := `package main
108 | 
109 | import (
110 | 	"os"
111 | 	"io/ioutil"
112 | )
113 | 
114 | func main() {
115 | 	os.Open("test.txt")
116 | 	ioutil.ReadFile("config.json")
117 | }`
118 | 
119 | 	filename, err := createTestFile(content)
120 | 	if err != nil {
121 | 		t.Fatalf("创建测试文件失败: %v", err)
122 | 	}
123 | 	defer os.Remove(filename)
124 | 
125 | 	violations, err := checker.CheckFileAccess(filename)
126 | 	if err != nil {
127 | 		t.Errorf("文件访问检查失败: %v", err)
128 | 	}
129 | 
130 | 	if len(violations) == 0 {
131 | 		t.Error("应该检测到文件访问违规")
132 | 	}
133 | }
134 | 
135 | func TestCheckNetworkAccess(t *testing.T) {
136 | 	checker := NewSecurityChecker()
137 | 	content := `package main
138 | 
139 | import (
140 | 	"net"
141 | 	"net/http"
142 | )
143 | 
144 | func main() {
145 | 	net.Dial("tcp", "localhost:8080")
146 | 	http.Get("http://example.com")
147 | }`
148 | 
149 | 	filename, err := createTestFile(content)
150 | 	if err != nil {
151 | 		t.Fatalf("创建测试文件失败: %v", err)
152 | 	}
153 | 	defer os.Remove(filename)
154 | 
155 | 	violations, err := checker.CheckNetworkAccess(filename)
156 | 	if err != nil {
157 | 		t.Errorf("网络访问检查失败: %v", err)
158 | 	}
159 | 
160 | 	if len(violations) == 0 {
161 | 		t.Error("应该检测到网络访问违规")
162 | 	}
163 | }
164 | 
165 | func TestCheckInputValidation(t *testing.T) {
166 | 	checker := NewSecurityChecker()
167 | 	content := `package main
168 | 
169 | import (
170 | 	"fmt"
171 | 	"bufio"
172 | 	"os"
173 | )
174 | 
175 | func main() {
176 | 	var input string
177 | 	fmt.Scanln(&input)
178 | 	scanner := bufio.NewScanner(os.Stdin)
179 | }`
180 | 
181 | 	filename, err := createTestFile(content)
182 | 	if err != nil {
183 | 		t.Fatalf("创建测试文件失败: %v", err)
184 | 	}
185 | 	defer os.Remove(filename)
186 | 
187 | 	issues, err := checker.CheckInputValidation(filename)
188 | 	if err != nil {
189 | 		t.Errorf("输入验证检查失败: %v", err)
190 | 	}
191 | 
192 | 	if len(issues) == 0 {
193 | 		t.Error("应该检测到未验证的输入")
194 | 	}
195 | }
196 | 
197 | func TestCheckRandomGeneration(t *testing.T) {
198 | 	checker := NewSecurityChecker()
199 | 	content := `package main
200 | 
201 | import (
202 | 	"math/rand"
203 | 	"crypto/rand"
204 | )
205 | 
206 | func main() {
207 | 	rand.Int()
208 | 	rand.Read(make([]byte, 32))
209 | }`
210 | 
211 | 	filename, err := createTestFile(content)
212 | 	if err != nil {
213 | 		t.Fatalf("创建测试文件失败: %v", err)
214 | 	}
215 | 	defer os.Remove(filename)
216 | 
217 | 	issues, err := checker.CheckRandomGeneration(filename)
218 | 	if err != nil {
219 | 		t.Errorf("随机数生成检查失败: %v", err)
220 | 	}
221 | 
222 | 	if len(issues) == 0 {
223 | 		t.Error("应该检测到不安全的随机数生成")
224 | 	}
225 | }
226 | 
227 | func TestCheckSensitiveData(t *testing.T) {
228 | 	checker := NewSecurityChecker()
229 | 	content := `package main
230 | 
231 | import "fmt"
232 | 
233 | func main() {
234 | 	password := "secret123"
235 | 	fmt.Printf("Password: %s\n", password)
236 | }`
237 | 
238 | 	filename, err := createTestFile(content)
239 | 	if err != nil {
240 | 		t.Fatalf("创建测试文件失败: %v", err)
241 | 	}
242 | 	defer os.Remove(filename)
243 | 
244 | 	issues, err := checker.CheckSensitiveData(filename)
245 | 	if err != nil {
246 | 		t.Errorf("敏感数据检查失败: %v", err)
247 | 	}
248 | 
249 | 	if len(issues) == 0 {
250 | 		t.Error("应该检测到敏感数据泄露风险")
251 | 	}
252 | }
253 | 
254 | func TestCheckSandboxEscape(t *testing.T) {
255 | 	checker := NewSecurityChecker()
256 | 	content := `package main
257 | 
258 | import (
259 | 	"os"
260 | 	"os/exec"
261 | )
262 | 
263 | func main() {
264 | 	os.Remove("test.txt")
265 | 	exec.Command("ls").Run()
266 | }`
267 | 
268 | 	filename, err := createTestFile(content)
269 | 	if err != nil {
270 | 		t.Fatalf("创建测试文件失败: %v", err)
271 | 	}
272 | 	defer os.Remove(filename)
273 | 
274 | 	violations, err := checker.CheckSandboxEscape(filename)
275 | 	if err != nil {
276 | 		t.Errorf("沙箱逃逸检查失败: %v", err)
277 | 	}
278 | 
279 | 	if len(violations) == 0 {
280 | 		t.Error("应该检测到沙箱逃逸风险")
281 | 	}
282 | }
283 | 
284 | func TestPerformFullCheck(t *testing.T) {
285 | 	checker := NewSecurityChecker()
286 | 	content := `package main
287 | 
288 | import (
289 | 	"fmt"
290 | 	"os"
291 | 	"net/http"
292 | 	"math/rand"
293 | )
294 | 
295 | func main() {
296 | 	password := "secret123"
297 | 	os.Open("test.txt")
298 | 	http.Get("http://example.com")
299 | 	rand.Int()
300 | 	fmt.Printf("Password: %s\n", password)
301 | }`
302 | 
303 | 	filename, err := createTestFile(content)
304 | 	if err != nil {
305 | 		t.Fatalf("创建测试文件失败: %v", err)
306 | 	}
307 | 	defer os.Remove(filename)
308 | 
309 | 	results, err := checker.PerformFullCheck(filename)
310 | 	if err != nil {
311 | 		t.Errorf("完整检查失败: %v", err)
312 | 	}
313 | 
314 | 	expectedChecks := []string{
315 | 		"memory_usage",
316 | 		"execution_time",
317 | 		"file_access",
318 | 		"network_access",
319 | 		"input_validation",
320 | 		"random_generation",
321 | 		"sensitive_data",
322 | 		"sandbox_escape",
323 | 	}
324 | 
325 | 	for _, check := range expectedChecks {
326 | 		if _, ok := results[check]; !ok {
327 | 			t.Errorf("缺少检查结果: %s", check)
328 | 		}
329 | 	}
330 | } 


--------------------------------------------------------------------------------
/go/internal/web/app.go:
--------------------------------------------------------------------------------
  1 | package web
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"html/template"
  6 | 	"io/ioutil"
  7 | 	"net/http"
  8 | 	"os"
  9 | 	"path/filepath"
 10 | 	"time"
 11 | 
 12 | 	"github.com/gin-gonic/gin"
 13 | 	"github.com/re-movery/re-movery/internal/core"
 14 | 	"github.com/re-movery/re-movery/internal/detectors"
 15 | )
 16 | 
 17 | // App is the web application
 18 | type App struct {
 19 | 	scanner *core.Scanner
 20 | 	router  *gin.Engine
 21 | }
 22 | 
 23 | // NewApp creates a new web application
 24 | func NewApp() *App {
 25 | 	app := &App{
 26 | 		scanner: core.NewScanner(),
 27 | 		router:  gin.Default(),
 28 | 	}
 29 | 
 30 | 	// Register detectors
 31 | 	app.scanner.RegisterDetector(detectors.NewPythonDetector())
 32 | 	app.scanner.RegisterDetector(detectors.NewJavaScriptDetector())
 33 | 
 34 | 	// Setup routes
 35 | 	app.setupRoutes()
 36 | 
 37 | 	return app
 38 | }
 39 | 
 40 | // setupRoutes sets up the routes for the web application
 41 | func (a *App) setupRoutes() {
 42 | 	// Serve static files
 43 | 	a.router.Static("/static", "./static")
 44 | 
 45 | 	// Load templates
 46 | 	a.router.LoadHTMLGlob("templates/*")
 47 | 
 48 | 	// Routes
 49 | 	a.router.GET("/", a.indexHandler)
 50 | 	a.router.POST("/scan/file", a.scanFileHandler)
 51 | 	a.router.POST("/scan/directory", a.scanDirectoryHandler)
 52 | 	a.router.GET("/api/languages", a.languagesHandler)
 53 | 	a.router.GET("/health", a.healthHandler)
 54 | }
 55 | 
 56 | // Run runs the web application
 57 | func (a *App) Run(host string, port int) error {
 58 | 	return a.router.Run(fmt.Sprintf("%s:%d", host, port))
 59 | }
 60 | 
 61 | // indexHandler handles the index page
 62 | func (a *App) indexHandler(c *gin.Context) {
 63 | 	c.HTML(http.StatusOK, "index.html", gin.H{
 64 | 		"title": "Re-movery - Security Scanner",
 65 | 	})
 66 | }
 67 | 
 68 | // scanFileHandler handles file scanning
 69 | func (a *App) scanFileHandler(c *gin.Context) {
 70 | 	// Get file from form
 71 | 	file, err := c.FormFile("file")
 72 | 	if err != nil {
 73 | 		c.JSON(http.StatusBadRequest, gin.H{
 74 | 			"error": "No file provided",
 75 | 		})
 76 | 		return
 77 | 	}
 78 | 
 79 | 	// Save file to temporary location
 80 | 	tempFile := filepath.Join(os.TempDir(), file.Filename)
 81 | 	if err := c.SaveUploadedFile(file, tempFile); err != nil {
 82 | 		c.JSON(http.StatusInternalServerError, gin.H{
 83 | 			"error": "Failed to save file",
 84 | 		})
 85 | 		return
 86 | 	}
 87 | 	defer os.Remove(tempFile)
 88 | 
 89 | 	// Scan file
 90 | 	results, err := a.scanner.ScanFile(tempFile)
 91 | 	if err != nil {
 92 | 		c.JSON(http.StatusInternalServerError, gin.H{
 93 | 			"error": fmt.Sprintf("Failed to scan file: %v", err),
 94 | 		})
 95 | 		return
 96 | 	}
 97 | 
 98 | 	// Generate summary
 99 | 	summary := core.GenerateSummary(map[string][]core.Match{
100 | 		file.Filename: results,
101 | 	})
102 | 
103 | 	// Return results
104 | 	c.JSON(http.StatusOK, gin.H{
105 | 		"results": map[string][]core.Match{
106 | 			file.Filename: results,
107 | 		},
108 | 		"summary": summary,
109 | 	})
110 | }
111 | 
112 | // scanDirectoryHandler handles directory scanning
113 | func (a *App) scanDirectoryHandler(c *gin.Context) {
114 | 	// Get directory path from form
115 | 	directory := c.PostForm("directory")
116 | 	if directory == "" {
117 | 		c.JSON(http.StatusBadRequest, gin.H{
118 | 			"error": "No directory provided",
119 | 		})
120 | 		return
121 | 	}
122 | 
123 | 	// Check if directory exists
124 | 	if _, err := os.Stat(directory); os.IsNotExist(err) {
125 | 		c.JSON(http.StatusBadRequest, gin.H{
126 | 			"error": "Directory does not exist",
127 | 		})
128 | 		return
129 | 	}
130 | 
131 | 	// Get exclude patterns
132 | 	excludePatterns := c.PostFormArray("exclude")
133 | 
134 | 	// Scan directory
135 | 	results, err := a.scanner.ScanDirectory(directory, excludePatterns)
136 | 	if err != nil {
137 | 		c.JSON(http.StatusInternalServerError, gin.H{
138 | 			"error": fmt.Sprintf("Failed to scan directory: %v", err),
139 | 		})
140 | 		return
141 | 	}
142 | 
143 | 	// Generate summary
144 | 	summary := core.GenerateSummary(results)
145 | 
146 | 	// Return results
147 | 	c.JSON(http.StatusOK, gin.H{
148 | 		"results": results,
149 | 		"summary": summary,
150 | 	})
151 | }
152 | 
153 | // languagesHandler handles the supported languages request
154 | func (a *App) languagesHandler(c *gin.Context) {
155 | 	languages := a.scanner.SupportedLanguages()
156 | 	c.JSON(http.StatusOK, gin.H{
157 | 		"languages": languages,
158 | 	})
159 | }
160 | 
161 | // healthHandler handles the health check request
162 | func (a *App) healthHandler(c *gin.Context) {
163 | 	c.JSON(http.StatusOK, gin.H{
164 | 		"status": "ok",
165 | 		"time":   time.Now().Format(time.RFC3339),
166 | 	})
167 | } 


--------------------------------------------------------------------------------
/go/internal/web/static/css/style.css:
--------------------------------------------------------------------------------
/* Re-movery stylesheet */

/* Page base */
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f8f9fa;
}

.navbar-brand {
    font-weight: bold;
    color: #0d6efd;
}

/* Cards */
.card {
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    margin-bottom: 20px;
}

.card-header {
    font-weight: bold;
    background-color: #f8f9fa;
}

/* Severity colors: high = red, medium = orange, low = cyan */
.severity-high {
    color: #dc3545;
}

.severity-medium {
    color: #fd7e14;
}

.severity-low {
    color: #0dcaf0;
}

.chart-container {
    height: 300px;
}

/* Pill navigation */
.nav-pills .nav-link.active {
    background-color: #0d6efd;
}

.nav-pills .nav-link {
    color: #495057;
}

/* File list items */
.file-item {
    cursor: pointer;
}

.file-item:hover {
    background-color: #f8f9fa;
}

/* Code snippets; pre-wrap keeps whitespace while allowing long lines to wrap */
.code-block {
    background-color: #f8f9fa;
    border-radius: 5px;
    padding: 10px;
    font-family: monospace;
    white-space: pre-wrap;
    margin-top: 10px;
}

.footer {
    margin-top: 50px;
    padding: 20px 0;
    background-color: #f8f9fa;
    text-align: center;
    color: #6c757d;
}

/* Button styles */
.btn-primary {
    background-color: #0d6efd;
    border-color: #0d6efd;
}

.btn-primary:hover {
    background-color: #0b5ed7;
    border-color: #0a58ca;
}

/* Form styles */
.form-control:focus {
    border-color: #0d6efd;
    box-shadow: 0 0 0 0.25rem rgba(13, 110, 253, 0.25);
}

/* Table styles */
.table {
    border-collapse: collapse;
    width: 100%;
}

.table th {
    background-color: #f8f9fa;
    font-weight: bold;
}

.table-striped tbody tr:nth-of-type(odd) {
    background-color: rgba(0, 0, 0, 0.05);
}

/* Badge styles */
.badge {
    font-weight: normal;
    padding: 0.35em 0.65em;
}

/* Responsive adjustments: shorter charts on viewports up to 768px wide */
@media (max-width: 768px) {
    .chart-container {
        height: 200px;
    }
} 


--------------------------------------------------------------------------------
/go/tests/integration/workflow_test.go:
--------------------------------------------------------------------------------
  1 | package integration
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"os"
  6 | 	"path/filepath"
  7 | 	"testing"
  8 | 	"time"
  9 | 
 10 | 	"github.com/stretchr/testify/assert"
 11 | 
 12 | 	"github.com/heyangxu/Re-movery/go/internal/analyzers"
 13 | 	"github.com/heyangxu/Re-movery/go/internal/detectors"
 14 | 	"github.com/heyangxu/Re-movery/go/internal/reporters"
 15 | 	"github.com/heyangxu/Re-movery/go/internal/utils"
 16 | )
 17 | 
 18 | func TestWorkflow(t *testing.T) {
 19 | 	// 创建临时目录
 20 | 	tempDir, err := os.MkdirTemp("", "workflow_test")
 21 | 	if err != nil {
 22 | 		t.Fatalf("创建临时目录失败: %v", err)
 23 | 	}
 24 | 	defer os.RemoveAll(tempDir)
 25 | 
 26 | 	// 创建测试项目结构
 27 | 	err = createTestProject(tempDir)
 28 | 	if err != nil {
 29 | 		t.Fatalf("创建测试项目失败: %v", err)
 30 | 	}
 31 | 
 32 | 	// 初始化组件
 33 | 	detector := detectors.NewVulnerabilityDetector()
 34 | 	checker := utils.NewSecurityChecker()
 35 | 	analyzer := analyzers.NewCodeAnalyzer()
 36 | 	reporter := reporters.NewHTMLReporter()
 37 | 
 38 | 	// 测试完整工作流程
 39 | 	t.Run("TestFullWorkflow", func(t *testing.T) {
 40 | 		// 加载配置
 41 | 		configFile := filepath.Join(tempDir, "config.json")
 42 | 		configData, err := os.ReadFile(configFile)
 43 | 		assert.NoError(t, err)
 44 | 
 45 | 		var config map[string]interface{}
 46 | 		err = json.Unmarshal(configData, &config)
 47 | 		assert.NoError(t, err)
 48 | 
 49 | 		// 加载签名
 50 | 		signatureFile := filepath.Join(tempDir, "signatures.json")
 51 | 		err = detector.LoadSignatures(signatureFile)
 52 | 		assert.NoError(t, err)
 53 | 
 54 | 		// 分析源代码文件
 55 | 		srcDir := filepath.Join(tempDir, "src")
 56 | 		vulnerableFile := filepath.Join(srcDir, "vulnerable.go")
 57 | 		safeFile := filepath.Join(srcDir, "safe.go")
 58 | 
 59 | 		// 检测漏洞
 60 | 		vulnerableMatches, err := detector.DetectFile(vulnerableFile)
 61 | 		assert.NoError(t, err)
 62 | 		safeMatches, err := detector.DetectFile(safeFile)
 63 | 		assert.NoError(t, err)
 64 | 
 65 | 		assert.Greater(t, len(vulnerableMatches), 0)
 66 | 		assert.Equal(t, 0, len(safeMatches))
 67 | 
 68 | 		// 执行安全检查
 69 | 		vulnerableSecurity := checker.PerformFullCheck(vulnerableFile)
 70 | 		safeSecurity := checker.PerformFullCheck(safeFile)
 71 | 
 72 | 		assert.True(t, hasIssues(vulnerableSecurity))
 73 | 		assert.False(t, hasIssues(safeSecurity))
 74 | 
 75 | 		// 代码分析
 76 | 		vulnerableAnalysis, err := analyzer.AnalyzeFile(vulnerableFile)
 77 | 		assert.NoError(t, err)
 78 | 		safeAnalysis, err := analyzer.AnalyzeFile(safeFile)
 79 | 		assert.NoError(t, err)
 80 | 
 81 | 		assert.Greater(t, vulnerableAnalysis.Complexity, safeAnalysis.Complexity)
 82 | 
 83 | 		// 生成报告
 84 | 		reportData := map[string]interface{}{
 85 | 			"project_name":  config["project_name"],
 86 | 			"scan_time":    time.Now().Format("2006-01-02 15:04:05"),
 87 | 			"files_scanned": []string{vulnerableFile, safeFile},
 88 | 			"vulnerability_results": map[string]interface{}{
 89 | 				"vulnerable.go": vulnerableMatches,
 90 | 				"safe.go":      safeMatches,
 91 | 			},
 92 | 			"security_results": map[string]interface{}{
 93 | 				"vulnerable.go": vulnerableSecurity,
 94 | 				"safe.go":      safeSecurity,
 95 | 			},
 96 | 			"analysis_results": map[string]interface{}{
 97 | 				"vulnerable.go": vulnerableAnalysis,
 98 | 				"safe.go":      safeAnalysis,
 99 | 			},
100 | 		}
101 | 
102 | 		reportFile := filepath.Join(tempDir, "reports", "report.html")
103 | 		err = reporter.GenerateReport(reportData, reportFile)
104 | 		assert.NoError(t, err)
105 | 
106 | 		assert.FileExists(t, reportFile)
107 | 		fileInfo, err := os.Stat(reportFile)
108 | 		assert.NoError(t, err)
109 | 		assert.Greater(t, fileInfo.Size(), int64(0))
110 | 	})
111 | 
112 | 	// 测试并行处理
113 | 	t.Run("TestParallelProcessing", func(t *testing.T) {
114 | 		// 创建多个测试文件
115 | 		srcDir := filepath.Join(tempDir, "src")
116 | 		testFiles := make([]string, 5)
117 | 		testCode := `package main
118 | 
119 | import "os/exec"
120 | 
121 | func main() {
122 | 	exec.Command("ls").Run()
123 | }
124 | `
125 | 		for i := range testFiles {
126 | 			filePath := filepath.Join(srcDir, "test_%d.go")
127 | 			err := os.WriteFile(filePath, []byte(testCode), 0644)
128 | 			assert.NoError(t, err)
129 | 			testFiles[i] = filePath
130 | 		}
131 | 
132 | 		// 串行处理时间
133 | 		startSerial := time.Now()
134 | 		for _, file := range testFiles {
135 | 			_, err := detector.DetectFile(file)
136 | 			assert.NoError(t, err)
137 | 			checker.PerformFullCheck(file)
138 | 			_, err = analyzer.AnalyzeFile(file)
139 | 			assert.NoError(t, err)
140 | 		}
141 | 		serialDuration := time.Since(startSerial)
142 | 
143 | 		// 并行处理时间
144 | 		startParallel := time.Now()
145 | 		resultChan := make(chan struct{}, len(testFiles))
146 | 		for _, file := range testFiles {
147 | 			go func(f string) {
148 | 				_, err := detector.DetectFile(f)
149 | 				assert.NoError(t, err)
150 | 				checker.PerformFullCheck(f)
151 | 				_, err = analyzer.AnalyzeFile(f)
152 | 				assert.NoError(t, err)
153 | 				resultChan <- struct{}{}
154 | 			}(file)
155 | 		}
156 | 
157 | 		// 等待所有并行任务完成
158 | 		for i := 0; i < len(testFiles); i++ {
159 | 			<-resultChan
160 | 		}
161 | 		parallelDuration := time.Since(startParallel)
162 | 
163 | 		assert.Less(t, parallelDuration, serialDuration)
164 | 	})
165 | 
166 | 	// 测试错误处理
167 | 	t.Run("TestErrorHandling", func(t *testing.T) {
168 | 		// 测试无效的配置文件
169 | 		invalidConfig := filepath.Join(tempDir, "invalid_config.json")
170 | 		err := os.WriteFile(invalidConfig, []byte("invalid json"), 0644)
171 | 		assert.NoError(t, err)
172 | 
173 | 		_, err = os.ReadFile(invalidConfig)
174 | 		assert.NoError(t, err)
175 | 		var config map[string]interface{}
176 | 		err = json.Unmarshal([]byte("invalid json"), &config)
177 | 		assert.Error(t, err)
178 | 
179 | 		// 测试不存在的源代码文件
180 | 		nonExistentFile := filepath.Join(tempDir, "non_existent.go")
181 | 		_, err = detector.DetectFile(nonExistentFile)
182 | 		assert.Error(t, err)
183 | 
184 | 		// 测试无效的源代码
185 | 		invalidCode := filepath.Join(tempDir, "invalid.go")
186 | 		err = os.WriteFile(invalidCode, []byte("invalid go code"), 0644)
187 | 		assert.NoError(t, err)
188 | 
189 | 		_, err = analyzer.AnalyzeFile(invalidCode)
190 | 		assert.Error(t, err)
191 | 	})
192 | }
193 | 
194 | func createTestProject(dir string) error { // writes config.json, signatures.json, src/{vulnerable,safe}.go and reports/ under dir; returns the first error hit
195 | 	// Create the configuration file
196 | 	config := map[string]interface{}{
197 | 		"project_name":        "Test Project",
198 | 		"scan_paths":         []string{"src"},
199 | 		"exclude_paths":      []string{"tests", "docs"},
200 | 		"report_format":      "html",
201 | 		"report_path":        "reports",
202 | 		"severity_threshold": "medium",
203 | 		"parallel_processing": true,
204 | 		"max_workers":        4,
205 | 	}
206 | 
207 | 	configFile := filepath.Join(dir, "config.json")
208 | 	configData, err := json.MarshalIndent(config, "", "  ")
209 | 	if err != nil {
210 | 		return err
211 | 	}
212 | 
213 | 	err = os.WriteFile(configFile, configData, 0644)
214 | 	if err != nil {
215 | 		return err
216 | 	}
217 | 
218 | 	// Create the signature file (two signatures, each with regex code patterns)
219 | 	signatures := map[string]interface{}{
220 | 		"signatures": []map[string]interface{}{
221 | 			{
222 | 				"id":       "CMD001",
223 | 				"name":     "命令注入",
224 | 				"severity": "high",
225 | 				"code_patterns": []string{
226 | 					`exec\.Command\([^)]*\)`,
227 | 					`os\.exec\.Command\([^)]*\)`,
228 | 				},
229 | 			},
230 | 			{
231 | 				"id":       "SQL001",
232 | 				"name":     "SQL注入",
233 | 				"severity": "high",
234 | 				"code_patterns": []string{
235 | 					`db\.Query\([^)]*\+[^)]*\)`,
236 | 					`db\.Exec\([^)]*\+[^)]*\)`,
237 | 				},
238 | 			},
239 | 		},
240 | 	}
241 | 
242 | 	signatureFile := filepath.Join(dir, "signatures.json")
243 | 	signatureData, err := json.MarshalIndent(signatures, "", "  ")
244 | 	if err != nil {
245 | 		return err
246 | 	}
247 | 
248 | 	err = os.WriteFile(signatureFile, signatureData, 0644)
249 | 	if err != nil {
250 | 		return err
251 | 	}
252 | 
253 | 	// Create the source directory
254 | 	srcDir := filepath.Join(dir, "src")
255 | 	err = os.MkdirAll(srcDir, 0755)
256 | 	if err != nil {
257 | 		return err
258 | 	}
259 | 
260 | 	// Create the vulnerable source file (shell command execution plus string-concatenated SQL)
261 | 	vulnerableCode := `package main
262 | 
263 | import (
264 | 	"database/sql"
265 | 	"os/exec"
266 | )
267 | 
268 | func unsafeCommand(cmd string) {
269 | 	exec.Command("bash", "-c", cmd).Run()
270 | }
271 | 
272 | func unsafeQuery(db *sql.DB, id string) {
273 | 	db.Query("SELECT * FROM users WHERE id = " + id)
274 | }
275 | 
276 | func main() {
277 | 	unsafeCommand("ls -l")
278 | 	db, _ := sql.Open("mysql", "user:password@/dbname")
279 | 	unsafeQuery(db, "1 OR 1=1")
280 | }
281 | `
282 | 
283 | 	vulnerableFile := filepath.Join(srcDir, "vulnerable.go")
284 | 	err = os.WriteFile(vulnerableFile, []byte(vulnerableCode), 0644)
285 | 	if err != nil {
286 | 		return err
287 | 	}
288 | 
289 | 	// Create the safe source file (parameterized query, no command execution)
290 | 	safeCode := `package main
291 | 
292 | import (
293 | 	"database/sql"
294 | )
295 | 
296 | func safeQuery(db *sql.DB, id string) {
297 | 	db.Query("SELECT * FROM users WHERE id = ?", id)
298 | }
299 | 
300 | func main() {
301 | 	db, _ := sql.Open("mysql", "user:password@/dbname")
302 | 	safeQuery(db, "1")
303 | }
304 | `
305 | 
306 | 	safeFile := filepath.Join(srcDir, "safe.go")
307 | 	err = os.WriteFile(safeFile, []byte(safeCode), 0644)
308 | 	if err != nil {
309 | 		return err
310 | 	}
311 | 
312 | 	// Create the reports directory; its error (if any) is the function's result
313 | 	reportDir := filepath.Join(dir, "reports")
314 | 	return os.MkdirAll(reportDir, 0755)
315 | }
316 | 
317 | func hasIssues(results map[string]utils.SecurityCheckResult) bool {
318 | 	// Reports whether at least one check result has its HasIssues flag set.
319 | 	found := false
320 | 	for key := range results {
321 | 		found = found || results[key].HasIssues
322 | 	}
323 | 	return found
324 | }


--------------------------------------------------------------------------------
/go/tests/security/security_test.go:
--------------------------------------------------------------------------------
  1 | package security
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io/ioutil"
  6 | 	"os"
  7 | 	"path/filepath"
  8 | 	"testing"
  9 | 	"time"
 10 | 
 11 | 	"github.com/stretchr/testify/assert"
 12 | 	"github.com/stretchr/testify/require"
 13 | 
 14 | 	"re-movery/internal/detectors"
 15 | 	"re-movery/internal/utils"
 16 | )
 17 | 
 18 | // TestSecurity bundles the fixtures shared by the security tests in this file
 19 | type TestSecurity struct {
 20 | 	tempDir string // directory in which createTestFile creates its temporary files
 21 | 	detector *detectors.VulnerabilityDetector // vulnerability detector (DetectFile is called in TestCodeInjection)
 22 | 	checker *utils.SecurityChecker // security checker whose Check* methods the tests exercise
 23 | }
 24 | 
 25 | // createTestFile writes content to a new "test-*.go" file in ts.tempDir and returns the file's path.
 26 | func (ts *TestSecurity) createTestFile(content string) (string, error) {
 27 | 	file, err := ioutil.TempFile(ts.tempDir, "test-*.go")
 28 | 	if err != nil {
 29 | 		return "", fmt.Errorf("创建临时文件失败: %w", err) // %w keeps the cause available to errors.Is/As
 30 | 	}
 31 | 	defer file.Close()
 32 | 
 33 | 	if _, err := file.WriteString(content); err != nil {
 34 | 		return "", fmt.Errorf("写入文件内容失败: %w", err)
 35 | 	}
 36 | 
 37 | 	return file.Name(), nil
 38 | }
 39 | 
 40 | func TestMemoryLimit(t *testing.T) {
 41 | 	ts := &TestSecurity{
 42 | 		tempDir: t.TempDir(),
 43 | 		detector: detectors.NewVulnerabilityDetector(),
 44 | 		checker: utils.NewSecurityChecker(),
 45 | 	}
 46 | 
 47 | 	// 创建可能导致内存溢出的测试文件
 48 | 	content := `
 49 | 	package main
 50 | 
 51 | 	func memoryIntensive() {
 52 | 		largeSlice := make([]int, 1<<30) // 尝试分配大量内存
 53 | 		for i := range largeSlice {
 54 | 			largeSlice[i] = i
 55 | 		}
 56 | 	}
 57 | 	`
 58 | 	
 59 | 	filePath, err := ts.createTestFile(content)
 60 | 	require.NoError(t, err)
 61 | 
 62 | 	// 检查内存使用
 63 | 	memoryUsage, err := ts.checker.CheckMemoryUsage(filePath)
 64 | 	require.NoError(t, err)
 65 | 	assert.Less(t, memoryUsage, uint64(8<<30)) // 8GB限制
 66 | }
 67 | 
 68 | func TestExecutionTimeout(t *testing.T) {
 69 | 	ts := &TestSecurity{
 70 | 		tempDir:  t.TempDir(),
 71 | 		detector: detectors.NewVulnerabilityDetector(),
 72 | 		checker:  utils.NewSecurityChecker(),
 73 | 	}
 74 | 
 75 | 	// Create a test file whose function contains an infinite loop.
 76 | 	content := `
 77 | 	package main
 78 | 
 79 | 	func infiniteLoop() {
 80 | 		for {
 81 | 			// 无限循环
 82 | 		}
 83 | 	}
 84 | 	`
 85 | 
 86 | 	filePath, err := ts.createTestFile(content)
 87 | 	require.NoError(t, err)
 88 | 
 89 | 	// The execution-time check (5s budget) is expected to return a timeout error.
 90 | 	err = ts.checker.CheckExecutionTime(filePath, 5*time.Second)
 91 | 	require.Error(t, err) // require stops here on a nil error, so err.Error() below cannot panic
 92 | 	assert.Contains(t, err.Error(), "timeout")
 93 | }
 94 | 
 95 | func TestFileAccess(t *testing.T) {
 96 | 	ts := &TestSecurity{
 97 | 		tempDir:  t.TempDir(),
 98 | 		detector: detectors.NewVulnerabilityDetector(),
 99 | 		checker:  utils.NewSecurityChecker(),
100 | 	}
101 | 
102 | 	// Create a test file that opens /etc/passwd.
103 | 	content := `
104 | 	package main
105 | 
106 | 	import "os"
107 | 
108 | 	func accessSensitiveFile() {
109 | 		file, _ := os.Open("/etc/passwd")
110 | 		defer file.Close()
111 | 	}
112 | 	`
113 | 
114 | 	filePath, err := ts.createTestFile(content)
115 | 	require.NoError(t, err)
116 | 
117 | 	// Check file access; the first violation must mention the opened path.
118 | 	violations, err := ts.checker.CheckFileAccess(filePath)
119 | 	require.NoError(t, err)
120 | 	require.Greater(t, len(violations), 0) // require: an empty result must not reach violations[0]
121 | 	assert.Contains(t, violations[0], "/etc/passwd")
122 | }
123 | 
124 | func TestNetworkAccess(t *testing.T) {
125 | 	ts := &TestSecurity{
126 | 		tempDir:  t.TempDir(),
127 | 		detector: detectors.NewVulnerabilityDetector(),
128 | 		checker:  utils.NewSecurityChecker(),
129 | 	}
130 | 
131 | 	// Create a test file that dials an external TCP endpoint.
132 | 	content := `
133 | 	package main
134 | 
135 | 	import "net"
136 | 
137 | 	func connectExternal() {
138 | 		conn, _ := net.Dial("tcp", "example.com:80")
139 | 		defer conn.Close()
140 | 	}
141 | 	`
142 | 
143 | 	filePath, err := ts.createTestFile(content)
144 | 	require.NoError(t, err)
145 | 
146 | 	// Check network access; the first violation must mention net.Dial.
147 | 	violations, err := ts.checker.CheckNetworkAccess(filePath)
148 | 	require.NoError(t, err)
149 | 	require.Greater(t, len(violations), 0) // require: an empty result must not reach violations[0]
150 | 	assert.Contains(t, violations[0], "net.Dial")
151 | }
152 | 
153 | func TestCodeInjection(t *testing.T) {
154 | 	ts := &TestSecurity{
155 | 		tempDir:  t.TempDir(),
156 | 		detector: detectors.NewVulnerabilityDetector(),
157 | 		checker:  utils.NewSecurityChecker(),
158 | 	}
159 | 
160 | 	// Create a test file that passes caller input to "bash -c".
161 | 	content := `
162 | 	package main
163 | 
164 | 	import "os/exec"
165 | 
166 | 	func executeInput(userInput string) {
167 | 		cmd := exec.Command("bash", "-c", userInput)
168 | 		cmd.Run()
169 | 	}
170 | 	`
171 | 
172 | 	filePath, err := ts.createTestFile(content)
173 | 	require.NoError(t, err)
174 | 
175 | 	// Run the detector; the first finding must carry severity "HIGH".
176 | 	vulnerabilities, err := ts.detector.DetectFile(filePath)
177 | 	require.NoError(t, err)
178 | 	require.Greater(t, len(vulnerabilities), 0) // require: an empty result must not reach vulnerabilities[0]
179 | 	assert.Equal(t, "HIGH", vulnerabilities[0].Severity)
180 | }
181 | 
182 | func TestInputValidation(t *testing.T) {
183 | 	ts := &TestSecurity{
184 | 		tempDir: t.TempDir(),
185 | 		detector: detectors.NewVulnerabilityDetector(),
186 | 		checker: utils.NewSecurityChecker(),
187 | 	}
188 | 
189 | 	// 创建测试文件
190 | 	content := `
191 | 	package main
192 | 
193 | 	import "fmt"
194 | 
195 | 	func processInput(userInput string) {
196 | 		fmt.Sprintf("%s", userInput) // 未经验证的输入
197 | 	}
198 | 	`
199 | 
200 | 	filePath, err := ts.createTestFile(content)
201 | 	require.NoError(t, err)
202 | 
203 | 	// 检查输入验证
204 | 	issues, err := ts.checker.CheckInputValidation(filePath)
205 | 	require.NoError(t, err)
206 | 	assert.Greater(t, len(issues), 0)
207 | }
208 | 
209 | func TestSecureRandom(t *testing.T) {
210 | 	ts := &TestSecurity{
211 | 		tempDir:  t.TempDir(),
212 | 		detector: detectors.NewVulnerabilityDetector(),
213 | 		checker:  utils.NewSecurityChecker(),
214 | 	}
215 | 
216 | 	// Create a test file that builds a token with math/rand.
217 | 	content := `
218 | 	package main
219 | 
220 | 	import "math/rand"
221 | 
222 | 	func generateToken() string {
223 | 		const chars = "0123456789ABCDEF"
224 | 		result := make([]byte, 32)
225 | 		for i := range result {
226 | 			result[i] = chars[rand.Intn(len(chars))]
227 | 		}
228 | 		return string(result)
229 | 	}
230 | 	`
231 | 
232 | 	filePath, err := ts.createTestFile(content)
233 | 	require.NoError(t, err)
234 | 
235 | 	// Check random-number generation; the first issue must mention math/rand.
236 | 	issues, err := ts.checker.CheckRandomGeneration(filePath)
237 | 	require.NoError(t, err)
238 | 	require.Greater(t, len(issues), 0) // require: an empty result must not reach issues[0]
239 | 	assert.Contains(t, issues[0], "math/rand")
240 | }
241 | 
242 | func TestSensitiveData(t *testing.T) {
243 | 	ts := &TestSecurity{
244 | 		tempDir:  t.TempDir(),
245 | 		detector: detectors.NewVulnerabilityDetector(),
246 | 		checker:  utils.NewSecurityChecker(),
247 | 	}
248 | 
249 | 	// Create a test file that prints a password.
250 | 	content := `
251 | 	package main
252 | 
253 | 	import "fmt"
254 | 
255 | 	func processPassword(password string) {
256 | 		fmt.Printf("Password is: %s\n", password) // 敏感信息泄露
257 | 	}
258 | 	`
259 | 
260 | 	filePath, err := ts.createTestFile(content)
261 | 	require.NoError(t, err)
262 | 
263 | 	// Check sensitive-data handling; the first issue must mention "password".
264 | 	issues, err := ts.checker.CheckSensitiveData(filePath)
265 | 	require.NoError(t, err)
266 | 	require.Greater(t, len(issues), 0) // require: an empty result must not reach issues[0]
267 | 	assert.Contains(t, issues[0], "password")
268 | }
269 | 
270 | func TestSandboxEscape(t *testing.T) {
271 | 	ts := &TestSecurity{
272 | 		tempDir:  t.TempDir(),
273 | 		detector: detectors.NewVulnerabilityDetector(),
274 | 		checker:  utils.NewSecurityChecker(),
275 | 	}
276 | 
277 | 	// Create a test file that removes "/" and chmods /etc/passwd.
278 | 	content := `
279 | 	package main
280 | 
281 | 	import (
282 | 		"os"
283 | 		"os/exec"
284 | 	)
285 | 
286 | 	func dangerousOperation() {
287 | 		os.RemoveAll("/")
288 | 		exec.Command("chmod", "777", "/etc/passwd").Run()
289 | 	}
290 | 	`
291 | 
292 | 	filePath, err := ts.createTestFile(content)
293 | 	require.NoError(t, err)
294 | 
295 | 	// Check for sandbox escapes; the first violation must mention os.RemoveAll.
296 | 	violations, err := ts.checker.CheckSandboxEscape(filePath)
297 | 	require.NoError(t, err)
298 | 	require.Greater(t, len(violations), 0) // require: an empty result must not reach violations[0]
299 | 	assert.Contains(t, violations[0], "os.RemoveAll")
300 | }


--------------------------------------------------------------------------------
/go/web/templates/report.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
  6 |     <title>Re-movery Vulnerability Report</title>
  7 |     <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet">
  8 |     <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
  9 |     <style>
 10 |         .vulnerability-card {
 11 |             margin-bottom: 1rem;
 12 |         }
 13 |         .severity-high {
 14 |             color: #dc3545;
 15 |         }
 16 |         .severity-medium {
 17 |             color: #ffc107;
 18 |         }
 19 |         .severity-low {
 20 |             color: #28a745;
 21 |         }
 22 |         .code-block {
 23 |             background-color: #f8f9fa;
 24 |             padding: 1rem;
 25 |             border-radius: 4px;
 26 |             font-family: monospace;
 27 |         }
 28 |     </style>
 29 | </head>
 30 | <body>
 31 |     <div class="container mt-5">
 32 |         <h1 class="mb-4">Re-movery Vulnerability Report</h1>
 33 |         
 34 |         <div class="row mb-4">
 35 |             <div class="col">
 36 |                 <div class="card">
 37 |                     <div class="card-body">
 38 |                         <h5 class="card-title">Report Summary</h5>
 39 |                         <p class="card-text">Generated at: {{.GeneratedAt}}</p>
 40 |                         <p class="card-text">Total Files Scanned: {{.TotalFiles}}</p>
 41 |                         <p class="card-text">Total Vulnerabilities Found: {{.TotalMatches}}</p>
 42 |                     </div>
 43 |                 </div>
 44 |             </div>
 45 |         </div>
 46 | 
 47 |         <div class="row mb-4">
 48 |             <div class="col-md-6">
 49 |                 <div class="card">
 50 |                     <div class="card-body">
 51 |                         <div id="severityChart" style="height: 400px;"></div>
 52 |                     </div>
 53 |                 </div>
 54 |             </div>
 55 |             <div class="col-md-6">
 56 |                 <div class="card">
 57 |                     <div class="card-body">
 58 |                         <div id="typeChart" style="height: 400px;"></div>
 59 |                     </div>
 60 |                 </div>
 61 |             </div>
 62 |         </div>
 63 | 
 64 |         <h2 class="mb-4">Detailed Findings</h2>
 65 |         {{range .Vulnerabilities}}
 66 |         <div class="card vulnerability-card">
 67 |             <div class="card-header">
 68 |                 <h5 class="mb-0">
 69 |                     {{.Signature.Name}}
 70 |                     <span class="float-end severity-{{.Signature.Severity | lower}}">
 71 |                         {{.Signature.Severity}}
 72 |                     </span>
 73 |                 </h5>
 74 |             </div>
 75 |             <div class="card-body">
 76 |                 <h6 class="card-subtitle mb-2 text-muted">ID: {{.Signature.ID}}</h6>
 77 |                 <p class="card-text">{{.Signature.Description}}</p>
 78 |                 
 79 |                 <div class="mb-3">
 80 |                     <strong>File:</strong> {{.File}}<br>
 81 |                     <strong>Line:</strong> {{.Line}}<br>
 82 |                     <strong>Confidence:</strong> {{printf "%.1f%%" (mul .Confidence 100)}}
 83 |                 </div>
 84 | 
 85 |                 <div class="code-block">
 86 |                     <pre><code>{{.Code}}</code></pre>
 87 |                 </div>
 88 | 
 89 |                 {{if .Context}}
 90 |                 <div class="mt-3">
 91 |                     <h6>Context:</h6>
 92 |                     <div class="code-block">
 93 |                         <pre><code>{{range .Context}}{{.}}
 94 | {{end}}</code></pre>
 95 |                     </div>
 96 |                 </div>
 97 |                 {{end}}
 98 | 
 99 |                 {{if .Signature.References}}{{/* reference links open in a new tab; rel keeps window.opener away from the target page */}}
100 |                 <div class="mt-3">
101 |                     <h6>References:</h6>
102 |                     <ul>
103 |                         {{range .Signature.References}}
104 |                         <li><a href="{{.}}" target="_blank" rel="noopener noreferrer">{{.}}</a></li>
105 |                         {{end}}
106 |                     </ul>
107 |                 </div>
108 |                 {{end}}
109 |             </div>
110 |         </div>
111 |         {{end}}
112 |     </div>
113 | 
114 |     <script>
115 |         // Initialize charts
116 |         var severityChart = echarts.init(document.getElementById('severityChart'));
117 |         var typeChart = echarts.init(document.getElementById('typeChart'));
118 | 
119 |         // Set chart options
120 |         severityChart.setOption({{.SeverityChart}});
121 |         typeChart.setOption({{.TypeChart}});
122 | 
123 |         // Handle window resize
124 |         window.addEventListener('resize', function() {
125 |             severityChart.resize();
126 |             typeChart.resize();
127 |         });
128 |     </script>
129 | </body>
130 | </html> 


--------------------------------------------------------------------------------
/movery/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Re-Movery - A tool for discovering modified vulnerable code clones
 3 | """
 4 | 
 5 | __version__ = "1.0.0"
 6 | __author__ = "heyangxu"
 7 | __email__ = ""
 8 | 
 9 | from .config.config import config
10 | from .detectors.vulnerability import VulnerabilityDetector
11 | from .utils.security import SecurityChecker
12 | 
13 | __all__ = ["config", "VulnerabilityDetector", "SecurityChecker"] 


--------------------------------------------------------------------------------
/movery/analyzers/__init__.py:
--------------------------------------------------------------------------------
1 | from .language import LanguageAnalyzer
2 | from .code_analyzer import CodeAnalyzer
3 | 
4 | __all__ = ['LanguageAnalyzer', 'CodeAnalyzer'] 


--------------------------------------------------------------------------------
/movery/analyzers/code_analyzer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Code analysis utilities for Movery
 3 | """
 4 | from typing import Dict, List, Optional
 5 | import os
 6 | import ast
 7 | import logging
 8 | 
 9 | from movery.utils.logging import get_logger
10 | from movery.config.config import config
11 | from .language import LanguageAnalyzer, PythonAnalyzer, JavaAnalyzer, CppAnalyzer, GoAnalyzer
12 | 
13 | logger = get_logger(__name__)
14 | 
15 | class CodeAnalyzer:
16 |     """Code analyzer that supports multiple programming languages"""
17 |     
18 |     def __init__(self):
19 |         self.analyzers = {
20 |             ".py": PythonAnalyzer(),
21 |             ".java": JavaAnalyzer(),
22 |             ".cpp": CppAnalyzer(),
23 |             ".hpp": CppAnalyzer(),
24 |             ".cc": CppAnalyzer(),
25 |             ".hh": CppAnalyzer(),
26 |             ".go": GoAnalyzer()
27 |         }
28 |         
29 |     def analyze_file(self, filename: str) -> Dict:
30 |         """Analyze a source code file"""
31 |         ext = os.path.splitext(filename)[1].lower()
32 |         
33 |         if ext not in self.analyzers:
34 |             logger.warning(f"Unsupported file type: {ext}")
35 |             return {
36 |                 "complexity": 0,
37 |                 "functions": [],
38 |                 "classes": [],
39 |                 "imports": [],
40 |                 "variables": []
41 |             }
42 |             
43 |         analyzer = self.analyzers[ext]
44 |         try:
45 |             ast_node = analyzer.parse_file(filename)
46 |             
47 |             return {
48 |                 "complexity": self._calculate_complexity(ast_node),
49 |                 "functions": analyzer.get_functions(ast_node),
50 |                 "classes": analyzer.get_classes(ast_node),
51 |                 "imports": analyzer.get_imports(ast_node),
52 |                 "variables": analyzer.get_variables(ast_node)
53 |             }
54 |             
55 |         except Exception as e:
56 |             logger.error(f"Error analyzing file {filename}: {str(e)}")
57 |             return {
58 |                 "complexity": 0,
59 |                 "functions": [],
60 |                 "classes": [],
61 |                 "imports": [],
62 |                 "variables": []
63 |             }
64 |             
65 |     def _calculate_complexity(self, ast_node: any) -> int:
66 |         """Calculate code complexity"""
67 |         # 简单实现 - 仅计算函数和类的数量
68 |         if isinstance(ast_node, ast.AST):
69 |             functions = sum(1 for node in ast.walk(ast_node) 
70 |                           if isinstance(node, ast.FunctionDef))
71 |             classes = sum(1 for node in ast.walk(ast_node)
72 |                          if isinstance(node, ast.ClassDef))
73 |             return functions + classes
74 |         return 0 


--------------------------------------------------------------------------------
/movery/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "processing": {
 3 |         "num_processes": 4,
 4 |         "max_memory_usage": 8589934592,
 5 |         "chunk_size": 1048576,
 6 |         "enable_cache": true,
 7 |         "cache_dir": ".cache",
 8 |         "cache_max_size": 1073741824,
 9 |         "supported_languages": [
10 |             "c",
11 |             "cpp",
12 |             "java", 
13 |             "python",
14 |             "go",
15 |             "javascript"
16 |         ]
17 |     },
18 |     "detector": {
19 |         "min_similarity": 0.8,
20 |         "max_edit_distance": 10,
21 |         "context_lines": 3,
22 |         "max_ast_depth": 50,
23 |         "max_cfg_nodes": 1000,
24 |         "enable_semantic_match": true,
25 |         "enable_syntax_match": true,
26 |         "enable_token_match": true,
27 |         "report_format": "html",
28 |         "report_dir": "reports",
29 |         "exclude_patterns": [
30 |             "**/test/*",
31 |             "**/tests/*",
32 |             "**/vendor/*",
33 |             "**/node_modules/*"
34 |         ]
35 |     },
36 |     "logging": {
37 |         "log_level": "INFO",
38 |         "log_file": "movery.log",
39 |         "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
40 |         "enable_profiling": false,
41 |         "profile_output": "profile.stats",
42 |         "show_progress": true,
43 |         "progress_interval": 1
44 |     },
45 |     "security": {
46 |         "max_file_size": 104857600,
47 |         "allowed_schemes": [
48 |             "file",
49 |             "http",
50 |             "https"
51 |         ],
52 |         "enable_sandbox": true,
53 |         "sandbox_timeout": 60,
54 |         "require_auth": false,
55 |         "rate_limit": 100,
56 |         "rate_limit_period": 60
57 |     }
58 | } 


--------------------------------------------------------------------------------
/movery/config/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration module for Movery
3 | """
4 | from .config import config
5 | 
6 | __all__ = ['config'] 


--------------------------------------------------------------------------------
/movery/config/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "processing": {
 3 |         "num_processes": 4,
 4 |         "max_memory_usage": 8589934592,
 5 |         "chunk_size": 1048576,
 6 |         "enable_cache": true,
 7 |         "cache_dir": ".cache",
 8 |         "cache_max_size": 1073741824,
 9 |         "supported_languages": [
10 |             "c",
11 |             "cpp",
12 |             "java", 
13 |             "python",
14 |             "go",
15 |             "javascript"
16 |         ]
17 |     },
18 |     "detector": {
19 |         "min_similarity": 0.8,
20 |         "max_edit_distance": 10,
21 |         "context_lines": 3,
22 |         "max_ast_depth": 50,
23 |         "max_cfg_nodes": 1000,
24 |         "enable_semantic_match": true,
25 |         "enable_syntax_match": true,
26 |         "enable_token_match": true,
27 |         "report_format": "html",
28 |         "report_dir": "reports",
29 |         "exclude_patterns": [
30 |             "**/test/*",
31 |             "**/tests/*",
32 |             "**/vendor/*",
33 |             "**/node_modules/*"
34 |         ]
35 |     },
36 |     "logging": {
37 |         "log_level": "INFO",
38 |         "log_file": "movery.log",
39 |         "log_format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
40 |         "enable_profiling": false,
41 |         "profile_output": "profile.stats",
42 |         "show_progress": true,
43 |         "progress_interval": 1
44 |     },
45 |     "security": {
46 |         "max_file_size": 104857600,
47 |         "allowed_schemes": [
48 |             "file",
49 |             "http",
50 |             "https"
51 |         ],
52 |         "enable_sandbox": true,
53 |         "sandbox_timeout": 60,
54 |         "require_auth": false,
55 |         "rate_limit": 100,
56 |         "rate_limit_period": 60
57 |     }
58 | } 


--------------------------------------------------------------------------------
/movery/config/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Configuration module for Movery
 3 | """
 4 | import json
 5 | import os
 6 | from typing import Dict, Any, List
 7 | from dataclasses import dataclass
 8 | 
 9 | @dataclass
10 | class ProcessingConfig:  # built by load_config from the "processing" object of the JSON file
11 |     num_processes: int
12 |     max_memory_usage: int  # presumably bytes (bundled config.json uses 8589934592) - TODO confirm
13 |     chunk_size: int  # presumably bytes (bundled config.json uses 1048576) - TODO confirm
14 |     enable_cache: bool
15 |     cache_dir: str
16 |     cache_max_size: int  # presumably bytes (bundled config.json uses 1073741824) - TODO confirm
17 |     supported_languages: List[str]  # language names, e.g. "c", "python", "go" in the bundled config.json
18 | 
19 | @dataclass
20 | class DetectorConfig:  # built by load_config from the "detector" object of the JSON file
21 |     min_similarity: float  # bundled config.json uses 0.8; presumably a 0..1 ratio - TODO confirm
22 |     max_edit_distance: int
23 |     context_lines: int
24 |     max_ast_depth: int
25 |     max_cfg_nodes: int
26 |     enable_semantic_match: bool
27 |     enable_syntax_match: bool
28 |     enable_token_match: bool
29 |     report_format: str  # "html" in the bundled config.json
30 |     report_dir: str
31 |     exclude_patterns: List[str]  # glob-style strings such as "**/tests/*" in the bundled config.json
32 | 
33 | @dataclass
34 | class LoggingConfig:  # built by load_config from the "logging" object of the JSON file
35 |     log_level: str  # "INFO" in the bundled config.json
36 |     log_file: str
37 |     log_format: str  # %-style format string in the bundled config.json, e.g. "%(asctime)s - ..."
38 |     enable_profiling: bool
39 |     profile_output: str
40 |     show_progress: bool
41 |     progress_interval: int  # unit not visible here; presumably seconds - TODO confirm
42 | 
43 | @dataclass
44 | class SecurityConfig:  # built by load_config from the "security" object of the JSON file
45 |     max_file_size: int  # presumably bytes (bundled config.json uses 104857600) - TODO confirm
46 |     allowed_schemes: List[str]  # "file", "http", "https" in the bundled config.json
47 |     enable_sandbox: bool
48 |     sandbox_timeout: int  # unit not visible here; presumably seconds - TODO confirm
49 |     require_auth: bool
50 |     rate_limit: int
51 |     rate_limit_period: int  # unit not visible here; presumably seconds - TODO confirm
52 | 
53 | @dataclass
54 | class Config:  # top-level configuration: one field per section of the JSON file
55 |     processing: ProcessingConfig  # from the "processing" object
56 |     detector: DetectorConfig  # from the "detector" object
57 |     logging: LoggingConfig  # from the "logging" object
58 |     security: SecurityConfig  # from the "security" object
59 | 
60 | def load_config(config_path: str = None) -> Config:
61 |     """
62 |     Load configuration from JSON file
63 |     
64 |     Args:
65 |         config_path: Path to config file. If None, uses default config.json
66 |         
67 |     Returns:
68 |         Configuration object
69 |     """
70 |     if config_path is None:
71 |         config_path = os.path.join(os.path.dirname(__file__), "config.json")
72 |         
73 |     with open(config_path, "r", encoding="utf-8") as f:
74 |         data = json.load(f)
75 |         
76 |     return Config(
77 |         processing=ProcessingConfig(**data["processing"]),
78 |         detector=DetectorConfig(**data["detector"]),
79 |         logging=LoggingConfig(**data["logging"]),
80 |         security=SecurityConfig(**data["security"])
81 |     )
82 | 
83 | # Load default configuration
84 | config = load_config() 


--------------------------------------------------------------------------------
/movery/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .vulnerability import VulnerabilityDetector, Signature, VulnerabilityMatch
2 | 
3 | __all__ = ['VulnerabilityDetector', 'Signature', 'VulnerabilityMatch'] 


--------------------------------------------------------------------------------
/movery/go/cmd/movery/main.go:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heyangxu/Re-movery/aad70c570ac2c4417d7b4844cc9b4846a148cbd5/movery/go/cmd/movery/main.go


--------------------------------------------------------------------------------
/movery/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Main entry point for Movery
  3 | """
  4 | import os
  5 | import sys
  6 | import argparse
  7 | import logging
  8 | import json
  9 | from typing import List, Dict, Optional
 10 | import time
 11 | from pathlib import Path
 12 | import concurrent.futures
 13 | import traceback
 14 | 
 15 | from movery.config.config import config, MoveryConfig
 16 | from movery.utils.logging import setup_logging, get_logger
 17 | from movery.utils.memory import memory_monitor
 18 | from movery.utils.parallel import worker_pool
 19 | from movery.analyzers.language import LanguageAnalyzerFactory
 20 | from movery.detectors.vulnerability import detector
 21 | from movery.reporters.html import reporter
 22 | 
 23 | logger = get_logger(__name__)
 24 | 
 25 | def parse_args():
 26 |     """Parse command line arguments"""
 27 |     parser = argparse.ArgumentParser(
 28 |         description="Movery - A tool for discovering modified vulnerable code clones"
 29 |     )
 30 |     
 31 |     parser.add_argument(
 32 |         "target",
 33 |         help="Target program or directory to analyze"
 34 |     )
 35 |     
 36 |     parser.add_argument(
 37 |         "-c", "--config",
 38 |         help="Path to configuration file",
 39 |         default="config.json"
 40 |     )
 41 |     
 42 |     parser.add_argument(
 43 |         "-s", "--signatures",
 44 |         help="Path to vulnerability signatures file",
 45 |         default="signatures.json"
 46 |     )
 47 |     
 48 |     parser.add_argument(
 49 |         "-o", "--output",
 50 |         help="Output directory for reports",
 51 |         default="reports"
 52 |     )
 53 |     
 54 |     parser.add_argument(
 55 |         "-j", "--jobs",
 56 |         help="Number of parallel jobs",
 57 |         type=int,
 58 |         default=None
 59 |     )
 60 |     
 61 |     parser.add_argument(
 62 |         "-v", "--verbose",
 63 |         help="Enable verbose output",
 64 |         action="store_true"
 65 |     )
 66 |     
 67 |     parser.add_argument(
 68 |         "--cache",
 69 |         help="Enable result caching",
 70 |         action="store_true"
 71 |     )
 72 |     
 73 |     return parser.parse_args()
 74 | 
 75 | def load_config(config_file: str) -> MoveryConfig:
 76 |     """Load configuration from file"""
 77 |     if os.path.exists(config_file):
 78 |         return MoveryConfig.from_file(config_file)
 79 |     return MoveryConfig()
 80 | 
 81 | def find_source_files(target: str) -> List[str]:
 82 |     """Find all source files in target"""
 83 |     source_files = []
 84 |     
 85 |     for root, _, files in os.walk(target):
 86 |         for file in files:
 87 |             file_path = os.path.join(root, file)
 88 |             
 89 |             # Skip files larger than limit
 90 |             if os.path.getsize(file_path) > config.security.max_file_size:
 91 |                 logger.warning(f"Skipping large file: {file_path}")
 92 |                 continue
 93 |                 
 94 |             # Skip files matching exclude patterns
 95 |             skip = False
 96 |             for pattern in config.detector.exclude_patterns:
 97 |                 if Path(file_path).match(pattern):
 98 |                     skip = True
 99 |                     break
100 |             if skip:
101 |                 continue
102 |                 
103 |             # Check if file is supported
104 |             if LanguageAnalyzerFactory.get_analyzer(file_path):
105 |                 source_files.append(file_path)
106 |                 
107 |     return source_files
108 | 
109 | def analyze_file(file: str) -> List[Dict]:
110 |     """Analyze single file for vulnerabilities"""
111 |     try:
112 |         matches = detector.detect(file)
113 |         return [match.to_dict() for match in matches]
114 |     except Exception as e:
115 |         logger.error(f"Error analyzing file {file}: {str(e)}")
116 |         logger.debug(traceback.format_exc())
117 |         return []
118 | 
119 | def main():
120 |     """Main entry point"""
121 |     start_time = time.time()
122 |     
123 |     # Parse arguments
124 |     args = parse_args()
125 |     
126 |     # Setup logging
127 |     log_level = logging.DEBUG if args.verbose else logging.INFO
128 |     setup_logging(level=log_level)
129 |     
130 |     logger.info("Starting Movery...")
131 |     
132 |     try:
133 |         # Load configuration
134 |         config = load_config(args.config)
135 |         if args.jobs:
136 |             config.processing.num_processes = args.jobs
137 |         config.processing.enable_cache = args.cache
138 |         
139 |         # Load vulnerability signatures
140 |         detector.load_signatures(args.signatures)
141 |         
142 |         # Find source files
143 |         target_path = os.path.abspath(args.target)
144 |         if not os.path.exists(target_path):
145 |             raise FileNotFoundError(f"Target not found: {target_path}")
146 |             
147 |         logger.info(f"Analyzing target: {target_path}")
148 |         source_files = find_source_files(target_path)
149 |         logger.info(f"Found {len(source_files)} source files")
150 |         
151 |         # Start worker pool
152 |         worker_pool.start()
153 |         
154 |         # Process files in parallel
155 |         all_matches = []
156 |         with concurrent.futures.ThreadPoolExecutor(
157 |             max_workers=config.processing.num_processes
158 |         ) as executor:
159 |             future_to_file = {
160 |                 executor.submit(analyze_file, file): file
161 |                 for file in source_files
162 |             }
163 |             
164 |             for future in concurrent.futures.as_completed(future_to_file):
165 |                 file = future_to_file[future]
166 |                 try:
167 |                     matches = future.result()
168 |                     if matches:
169 |                         all_matches.extend(matches)
170 |                         logger.info(
171 |                             f"Found {len(matches)} vulnerabilities in {file}")
172 |                 except Exception as e:
173 |                     logger.error(f"Error processing {file}: {str(e)}")
174 |                     
175 |         # Generate report
176 |         if all_matches:
177 |             os.makedirs(args.output, exist_ok=True)
178 |             report_file = os.path.join(
179 |                 args.output,
180 |                 f"report_{int(time.time())}.html"
181 |             )
182 |             reporter.generate_report(all_matches, report_file)
183 |             logger.info(f"Generated report: {report_file}")
184 |         else:
185 |             logger.info("No vulnerabilities found")
186 |             
187 |         elapsed_time = time.time() - start_time
188 |         logger.info(f"Analysis completed in {elapsed_time:.2f} seconds")
189 |         
190 |     except Exception as e:
191 |         logger.error(f"Error: {str(e)}")
192 |         logger.debug(traceback.format_exc())
193 |         sys.exit(1)
194 |     finally:
195 |         worker_pool.stop()
196 | 
197 | if __name__ == "__main__":
198 |     main() 


--------------------------------------------------------------------------------
/movery/reporters/__init__.py:
--------------------------------------------------------------------------------
1 | from .html import HTMLReporter
2 | 
3 | __all__ = ['HTMLReporter'] 


--------------------------------------------------------------------------------
/movery/templates/report.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <head>
  4 |     <meta charset="utf-8"> <!-- matched code and paths may be non-ASCII. NOTE(review): assumes the reporter writes this file as UTF-8; confirm -->
  5 |     <title>Movery Vulnerability Report</title>
  6 |     <style>
  7 |         body {
  8 |             font-family: Arial, sans-serif;
  9 |             line-height: 1.6;
 10 |             margin: 0;
 11 |             padding: 20px;
 12 |         }
 13 |         
 14 |         .container {
 15 |             max-width: 1200px;
 16 |             margin: 0 auto;
 17 |         }
 18 |         
 19 |         h1, h2, h3 {
 20 |             color: #333;
 21 |         }
 22 |         
 23 |         .summary {
 24 |             background: #f5f5f5;
 25 |             padding: 20px;
 26 |             border-radius: 5px;
 27 |             margin-bottom: 30px;
 28 |         }
 29 |         
 30 |         .charts {
 31 |             display: grid;
 32 |             grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
 33 |             gap: 20px;
 34 |             margin-bottom: 30px;
 35 |         }
 36 |         
 37 |         .chart {
 38 |             background: white;
 39 |             padding: 20px;
 40 |             border-radius: 5px;
 41 |             box-shadow: 0 2px 5px rgba(0,0,0,0.1);
 42 |         }
 43 |         
 44 |         .vulnerability {
 45 |             background: white;
 46 |             padding: 20px;
 47 |             border-radius: 5px;
 48 |             box-shadow: 0 2px 5px rgba(0,0,0,0.1);
 49 |             margin-bottom: 20px;
 50 |         }
 51 |         
 52 |         .vulnerability-header {
 53 |             display: flex;
 54 |             justify-content: space-between;
 55 |             align-items: center;
 56 |             margin-bottom: 10px;
 57 |         }
 58 |         
 59 |         .severity {
 60 |             padding: 5px 10px;
 61 |             border-radius: 3px;
 62 |             color: white;
 63 |             font-weight: bold;
 64 |         }
 65 |         
 66 |         .severity.CRITICAL { background: #dc3545; }
 67 |         .severity.HIGH { background: #fd7e14; }
 68 |         .severity.MEDIUM { background: #ffc107; }
 69 |         .severity.LOW { background: #28a745; }
 70 |         
 71 |         .code {
 72 |             background: #f8f9fa;
 73 |             padding: 15px;
 74 |             border-radius: 5px;
 75 |             font-family: monospace;
 76 |             white-space: pre-wrap;
 77 |             margin: 10px 0;
 78 |         }
 79 |         
 80 |         .context {
 81 |             margin-top: 10px;
 82 |             font-size: 0.9em;
 83 |             color: #666;
 84 |         }
 85 |         
 86 |         .footer {
 87 |             margin-top: 50px;
 88 |             text-align: center;
 89 |             color: #666;
 90 |             font-size: 0.8em;
 91 |         }
 92 |     </style>
 93 | </head>
 94 | <body>
 95 |     <div class="container">
 96 |         <h1>Movery Vulnerability Report</h1>
 97 |         
 98 |         <div class="summary">
 99 |             <h2>Summary</h2>
100 |             <p>Total Vulnerabilities: {{ report.summary.total_vulnerabilities }}</p>
101 |             <p>Total Files: {{ report.summary.total_files }}</p>
102 |             <p>Severity Distribution:</p>
103 |             <ul>
104 |             {% for severity, count in report.summary.severities.items() %}
105 |                 <li>{{ severity }}: {{ count }}</li>
106 |             {% endfor %}
107 |             </ul>
108 |         </div>
109 |         
110 |         <div class="charts">
111 |             <div class="chart">
112 |                 <img src="data:image/png;base64,{{ charts.severity_distribution }}"
113 |                      alt="Severity Distribution">
114 |             </div>
115 |             <div class="chart">
116 |                 <img src="data:image/png;base64,{{ charts.vulnerability_types }}"
117 |                      alt="Vulnerability Types">
118 |             </div>
119 |             <div class="chart">
120 |                 <img src="data:image/png;base64,{{ charts.file_distribution }}"
121 |                      alt="File Distribution">
122 |             </div>
123 |             <div class="chart">
124 |                 <img src="data:image/png;base64,{{ charts.confidence_distribution }}"
125 |                      alt="Confidence Distribution">
126 |             </div>
127 |         </div>
128 |         
129 |         <h2>Vulnerabilities</h2>
130 |         {% for vuln in report.vulnerabilities %}
131 |         <div class="vulnerability">
132 |             <div class="vulnerability-header">
133 |                 <h3>{{ vuln.name }}</h3>
134 |                 {# Class selectors are case-sensitive: upper-case the value so e.g. "high" still gets the .severity.HIGH colour. #}
135 |                 <span class="severity {{ vuln.severity|upper }}">{{ vuln.severity }}</span>
136 |             </div>
137 |             <p>{{ vuln.description }}</p>
138 |             
139 |             {% if vuln.cwe_id %}
140 |             <p>CWE: {{ vuln.cwe_id }}</p>
141 |             {% endif %}
142 |             
143 |             {% if vuln.cve_id %}
144 |             <p>CVE: {{ vuln.cve_id }}</p>
145 |             {% endif %}
146 |             
147 |             <p>File: {{ vuln.file }}:{{ vuln.line_start }}-{{ vuln.line_end }}</p>
148 |             <p>Confidence: {{ "%.2f"|format(vuln.confidence) }}</p>
149 |             
150 |             <div class="code">{{ vuln.matched_code }}</div>
151 |             
152 |             <div class="context">
153 |                 <h4>Context</h4>
154 |                 <p>Imports: {{ vuln.context.imports|length }}</p>
155 |                 <p>Functions: {{ vuln.context.functions|length }}</p>
156 |                 <p>Classes: {{ vuln.context.classes|length }}</p>
157 |                 <p>Variables: {{ vuln.context.variables|length }}</p>
158 |             </div>
159 |         </div>
160 |         {% endfor %}
161 |         
162 |         <div class="footer">
163 |             Generated at {{ generated_at }}
164 |         </div>
165 |     </div>
166 | </body>
167 | </html>
168 | 


--------------------------------------------------------------------------------
/movery/tests/security/test_security.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import os
  3 | import sys
  4 | import tempfile
  5 | import shutil
  6 | import subprocess
  7 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
  8 | 
  9 | from movery.detectors.vulnerability import VulnerabilityDetector
 10 | from movery.utils.security import SecurityChecker
 11 | 
 12 | class TestSecurity(unittest.TestCase):  # SecurityChecker / VulnerabilityDetector checks run against small generated files
 13 |     def setUp(self):
 14 |         """Set up the test environment"""
 15 |         self.test_dir = tempfile.mkdtemp()
 16 |         self.security_checker = SecurityChecker()
 17 |         self.detector = VulnerabilityDetector()
 18 | 
 19 |     def create_test_file(self, content):
 20 |         """Write ``content`` unchanged to test_file.py in the temp directory and return its path"""
 21 |         file_path = os.path.join(self.test_dir, 'test_file.py')
 22 |         with open(file_path, 'w') as f:
 23 |             f.write(content)
 24 |         return file_path
 25 | 
 26 |     def test_memory_limit(self):
 27 |         """Test the memory limit"""
 28 |         # Create a file that could exhaust memory
 29 |         test_file = self.create_test_file('''
 30 |         def memory_intensive():
 31 |             large_list = [i for i in range(10**8)]  # 尝试创建大列表
 32 |             return large_list
 33 |         ''')
 34 | 
 35 |         # Check memory usage. NOTE(review): the result is compared as a number here, while tests/unit/test_security.py indexes it as a dict; confirm the API
 36 |         memory_usage = self.security_checker.check_memory_usage(test_file)
 37 |         self.assertLess(memory_usage, 8 * 1024 * 1024 * 1024)  # 8 GB limit
 38 | 
 39 |     def test_execution_timeout(self):
 40 |         """Test the execution timeout"""
 41 |         # Create a file that could loop forever
 42 |         test_file = self.create_test_file('''
 43 |         def infinite_loop():
 44 |             while True:
 45 |                 pass
 46 |         ''')
 47 | 
 48 |         # Check execution time
 49 |         with self.assertRaises(TimeoutError):
 50 |             self.security_checker.check_execution_time(test_file, timeout=5)
 51 | 
 52 |     def test_file_access(self):
 53 |         """Test file-access restrictions"""
 54 |         # Create the test file
 55 |         test_file = self.create_test_file('''
 56 |         import os
 57 | 
 58 |         def access_sensitive_file():
 59 |             with open('/etc/passwd', 'r') as f:
 60 |                 return f.read()
 61 |         ''')
 62 | 
 63 |         # Check file access
 64 |         violations = self.security_checker.check_file_access(test_file)
 65 |         self.assertTrue(len(violations) > 0)
 66 |         self.assertIn('/etc/passwd', violations[0])
 67 | 
 68 |     def test_network_access(self):
 69 |         """Test network-access restrictions"""
 70 |         # Create the test file
 71 |         test_file = self.create_test_file('''
 72 |         import socket
 73 | 
 74 |         def connect_external():
 75 |             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 76 |             sock.connect(('example.com', 80))
 77 |         ''')
 78 | 
 79 |         # Check network access
 80 |         violations = self.security_checker.check_network_access(test_file)
 81 |         self.assertTrue(len(violations) > 0)
 82 |         self.assertIn('socket.connect', violations[0])
 83 | 
 84 |     def test_code_injection(self):
 85 |         """Test code-injection protection"""
 86 |         # Create the test file
 87 |         test_file = self.create_test_file('''
 88 |         def execute_input(user_input):
 89 |             exec(user_input)  # 危险的代码执行
 90 |         ''')
 91 | 
 92 |         # Check for code injection. NOTE(review): movery/main.py calls detector.detect(); confirm detect_file() exists
 93 |         vulnerabilities = self.detector.detect_file(test_file)
 94 |         self.assertTrue(len(vulnerabilities) > 0)
 95 |         self.assertEqual(vulnerabilities[0].severity, 'HIGH')
 96 | 
 97 |     def test_input_validation(self):
 98 |         """Test input validation"""
 99 |         # Create the test file
100 |         test_file = self.create_test_file('''
101 |         def process_input(user_input):
102 |             # 没有验证的输入处理
103 |             return eval(user_input)
104 |         ''')
105 | 
106 |         # Check input validation
107 |         issues = self.security_checker.check_input_validation(test_file)
108 |         self.assertTrue(len(issues) > 0)
109 |         self.assertIn('eval', str(issues[0]))
110 | 
111 |     def test_secure_random(self):
112 |         """Test secure random-number generation"""
113 |         # Create the test file
114 |         test_file = self.create_test_file('''
115 |         import random
116 | 
117 |         def generate_token():
118 |             return ''.join(random.choice('0123456789ABCDEF') for i in range(32))
119 |         ''')
120 | 
121 |         # Check random-number generation
122 |         issues = self.security_checker.check_random_generation(test_file)
123 |         self.assertTrue(len(issues) > 0)
124 |         self.assertIn('random.choice', str(issues[0]))
125 | 
126 |     def test_sensitive_data(self):
127 |         """Test sensitive-data handling"""
128 |         # Create the test file
129 |         test_file = self.create_test_file('''
130 |         def process_password(password):
131 |             print(f"Password is: {password}")  # 敏感信息泄露
132 |             return hash(password)  # 不安全的哈希
133 |         ''')
134 | 
135 |         # Check sensitive-data handling
136 |         issues = self.security_checker.check_sensitive_data(test_file)
137 |         self.assertTrue(len(issues) > 0)
138 |         self.assertIn('password', str(issues[0]).lower())
139 | 
140 |     def test_sandbox_escape(self):
141 |         """Test sandbox-escape protection"""
142 |         # Create the test file
143 |         test_file = self.create_test_file('''
144 |         import subprocess
145 |         import os
146 | 
147 |         def dangerous_operation():
148 |             os.system('rm -rf /')  # 危险的系统命令
149 |             subprocess.call(['chmod', '777', '/etc/passwd'])  # 危险的权限修改
150 |         ''')
151 | 
152 |         # Check for sandbox escape
153 |         violations = self.security_checker.check_sandbox_escape(test_file)
154 |         self.assertTrue(len(violations) > 0)
155 |         self.assertIn('os.system', str(violations[0]))
156 | 
157 |     def tearDown(self):
158 |         """Clean up the test environment"""
159 |         shutil.rmtree(self.test_dir)
160 | 
161 | if __name__ == '__main__':
162 |     unittest.main() 


--------------------------------------------------------------------------------
/movery/tests/unit/test_analyzer.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import os
 3 | import sys
 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 5 | 
 6 | from movery.analyzers.code_analyzer import CodeAnalyzer
 7 | 
 8 | class TestCodeAnalyzer(unittest.TestCase):  # CodeAnalyzer checks run on files generated under tests/unit/test_data
 9 |     def setUp(self):
10 |         self.analyzer = CodeAnalyzer()
11 |         self.test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
12 |         if not os.path.exists(self.test_data_dir):
13 |             os.makedirs(self.test_data_dir)
14 | 
15 |     def test_parse_python(self):
16 |         """Test parsing of Python code"""
17 |         test_file = os.path.join(self.test_data_dir, 'test_python.py')
18 |         with open(test_file, 'w') as f:
19 |             f.write('''
20 |             def example_function():
21 |                 x = 1
22 |                 y = 2
23 |                 return x + y
24 |             ''')
25 | 
26 |         ast = self.analyzer.parse_file(test_file)
27 |         self.assertIsNotNone(ast)
28 |         self.assertEqual(ast.type, 'Module')
29 | 
30 |     def test_analyze_function(self):
31 |         """Test function analysis"""
32 |         test_file = os.path.join(self.test_data_dir, 'test_function.py')
33 |         with open(test_file, 'w') as f:
34 |             f.write('''
35 |             def process_data(data):
36 |                 result = []
37 |                 for item in data:
38 |                     if item > 0:
39 |                         result.append(item * 2)
40 |                 return result
41 |             ''')
42 | 
43 |         functions = self.analyzer.analyze_functions(test_file)
44 |         self.assertEqual(len(functions), 1)
45 |         self.assertEqual(functions[0].name, 'process_data')
46 |         self.assertTrue(functions[0].has_loop)
47 |         self.assertTrue(functions[0].has_condition)
48 | 
49 |     def test_data_flow(self):
50 |         """Test data-flow analysis"""
51 |         test_file = os.path.join(self.test_data_dir, 'test_dataflow.py')
52 |         with open(test_file, 'w') as f:
53 |             f.write('''
54 |             def data_flow_example(user_input):
55 |                 data = user_input.strip()
56 |                 processed = data.lower()
57 |                 return processed
58 |             ''')
59 | 
60 |         flows = self.analyzer.analyze_data_flow(test_file)
61 |         self.assertTrue(len(flows) > 0)
62 |         self.assertIn('user_input', flows[0].sources)
63 |         self.assertIn('processed', flows[0].sinks)
64 | 
65 |     def test_complexity_analysis(self):
66 |         """Test complexity analysis"""
67 |         test_file = os.path.join(self.test_data_dir, 'test_complexity.py')
68 |         with open(test_file, 'w') as f:
69 |             f.write('''
70 |             def complex_function(x, y):
71 |                 if x > 0:
72 |                     if y > 0:
73 |                         return x + y
74 |                     else:
75 |                         return x - y
76 |                 else:
77 |                     if y < 0:
78 |                         return -x - y
79 |                     else:
80 |                         return -x + y
81 |             ''')
82 | 
83 |         complexity = self.analyzer.analyze_complexity(test_file)
84 |         self.assertTrue(complexity > 1)
85 |         self.assertEqual(complexity, 4)  # 4 conditional branches
86 | 
87 |     def tearDown(self):
88 |         """Clean up the test data"""
89 |         import shutil
90 |         if os.path.exists(self.test_data_dir):
91 |             shutil.rmtree(self.test_data_dir)
92 | 
93 | if __name__ == '__main__':
94 |     unittest.main() 


--------------------------------------------------------------------------------
/movery/tests/unit/test_detector.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import os
 3 | import sys
 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 5 | 
 6 | from movery.detectors.vulnerability import VulnerabilityDetector
 7 | 
 8 | class TestVulnerabilityDetector(unittest.TestCase):
 9 |     def setUp(self):
10 |         self.detector = VulnerabilityDetector()
11 |         self.test_data_dir = os.path.join(os.path.dirname(__file__), 'test_data')
12 |         if not os.path.exists(self.test_data_dir):
13 |             os.makedirs(self.test_data_dir)
14 | 
15 |     def test_load_signatures(self):
16 |         """测试加载漏洞签名"""
17 |         # 创建测试签名文件
18 |         test_sig_file = os.path.join(self.test_data_dir, 'test_signatures.json')
19 |         with open(test_sig_file, 'w') as f:
20 |             f.write('''
21 |             {
22 |                 "signatures": [
23 |                     {
24 |                         "id": "CWE-78",
25 |                         "name": "OS Command Injection",
26 |                         "severity": "HIGH",
27 |                         "code_patterns": ["os\\.system\\(.*\\)"]
28 |                     }
29 |                 ]
30 |             }
31 |             ''')
32 |         
33 |         self.detector.load_signatures(test_sig_file)
34 |         self.assertEqual(len(self.detector.signatures), 1)
35 |         self.assertEqual(self.detector.signatures[0].id, "CWE-78")
36 | 
37 |     def test_detect_vulnerability(self):
38 |         """测试漏洞检测"""
39 |         # 创建测试代码文件
40 |         test_code_file = os.path.join(self.test_data_dir, 'test_code.py')
41 |         with open(test_code_file, 'w') as f:
42 |             f.write('''
43 |             import os
44 |             def unsafe_function(cmd):
45 |                 os.system(cmd)  # 不安全的系统命令执行
46 |             ''')
47 | 
48 |         matches = self.detector.detect_file(test_code_file)
49 |         self.assertTrue(len(matches) > 0)
50 |         self.assertEqual(matches[0].signature.id, "CWE-78")
51 | 
52 |     def test_false_positive(self):
53 |         """测试误报情况"""
54 |         # 创建安全的测试代码
55 |         test_safe_file = os.path.join(self.test_data_dir, 'test_safe.py')
56 |         with open(test_safe_file, 'w') as f:
57 |             f.write('''
58 |             def safe_function():
59 |                 print("This is safe code")
60 |             ''')
61 | 
62 |         matches = self.detector.detect_file(test_safe_file)
63 |         self.assertEqual(len(matches), 0)
64 | 
65 |     def test_similarity_matching(self):
66 |         """测试相似度匹配"""
67 |         # 创建相似代码测试文件
68 |         test_similar_file = os.path.join(self.test_data_dir, 'test_similar.py')
69 |         with open(test_similar_file, 'w') as f:
70 |             f.write('''
71 |             import subprocess
72 |             def similar_unsafe(command):
73 |                 subprocess.call(command, shell=True)  # 类似的不安全模式
74 |             ''')
75 | 
76 |         matches = self.detector.detect_file(test_similar_file)
77 |         self.assertTrue(len(matches) > 0)
78 |         self.assertTrue(matches[0].confidence > 0.7)
79 | 
80 |     def tearDown(self):
81 |         """清理测试数据"""
82 |         import shutil
83 |         if os.path.exists(self.test_data_dir):
84 |             shutil.rmtree(self.test_data_dir)
85 | 
86 | if __name__ == '__main__':
87 |     unittest.main() 


--------------------------------------------------------------------------------
/movery/tests/unit/test_security.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import os
  3 | import tempfile
  4 | import shutil
  5 | import time
  6 | import threading
  7 | from movery.utils.security import SecurityChecker
  8 | 
  9 | class TestSecurityChecker(unittest.TestCase):  # runs each SecurityChecker check against one generated file of unsafe code
 10 |     def setUp(self):
 11 |         """Preparation before each test"""
 12 |         self.checker = SecurityChecker()
 13 |         self.test_dir = tempfile.mkdtemp()
 14 |         
 15 |         # Create the test code file
 16 |         self.test_code = '''
 17 | import os
 18 | import sys
 19 | import time
 20 | import random
 21 | import socket
 22 | import subprocess
 23 | 
 24 | def unsafe_memory():
 25 |     # 大量内存分配
 26 |     large_list = [i for i in range(10**7)]
 27 |     return large_list
 28 | 
 29 | def unsafe_execution():
 30 |     # 长时间执行
 31 |     time.sleep(5)
 32 |     return "Done"
 33 | 
 34 | def unsafe_file_access():
 35 |     # 危险的文件操作
 36 |     with open("/etc/passwd", "r") as f:
 37 |         data = f.read()
 38 |     return data
 39 | 
 40 | def unsafe_network():
 41 |     # 未经验证的网络连接
 42 |     sock = socket.socket()
 43 |     sock.connect(("example.com", 80))
 44 |     return sock
 45 | 
 46 | def unsafe_input():
 47 |     # 未验证的输入
 48 |     user_input = input("Enter command: ")
 49 |     os.system(user_input)
 50 | 
 51 | def unsafe_random():
 52 |     # 不安全的随机数生成
 53 |     return random.randint(1, 100)
 54 | 
 55 | def unsafe_sensitive_data():
 56 |     # 敏感数据暴露
 57 |     password = "super_secret_123"
 58 |     print(f"Password is: {password}")
 59 | 
 60 | def unsafe_sandbox():
 61 |     # 沙箱逃逸尝试
 62 |     subprocess.call("rm -rf /", shell=True)
 63 | '''
 64 |         self.test_file = os.path.join(self.test_dir, "test_code.py")
 65 |         with open(self.test_file, "w") as f:
 66 |             f.write(self.test_code)
 67 | 
 68 |     def tearDown(self):
 69 |         """Cleanup after each test"""
 70 |         shutil.rmtree(self.test_dir)
 71 | 
 72 |     def test_check_memory_usage(self):
 73 |         """Test the memory-usage check"""
 74 |         result = self.checker.check_memory_usage(self.test_file)
 75 |         self.assertTrue(result["has_issues"])
 76 |         self.assertIn("large_list", result["details"])
 77 |         self.assertGreater(len(result["patterns"]), 0)
 78 | 
 79 |     def test_check_execution_time(self):
 80 |         """Test the execution-time check"""
 81 |         result = self.checker.check_execution_time(self.test_file)
 82 |         self.assertTrue(result["has_issues"])
 83 |         self.assertIn("time.sleep", result["details"])
 84 |         self.assertGreater(len(result["patterns"]), 0)
 85 | 
 86 |     def test_check_file_access(self):
 87 |         """Test the file-access check"""
 88 |         result = self.checker.check_file_access(self.test_file)
 89 |         self.assertTrue(result["has_issues"])
 90 |         self.assertIn("/etc/passwd", result["details"])
 91 |         self.assertGreater(len(result["patterns"]), 0)
 92 | 
 93 |     def test_check_network_access(self):
 94 |         """Test the network-access check"""
 95 |         result = self.checker.check_network_access(self.test_file)
 96 |         self.assertTrue(result["has_issues"])
 97 |         self.assertIn("socket.connect", result["details"])
 98 |         self.assertGreater(len(result["patterns"]), 0)
 99 | 
100 |     def test_check_input_validation(self):
101 |         """Test the input-validation check"""
102 |         result = self.checker.check_input_validation(self.test_file)
103 |         self.assertTrue(result["has_issues"])
104 |         self.assertIn("os.system", result["details"])
105 |         self.assertGreater(len(result["patterns"]), 0)
106 | 
107 |     def test_check_random_generation(self):
108 |         """Test the random-number-generation check"""
109 |         result = self.checker.check_random_generation(self.test_file)
110 |         self.assertTrue(result["has_issues"])
111 |         self.assertIn("random.randint", result["details"])
112 |         self.assertGreater(len(result["patterns"]), 0)
113 | 
114 |     def test_check_sensitive_data(self):
115 |         """Test the sensitive-data check"""
116 |         result = self.checker.check_sensitive_data(self.test_file)
117 |         self.assertTrue(result["has_issues"])
118 |         self.assertIn("password", result["details"])
119 |         self.assertGreater(len(result["patterns"]), 0)
120 | 
121 |     def test_check_sandbox_escape(self):
122 |         """Test the sandbox-escape check"""
123 |         result = self.checker.check_sandbox_escape(self.test_file)
124 |         self.assertTrue(result["has_issues"])
125 |         self.assertIn("subprocess.call", result["details"])
126 |         self.assertGreater(len(result["patterns"]), 0)
127 | 
128 |     def test_perform_full_check(self):
129 |         """Test the full security check"""
130 |         results = self.checker.perform_full_check(self.test_file)
131 |         
132 |         self.assertIsInstance(results, dict)
133 |         self.assertGreater(len(results), 0)
134 |         
135 |         # Verify that every check was executed
136 |         expected_checks = [
137 |             "memory_usage",
138 |             "execution_time",
139 |             "file_access",
140 |             "network_access",
141 |             "input_validation",
142 |             "random_generation",
143 |             "sensitive_data",
144 |             "sandbox_escape"
145 |         ]
146 |         
147 |         for check in expected_checks:
148 |             self.assertIn(check, results)
149 |             self.assertTrue(results[check]["has_issues"])
150 |             self.assertGreater(len(results[check]["patterns"]), 0)
151 | 
152 |     def test_concurrent_checks(self):
153 |         """Test concurrent security checks"""
154 |         # Create several test files
155 |         test_files = []
156 |         for i in range(5):
157 |             file_path = os.path.join(self.test_dir, f"test_code_{i}.py")
158 |             with open(file_path, "w") as f:
159 |                 f.write(self.test_code)
160 |             test_files.append(file_path)
161 |         
162 |         # Run the checks concurrently
163 |         results = []
164 |         threads = []
165 |         
166 |         def check_file(file_path):
167 |             result = self.checker.perform_full_check(file_path)
168 |             results.append(result)
169 |         
170 |         for file_path in test_files:
171 |             thread = threading.Thread(target=check_file, args=(file_path,))
172 |             threads.append(thread)
173 |             thread.start()
174 |         
175 |         for thread in threads:
176 |             thread.join()
177 |         
178 |         self.assertEqual(len(results), len(test_files))
179 |         for result in results:
180 |             self.assertIsInstance(result, dict)
181 |             self.assertGreater(len(result), 0)
182 | 
183 | if __name__ == '__main__':  # run the tests when this file is executed directly
184 |     unittest.main() 


--------------------------------------------------------------------------------
/movery/tests/unit/test_vulnerability.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import os
  3 | import json
  4 | import tempfile
  5 | import shutil
  6 | import ast
  7 | from movery.detectors.vulnerability import VulnerabilityDetector, Signature, VulnerabilityMatch
  8 | 
  9 | class TestVulnerabilityDetector(unittest.TestCase):  # tests VulnerabilityDetector against temp-dir fixtures
 10 |     def setUp(self):
 11 |         """Create a detector, a temp dir, a two-signature JSON file and a sample source file."""
 12 |         self.detector = VulnerabilityDetector()
 13 |         self.test_dir = tempfile.mkdtemp()
 14 |         
 15 |         # Build the signature fixture: one command-injection and one SQL-injection entry
 16 |         self.signatures = {
 17 |             "signatures": [
 18 |                 {
 19 |                     "id": "CMD001",
 20 |                     "name": "命令注入",
 21 |                     "severity": "high",
 22 |                     "code_patterns": [
 23 |                         "os\\.system\\([^)]*\\)",
 24 |                         "subprocess\\.call\\([^)]*\\)"
 25 |                     ]
 26 |                 },
 27 |                 {
 28 |                     "id": "SQL001",
 29 |                     "name": "SQL注入",
 30 |                     "severity": "high",
 31 |                     "code_patterns": [
 32 |                         "execute\\(['\"][^'\"]*%[^'\"]*['\"]\\)",
 33 |                         "executemany\\(['\"][^'\"]*%[^'\"]*['\"]\\)"
 34 |                     ]
 35 |                 }
 36 |             ]
 37 |         }
 38 |         
 39 |         self.signature_file = os.path.join(self.test_dir, "signatures.json")
 40 |         with open(self.signature_file, "w") as f:
 41 |             json.dump(self.signatures, f)
 42 |             
 43 |         # Write the sample source file that the detection tests scan
 44 |         self.test_code = '''
 45 | import os
 46 | import subprocess
 47 | 
 48 | def unsafe_command():
 49 |     cmd = "ls -l"
 50 |     os.system(cmd)
 51 |     subprocess.call(["echo", "hello"])
 52 | 
 53 | def unsafe_sql():
 54 |     query = "SELECT * FROM users WHERE id = %s"
 55 |     cursor.execute(query % user_id)
 56 | '''
 57 |         self.test_file = os.path.join(self.test_dir, "test_code.py")
 58 |         with open(self.test_file, "w") as f:
 59 |             f.write(self.test_code)
 60 | 
 61 |     def tearDown(self):
 62 |         """Remove the temporary directory and everything written into it."""
 63 |         shutil.rmtree(self.test_dir)
 64 | 
 65 |     def test_load_signatures(self):
 66 |         """Loading the signature file yields both signatures with their fields."""
 67 |         self.detector.load_signatures(self.signature_file)
 68 |         
 69 |         self.assertEqual(len(self.detector.signatures), 2)
 70 |         self.assertEqual(self.detector.signatures[0].id, "CMD001")
 71 |         self.assertEqual(self.detector.signatures[0].name, "命令注入")
 72 |         self.assertEqual(len(self.detector.signatures[0].code_patterns), 2)
 73 | 
 74 |     def test_detect_file(self):
 75 |         """detect_file returns VulnerabilityMatch objects with confidence above 0.7."""
 76 |         self.detector.load_signatures(self.signature_file)
 77 |         matches = self.detector.detect_file(self.test_file)
 78 |         
 79 |         self.assertGreater(len(matches), 0)
 80 |         for match in matches:
 81 |             self.assertIsInstance(match, VulnerabilityMatch)
 82 |             self.assertIsInstance(match.signature, Signature)
 83 |             self.assertGreater(match.confidence, 0.7)
 84 | 
 85 |     def test_analyze_ast(self):
 86 |         """analyze_ast returns matches that carry a positive line number."""
 87 |         self.detector.load_signatures(self.signature_file)
 88 |         with open(self.test_file, 'r') as f:
 89 |             tree = ast.parse(f.read())
 90 |         matches = self.detector.analyze_ast(tree)
 91 |         
 92 |         self.assertGreater(len(matches), 0)
 93 |         for match in matches:
 94 |             self.assertIsInstance(match, VulnerabilityMatch)
 95 |             self.assertGreater(match.line_number, 0)
 96 | 
 97 |     def test_detect_similar_patterns(self):
 98 |         """detect_similar_patterns returns matches with confidence above 0.8."""
 99 |         similar_code = '''
100 | import os
101 | import subprocess
102 | 
103 | def custom_system(cmd):
104 |     os.system(cmd)  # 直接模式
105 |     
106 | def modified_system(command):
107 |     os.system(command)  # 相似模式
108 | '''
109 |         similar_file = os.path.join(self.test_dir, "similar_code.py")
110 |         with open(similar_file, "w") as f:
111 |             f.write(similar_code)
112 |             
113 |         self.detector.load_signatures(self.signature_file)
114 |         matches = self.detector.detect_similar_patterns(similar_code)  # note: the source text is passed, not similar_file
115 |         
116 |         self.assertGreater(len(matches), 0)
117 |         for match in matches:
118 |             self.assertIsInstance(match, VulnerabilityMatch)
119 |             self.assertGreater(match.confidence, 0.8)
120 | 
121 |     def test_calculate_confidence(self):
122 |         """_calculate_confidence stays within [expected, 1.0] for each (code, pattern) case."""
123 |         test_cases = [
124 |             ("os.system('ls')", r"os\.system\([^)]*\)", 0.8),
125 |             ("subprocess.call(['ls'])", r"subprocess\.call\([^)]*\)", 0.9),
126 |             ("import os; os.system('ls')", r"os\.system\([^)]*\)", 1.0)
127 |         ]
128 |         
129 |         for code, pattern, expected in test_cases:
130 |             confidence = self.detector._calculate_confidence(code, pattern)
131 |             self.assertGreaterEqual(confidence, expected)  # expected is a lower bound
132 |             self.assertLessEqual(confidence, 1.0)
133 | 
134 |     def test_calculate_similarity(self):
135 |         """_calculate_similarity stays within [expected - 0.1, 1.0] for each string pair."""
136 |         test_cases = [
137 |             ("os.system", "os.system", 1.0),
138 |             ("os.system", "subprocess.system", 0.5),
139 |             ("execute", "executemany", 0.7)
140 |         ]
141 |         
142 |         for str1, str2, expected in test_cases:
143 |             similarity = self.detector._calculate_similarity(str1, str2)
144 |             self.assertGreaterEqual(similarity, expected - 0.1)  # 0.1 tolerance below the expected score
145 |             self.assertLessEqual(similarity, 1.0)
146 | 
147 | if __name__ == '__main__':  # run the tests when this file is executed directly
148 |     unittest.main() 


--------------------------------------------------------------------------------
/movery/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .security import SecurityChecker
2 | from .parallel import WorkerPool, ParallelExecutor
3 | from .logging import get_logger
4 | from .memory import MemoryMonitor
5 | 
6 | __all__ = ['SecurityChecker', 'WorkerPool', 'ParallelExecutor', 'get_logger', 'MemoryMonitor'] 


--------------------------------------------------------------------------------
/movery/utils/logging.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Logging utilities for Movery
  3 | """
  4 | import logging
  5 | import sys
  6 | import os
  7 | import time
  8 | from typing import Optional
  9 | from datetime import datetime
 10 | from functools import wraps
 11 | import threading
 12 | from concurrent.futures import ThreadPoolExecutor
 13 | import queue
 14 | import json
 15 | 
 16 | from movery.config.config import config
 17 | 
 18 | class AsyncLogHandler(logging.Handler):
 19 |     """Asynchronous log handler that processes logs in a separate thread"""
 20 |     
 21 |     def __init__(self, capacity: int = 1000):
 22 |         super().__init__()
 23 |         self.queue = queue.Queue(maxsize=capacity)
 24 |         self.executor = ThreadPoolExecutor(max_workers=1)
 25 |         self.running = True
 26 |         self.worker = threading.Thread(target=self._process_logs)
 27 |         self.worker.daemon = True
 28 |         self.worker.start()
 29 |         
 30 |     def emit(self, record: logging.LogRecord):
 31 |         try:
 32 |             self.queue.put_nowait(record)
 33 |         except queue.Full:
 34 |             sys.stderr.write(f"Log queue full, dropping message: {record.getMessage()}\n")
 35 |             
 36 |     def _process_logs(self):
 37 |         while self.running:
 38 |             try:
 39 |                 record = self.queue.get(timeout=0.1)
 40 |                 self.executor.submit(self._write_log, record)
 41 |             except queue.Empty:
 42 |                 continue
 43 |             except Exception as e:
 44 |                 sys.stderr.write(f"Error processing log: {str(e)}\n")
 45 |                 
 46 |     def _write_log(self, record: logging.LogRecord):
 47 |         try:
 48 |             message = self.format(record)
 49 |             with open(config.logging.log_file, "a", encoding="utf-8") as f:
 50 |                 f.write(message + "\n")
 51 |         except Exception as e:
 52 |             sys.stderr.write(f"Error writing log: {str(e)}\n")
 53 |             
 54 |     def close(self):
 55 |         self.running = False
 56 |         self.worker.join()
 57 |         self.executor.shutdown()
 58 |         super().close()
 59 | 
 60 | class ProgressLogger:
 61 |     """Logger for tracking and displaying progress"""
 62 |     
 63 |     def __init__(self, total: int, desc: str = "", interval: float = 0.1):
 64 |         self.total = total
 65 |         self.desc = desc
 66 |         self.interval = interval
 67 |         self.current = 0
 68 |         self.start_time = time.time()
 69 |         self.last_update = 0
 70 |         
 71 |     def update(self, n: int = 1):
 72 |         self.current += n
 73 |         now = time.time()
 74 |         if now - self.last_update >= self.interval:
 75 |             self._display_progress()
 76 |             self.last_update = now
 77 |             
 78 |     def _display_progress(self):
 79 |         percentage = (self.current / self.total) * 100
 80 |         elapsed = time.time() - self.start_time
 81 |         rate = self.current / elapsed if elapsed > 0 else 0
 82 |         eta = (self.total - self.current) / rate if rate > 0 else 0
 83 |         
 84 |         sys.stdout.write(f"\r{self.desc}: [{self.current}/{self.total}] "
 85 |                         f"{percentage:.1f}% Rate: {rate:.1f}/s ETA: {eta:.1f}s")
 86 |         sys.stdout.flush()
 87 |         
 88 |     def finish(self):
 89 |         self._display_progress()
 90 |         sys.stdout.write("\n")
 91 |         sys.stdout.flush()
 92 | 
 93 | class JsonFormatter(logging.Formatter):
 94 |     """Format logs as JSON for better parsing"""
 95 |     
 96 |     def format(self, record: logging.LogRecord) -> str:
 97 |         data = {
 98 |             "timestamp": datetime.fromtimestamp(record.created).isoformat(),
 99 |             "level": record.levelname,
100 |             "logger": record.name,
101 |             "message": record.getMessage(),
102 |             "module": record.module,
103 |             "function": record.funcName,
104 |             "line": record.lineno
105 |         }
106 |         
107 |         if record.exc_info:
108 |             data["exception"] = self.formatException(record.exc_info)
109 |             
110 |         if hasattr(record, "extra"):
111 |             data.update(record.extra)
112 |             
113 |         return json.dumps(data)
114 | 
115 | def setup_logging(log_file: Optional[str] = None, level: str = None):
116 |     """Setup logging configuration"""
117 |     if log_file:
118 |         config.logging.log_file = log_file
119 |     if level:
120 |         config.logging.log_level = level
121 |         
122 |     # Create logs directory in current directory for relative paths
123 |     if not os.path.isabs(config.logging.log_file):
124 |         log_dir = os.path.join(os.getcwd(), "logs")
125 |         config.logging.log_file = os.path.join(log_dir, config.logging.log_file)
126 |     
127 |     # Create log directory if needed
128 |     os.makedirs(os.path.dirname(config.logging.log_file), exist_ok=True)
129 |     
130 |     # Setup root logger
131 |     root_logger = logging.getLogger()
132 |     root_logger.setLevel(config.logging.log_level)
133 |     
134 |     # Console handler
135 |     console_handler = logging.StreamHandler(sys.stdout)
136 |     console_handler.setLevel(logging.INFO)
137 |     console_formatter = logging.Formatter(config.logging.log_format)
138 |     console_handler.setFormatter(console_formatter)
139 |     root_logger.addHandler(console_handler)
140 |     
141 |     # File handler
142 |     file_handler = AsyncLogHandler()
143 |     file_handler.setLevel(logging.DEBUG)
144 |     file_formatter = JsonFormatter()
145 |     file_handler.setFormatter(file_formatter)
146 |     root_logger.addHandler(file_handler)
147 |     
148 | def log_execution_time(logger: Optional[logging.Logger] = None):
149 |     """Decorator to log function execution time"""
150 |     def decorator(func):
151 |         @wraps(func)
152 |         def wrapper(*args, **kwargs):
153 |             start_time = time.time()
154 |             result = func(*args, **kwargs)
155 |             elapsed_time = time.time() - start_time
156 |             
157 |             log = logger or logging.getLogger(func.__module__)
158 |             log.debug(f"{func.__name__} executed in {elapsed_time:.2f} seconds")
159 |             
160 |             return result
161 |         return wrapper
162 |     return decorator
163 | 
164 | def get_logger(name: str) -> logging.Logger:
165 |     """Get a logger instance with the given name"""
166 |     return logging.getLogger(name)
167 | 
168 | # Import side effect: configure the root logger from the current config values
169 | setup_logging() 


--------------------------------------------------------------------------------
/movery/utils/memory.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Memory management utilities for Movery
  3 | """
  4 | import os
  5 | import mmap
  6 | import psutil
  7 | import gc
  8 | import sys
  9 | from typing import Optional, Generator, Any
 10 | from contextlib import contextmanager
 11 | import threading
 12 | import weakref
 13 | from collections import OrderedDict
 14 | import logging
 15 | 
 16 | from movery.config.config import config
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | class MemoryMonitor:
 21 |     """Monitor memory usage and enforce limits"""
 22 |     
 23 |     def __init__(self, max_memory: Optional[int] = None):
 24 |         self.max_memory = max_memory or config.processing.max_memory_usage
 25 |         self.process = psutil.Process()
 26 |         self._lock = threading.Lock()
 27 |         self._last_check = 0
 28 |         
 29 |     def get_memory_usage(self) -> int:
 30 |         """Get current memory usage in bytes"""
 31 |         return self.process.memory_info().rss
 32 |         
 33 |     def check_memory(self) -> bool:
 34 |         """Check if memory usage is within limits"""
 35 |         with self._lock:
 36 |             current_usage = self.get_memory_usage()
 37 |             if current_usage > self.max_memory:
 38 |                 logger.warning(f"Memory usage ({current_usage} bytes) exceeds limit "
 39 |                              f"({self.max_memory} bytes)")
 40 |                 return False
 41 |             return True
 42 |             
 43 |     def force_garbage_collection(self):
 44 |         """Force garbage collection"""
 45 |         gc.collect()
 46 |         
 47 |     @contextmanager
 48 |     def monitor_operation(self, operation_name: str):
 49 |         """Context manager to monitor memory during an operation"""
 50 |         start_usage = self.get_memory_usage()
 51 |         try:
 52 |             yield
 53 |         finally:
 54 |             end_usage = self.get_memory_usage()
 55 |             delta = end_usage - start_usage
 56 |             logger.debug(f"Memory delta for {operation_name}: {delta} bytes")
 57 |             if not self.check_memory():
 58 |                 self.force_garbage_collection()
 59 | 
 60 | class LRUCache:
 61 |     """Least Recently Used Cache with memory limit"""
 62 |     
 63 |     def __init__(self, max_size: Optional[int] = None):
 64 |         self.max_size = max_size or config.processing.cache_max_size
 65 |         self._cache = OrderedDict()
 66 |         self._size = 0
 67 |         self._lock = threading.Lock()
 68 |         
 69 |     def get(self, key: str) -> Optional[Any]:
 70 |         """Get item from cache"""
 71 |         with self._lock:
 72 |             if key in self._cache:
 73 |                 value = self._cache.pop(key)
 74 |                 self._cache[key] = value
 75 |                 return value
 76 |             return None
 77 |             
 78 |     def put(self, key: str, value: Any, size: Optional[int] = None):
 79 |         """Put item in cache"""
 80 |         if not size:
 81 |             size = sys.getsizeof(value)
 82 |             
 83 |         if size > self.max_size:
 84 |             logger.warning(f"Item size ({size} bytes) exceeds cache limit "
 85 |                          f"({self.max_size} bytes)")
 86 |             return
 87 |             
 88 |         with self._lock:
 89 |             if key in self._cache:
 90 |                 self._size -= sys.getsizeof(self._cache[key])
 91 |                 
 92 |             while self._size + size > self.max_size and self._cache:
 93 |                 _, removed = self._cache.popitem(last=False)
 94 |                 self._size -= sys.getsizeof(removed)
 95 |                 
 96 |             self._cache[key] = value
 97 |             self._size += size
 98 |             
 99 |     def clear(self):
100 |         """Clear cache"""
101 |         with self._lock:
102 |             self._cache.clear()
103 |             self._size = 0
104 | 
105 | class MemoryMappedFile:
106 |     """Memory mapped file for efficient large file handling"""
107 |     
108 |     def __init__(self, filename: str, mode: str = "r"):
109 |         self.filename = filename
110 |         self.mode = mode
111 |         self._file = None
112 |         self._mmap = None
113 |         
114 |     def __enter__(self):
115 |         access = mmap.ACCESS_READ
116 |         if "w" in self.mode:
117 |             access = mmap.ACCESS_WRITE
118 |             
119 |         self._file = open(self.filename, mode=self.mode + "b")
120 |         self._mmap = mmap.mmap(self._file.fileno(), 0, access=access)
121 |         return self
122 |         
123 |     def __exit__(self, exc_type, exc_val, exc_tb):
124 |         if self._mmap:
125 |             self._mmap.close()
126 |         if self._file:
127 |             self._file.close()
128 |             
129 |     def read(self, size: int = -1) -> bytes:
130 |         """Read from memory mapped file"""
131 |         if size == -1:
132 |             return self._mmap[:]
133 |         return self._mmap[:size]
134 |         
135 |     def write(self, data: bytes):
136 |         """Write to memory mapped file"""
137 |         if "w" not in self.mode:
138 |             raise IOError("File not opened for writing")
139 |         self._mmap.write(data)
140 |         
141 |     def seek(self, offset: int):
142 |         """Seek to position in file"""
143 |         self._mmap.seek(offset)
144 | 
145 | def chunk_iterator(data: Any, chunk_size: Optional[int] = None) -> Generator:
146 |     """Iterator that yields chunks of data"""
147 |     if not chunk_size:
148 |         chunk_size = config.processing.chunk_size
149 |         
150 |     if isinstance(data, (bytes, str)):
151 |         for i in range(0, len(data), chunk_size):
152 |             yield data[i:i + chunk_size]
153 |     elif hasattr(data, "__iter__"):
154 |         chunk = []
155 |         for item in data:
156 |             chunk.append(item)
157 |             if len(chunk) >= chunk_size:
158 |                 yield chunk
159 |                 chunk = []
160 |         if chunk:
161 |             yield chunk
162 |     else:
163 |         raise TypeError(f"Unsupported data type: {type(data)}")
164 | 
165 | # Module-level MemoryMonitor shared by importers; built at import time with the configured limit
166 | memory_monitor = MemoryMonitor()
167 | 
168 | # Module-level LRUCache shared by importers; built at import time with the configured size budget
169 | cache = LRUCache() 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | jinja2>=3.0.0
 2 | plotly>=5.0.0
 3 | pandas>=1.3.0
 4 | psutil>=5.8.0
 5 | tqdm>=4.61.0
 6 | colorama>=0.4.4
 7 | requests>=2.26.0
 8 | beautifulsoup4>=4.9.3
 9 | lxml>=4.6.3
10 | pygments>=2.9.0
11 | typing-extensions>=3.10.0
12 | dataclasses>=0.8;python_version<"3.7" 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Setup script for Re-Movery
 3 | """
 4 | from setuptools import setup, find_packages
 5 | 
 6 | with open("README.md", "r", encoding="utf-8") as f:
 7 |     long_description = f.read()
 8 | 
 9 | setup(
10 |     name="movery",
11 |     version="0.1.0",
12 |     author="heyangxu",
13 |     author_email="",
14 |     description="A tool for discovering modified vulnerable code clones",
15 |     long_description=long_description,
16 |     long_description_content_type="text/markdown",
17 |     url="https://github.com/heyangxu/Re-movery",
18 |     packages=find_packages(),
19 |     classifiers=[
20 |         "Development Status :: 4 - Beta",
21 |         "Intended Audience :: Developers",
22 |         "Topic :: Security",
23 |         "Topic :: Software Development :: Quality Assurance",
24 |         "License :: OSI Approved :: MIT License",
25 |         "Programming Language :: Python :: 3",
26 |         "Programming Language :: Python :: 3.7",
27 |         "Programming Language :: Python :: 3.8",
28 |         "Programming Language :: Python :: 3.9",
29 |         "Programming Language :: Python :: 3.10",
30 |         "Operating System :: OS Independent",
31 |     ],
32 |     python_requires=">=3.7",
33 |     install_requires=[
34 |         "pytest>=7.3.1",
35 |         "coverage>=7.2.7",
36 |         "jinja2>=3.0.0",
37 |         "plotly>=5.0.0",
38 |         "pandas>=1.3.0",
39 |         "psutil>=5.8.0",
40 |         "tqdm>=4.61.0",
41 |         "colorama>=0.4.4",
42 |         "requests>=2.26.0",
43 |         "beautifulsoup4>=4.9.3",
44 |         "lxml>=4.6.3",
45 |         "pygments>=2.9.0",
46 |         "typing-extensions>=3.10.0",
47 |         "dataclasses>=0.8;python_version<'3.7'",
48 |     ],
49 |     entry_points={
50 |         "console_scripts": [
51 |             "movery=movery.main:main",
52 |         ],
53 |     },
54 |     package_data={
55 |         "movery": [
56 |             "templates/*.html",
57 |             "config/*.json",
58 |         ],
59 |     },
60 |     include_package_data=True,
61 |     zip_safe=False,
62 | ) 


--------------------------------------------------------------------------------
/signatures.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "signatures": [
  3 |         {
  4 |             "id": "CWE-78",
  5 |             "name": "OS Command Injection",
  6 |             "description": "The software constructs all or part of an OS command using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the intended OS command when it is sent to a downstream component.",
  7 |             "severity": "CRITICAL",
  8 |             "cwe_id": "CWE-78",
  9 |             "affected_languages": ["python", "php", "javascript"],
 10 |             "code_patterns": [
 11 |                 "os\\.system\\(.*\\)",
 12 |                 "subprocess\\.call\\(.*shell\\s*=\\s*True.*\\)",
 13 |                 "exec\\(.*\\)",
 14 |                 "eval\\(.*\\)"
 15 |             ],
 16 |             "fix_patterns": [
 17 |                 "shlex.quote(command)",
 18 |                 "subprocess.run([command], shell=False)",
 19 |                 "ast.literal_eval(input)"
 20 |             ],
 21 |             "context_patterns": [
 22 |                 "import\\s+os",
 23 |                 "import\\s+subprocess",
 24 |                 "import\\s+shlex"
 25 |             ]
 26 |         },
 27 |         {
 28 |             "id": "CWE-89",
 29 |             "name": "SQL Injection",
 30 |             "description": "The software constructs all or part of an SQL command using externally-influenced input from an upstream component, but it does not neutralize or incorrectly neutralizes special elements that could modify the intended SQL command when it is sent to a downstream component.",
 31 |             "severity": "CRITICAL",
 32 |             "cwe_id": "CWE-89",
 33 |             "affected_languages": ["python", "php", "java"],
 34 |             "code_patterns": [
 35 |                 "cursor\\.execute\\(.*%.*\\)",
 36 |                 "cursor\\.execute\\(.*\\+.*\\)",
 37 |                 "cursor\\.executemany\\(.*%.*\\)",
 38 |                 "mysql_query\\(.*\\$.*\\)"
 39 |             ],
 40 |             "fix_patterns": [
 41 |                 "cursor.execute(query, params)",
 42 |                 "cursor.executemany(query, params)",
 43 |                 "prepared_statement.setString(1, input)"
 44 |             ],
 45 |             "context_patterns": [
 46 |                 "import\\s+sqlite3",
 47 |                 "import\\s+mysql",
 48 |                 "import\\s+psycopg2"
 49 |             ]
 50 |         },
 51 |         {
 52 |             "id": "CWE-22",
 53 |             "name": "Path Traversal",
 54 |             "description": "The software uses external input to construct a pathname that is intended to identify a file or directory that is located underneath a restricted parent directory, but the software does not properly neutralize special elements within the pathname that can cause the pathname to resolve to a location that is outside of the restricted directory.",
 55 |             "severity": "HIGH",
 56 |             "cwe_id": "CWE-22",
 57 |             "affected_languages": ["python", "php", "java", "javascript"],
 58 |             "code_patterns": [
 59 |                 "open\\(.*\\+.*\\)",
 60 |                 "file_get_contents\\(.*\\$.*\\)",
 61 |                 "new\\s+File\\(.*\\+.*\\)"
 62 |             ],
 63 |             "fix_patterns": [
 64 |                 "os.path.abspath(os.path.join(base_dir, filename))",
 65 |                 "os.path.normpath(path)",
 66 |                 "Path(path).resolve().is_relative_to(base_dir)"
 67 |             ],
 68 |             "context_patterns": [
 69 |                 "import\\s+os",
 70 |                 "from\\s+pathlib\\s+import\\s+Path"
 71 |             ]
 72 |         },
 73 |         {
 74 |             "id": "CWE-79",
 75 |             "name": "Cross-site Scripting (XSS)",
 76 |             "description": "The software does not neutralize or incorrectly neutralizes user-controllable input before it is placed in output that is used as a web page that is served to other users.",
 77 |             "severity": "HIGH",
 78 |             "cwe_id": "CWE-79",
 79 |             "affected_languages": ["python", "php", "javascript"],
 80 |             "code_patterns": [
 81 |                 "innerHTML\\s*=.*",
 82 |                 "document\\.write\\(.*\\)",
 83 |                 "\\$\\(.*\\)\\.html\\(.*\\)"
 84 |             ],
 85 |             "fix_patterns": [
 86 |                 "textContent = content",
 87 |                 "innerText = content",
 88 |                 "createElement('div')"
 89 |             ],
 90 |             "context_patterns": [
 91 |                 "<script>",
 92 |                 "document\\.",
 93 |                 "\\$\\("
 94 |             ]
 95 |         },
 96 |         {
 97 |             "id": "CWE-200",
 98 |             "name": "Information Exposure",
 99 |             "description": "The software exposes sensitive information to an actor that is not explicitly authorized to have access to that information.",
100 |             "severity": "MEDIUM",
101 |             "cwe_id": "CWE-200",
102 |             "affected_languages": ["python", "php", "java", "javascript"],
103 |             "code_patterns": [
104 |                 "print\\(.*exception.*\\)",
105 |                 "console\\.log\\(.*error.*\\)",
106 |                 "printStackTrace\\(\\)"
107 |             ],
108 |             "fix_patterns": [
109 |                 "logger.error(str(e))",
110 |                 "log.error('Error occurred', exc_info=True)",
111 |                 "console.error('Error:', error.message)"
112 |             ],
113 |             "context_patterns": [
114 |                 "try\\s*:",
115 |                 "catch\\s*\\(",
116 |                 "except\\s+"
117 |             ]
118 |         }
119 |     ]
120 | } 


--------------------------------------------------------------------------------
/signatures.json.example:
--------------------------------------------------------------------------------
  1 | {
  2 |     "signatures": [
  3 |         {
  4 |             "id": "CWE-78",
  5 |             "name": "OS命令注入",
  6 |             "description": "应用程序在构造操作系统命令时，未对用户输入进行适当的验证，可能导致命令注入攻击。",
  7 |             "severity": "高",
  8 |             "references": [
  9 |                 "https://cwe.mitre.org/data/definitions/78.html",
 10 |                 "https://owasp.org/www-community/attacks/Command_Injection"
 11 |             ],
 12 |             "code_patterns": [
 13 |                 "os\\.system\\(.*\\)",
 14 |                 "exec\\.Command\\(.*\\)",
 15 |                 "shell\\.Run\\(.*\\)"
 16 |             ],
 17 |             "fix_suggestions": [
 18 |                 "使用参数化命令执行",
 19 |                 "对用户输入进行严格的验证和过滤",
 20 |                 "使用安全的API替代直接的命令执行"
 21 |             ]
 22 |         },
 23 |         {
 24 |             "id": "CWE-89",
 25 |             "name": "SQL注入",
 26 |             "description": "应用程序在构造SQL查询时，未对用户输入进行适当的验证，可能导致SQL注入攻击。",
 27 |             "severity": "高",
 28 |             "references": [
 29 |                 "https://cwe.mitre.org/data/definitions/89.html",
 30 |                 "https://owasp.org/www-community/attacks/SQL_Injection"
 31 |             ],
 32 |             "code_patterns": [
 33 |                 "db\\.Query\\(.*\\+.*\\)",
 34 |                 "db\\.Exec\\(.*\\+.*\\)",
 35 |                 "sql\\.Raw\\(.*\\)"
 36 |             ],
 37 |             "fix_suggestions": [
 38 |                 "使用参数化查询",
 39 |                 "使用ORM框架",
 40 |                 "对用户输入进行验证和转义"
 41 |             ]
 42 |         },
 43 |         {
 44 |             "id": "CWE-200",
 45 |             "name": "敏感信息泄露",
 46 |             "description": "应用程序可能在日志、错误消息或响应中泄露敏感信息。",
 47 |             "severity": "中",
 48 |             "references": [
 49 |                 "https://cwe.mitre.org/data/definitions/200.html"
 50 |             ],
 51 |             "code_patterns": [
 52 |                 "log\\.Print\\(.*password.*\\)",
 53 |                 "fmt\\.Printf\\(.*secret.*\\)",
 54 |                 "\\.Debug\\(.*key.*\\)"
 55 |             ],
 56 |             "fix_suggestions": [
 57 |                 "避免记录敏感信息",
 58 |                 "使用适当的日志级别",
 59 |                 "实现敏感数据的脱敏处理"
 60 |             ]
 61 |         },
 62 |         {
 63 |             "id": "CWE-22",
 64 |             "name": "路径遍历",
 65 |             "description": "应用程序在处理文件路径时，未对用户输入进行适当的验证，可能导致路径遍历攻击。",
 66 |             "severity": "高",
 67 |             "references": [
 68 |                 "https://cwe.mitre.org/data/definitions/22.html",
 69 |                 "https://owasp.org/www-community/attacks/Path_Traversal"
 70 |             ],
 71 |             "code_patterns": [
 72 |                 "os\\.Open\\(.*\\)",
 73 |                 "ioutil\\.ReadFile\\(.*\\)",
 74 |                 "os\\.ReadFile\\(.*\\)"
 75 |             ],
 76 |             "fix_suggestions": [
 77 |                 "使用filepath.Clean()规范化路径",
 78 |                 "限制文件操作在特定目录内",
 79 |                 "验证文件路径不包含危险字符"
 80 |             ]
 81 |         },
 82 |         {
 83 |             "id": "CWE-326",
 84 |             "name": "弱加密",
 85 |             "description": "应用程序使用了不安全或已过时的加密算法。",
 86 |             "severity": "中",
 87 |             "references": [
 88 |                 "https://cwe.mitre.org/data/definitions/326.html"
 89 |             ],
 90 |             "code_patterns": [
 91 |                 "md5\\.New\\(\\)",
 92 |                 "sha1\\.New\\(\\)",
 93 |                 "des\\.NewCipher\\(.*\\)"
 94 |             ],
 95 |             "fix_suggestions": [
 96 |                 "使用强加密算法（如AES）",
 97 |                 "使用足够长度的密钥",
 98 |                 "定期更新加密算法"
 99 |             ]
100 |         }
101 |     ]
102 | } 


--------------------------------------------------------------------------------
/src/config/config.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Configuration settings for Movery
  3 | """
  4 | import os
  5 | from dataclasses import dataclass, field
  6 | from typing import Dict, List, Optional
  7 | 
  8 | @dataclass
  9 | class ProcessingConfig:
 10 |     # Number of parallel processes to use
 11 |     num_processes: int = os.cpu_count() or 4
 12 |     
 13 |     # Memory settings
 14 |     max_memory_usage: int = 8 * 1024 * 1024 * 1024  # 8GB
 15 |     chunk_size: int = 1024 * 1024  # 1MB
 16 |     
 17 |     # Cache settings
 18 |     enable_cache: bool = True
 19 |     cache_dir: str = ".cache"
 20 |     cache_max_size: int = 1024 * 1024 * 1024  # 1GB
 21 |     
 22 |     # Language support
 23 |     supported_languages: List[str] = ["c", "cpp", "java", "python", "go", "javascript"]
 24 |     file_extensions: Dict[str, List[str]] = {
 25 |         "c": [".c", ".h"],
 26 |         "cpp": [".cpp", ".hpp", ".cc", ".hh"],
 27 |         "java": [".java"],
 28 |         "python": [".py"],
 29 |         "go": [".go"],
 30 |         "javascript": [".js", ".jsx", ".ts", ".tsx"]
 31 |     }
 32 | 
 33 | @dataclass 
 34 | class DetectorConfig:
 35 |     # Vulnerability detection settings
 36 |     min_similarity: float = 0.8
 37 |     max_edit_distance: int = 10
 38 |     context_lines: int = 3
 39 |     
 40 |     # Analysis depth
 41 |     max_ast_depth: int = 50
 42 |     max_cfg_nodes: int = 1000
 43 |     
 44 |     # Pattern matching
 45 |     enable_semantic_match: bool = True
 46 |     enable_syntax_match: bool = True
 47 |     enable_token_match: bool = True
 48 |     
 49 |     # Reporting
 50 |     report_format: str = "html"
 51 |     report_dir: str = "reports"
 52 |     
 53 |     # Filtering
 54 |     exclude_patterns: List[str] = [
 55 |         "**/test/*",
 56 |         "**/tests/*", 
 57 |         "**/vendor/*",
 58 |         "**/node_modules/*"
 59 |     ]
 60 | 
 61 | @dataclass
 62 | class LoggingConfig:
 63 |     # Log settings
 64 |     log_level: str = "INFO"
 65 |     log_file: str = "movery.log"
 66 |     log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 67 |     
 68 |     # Performance monitoring
 69 |     enable_profiling: bool = False
 70 |     profile_output: str = "profile.stats"
 71 |     
 72 |     # Progress reporting
 73 |     show_progress: bool = True
 74 |     progress_interval: int = 1  # seconds
 75 | 
 76 | @dataclass
 77 | class SecurityConfig:
 78 |     # Security settings
 79 |     max_file_size: int = 100 * 1024 * 1024  # 100MB
 80 |     allowed_schemes: List[str] = ["file", "http", "https"]
 81 |     enable_sandbox: bool = True
 82 |     sandbox_timeout: int = 60  # seconds
 83 |     
 84 |     # Access control
 85 |     require_auth: bool = False
 86 |     auth_token: Optional[str] = None
 87 |     
 88 |     # Rate limiting
 89 |     rate_limit: int = 100  # requests per minute
 90 |     rate_limit_period: int = 60  # seconds
 91 | 
 92 | class MoveryConfig:
 93 |     def __init__(self):
 94 |         self.processing = ProcessingConfig()
 95 |         self.detector = DetectorConfig()
 96 |         self.logging = LoggingConfig()
 97 |         self.security = SecurityConfig()
 98 |         
 99 |     @classmethod
100 |     def from_file(cls, config_file: str) -> "MoveryConfig":
101 |         """Load configuration from file"""
102 |         # TODO: Implement config file loading
103 |         return cls()
104 |         
105 |     def to_file(self, config_file: str):
106 |         """Save configuration to file"""
107 |         # TODO: Implement config file saving
108 |         pass
109 |         
110 |     def validate(self) -> bool:
111 |         """Validate configuration settings"""
112 |         # TODO: Add validation logic
113 |         return True
114 | 
115 | # Global configuration instance, built with defaults when this module is imported
116 | config = MoveryConfig() 


--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Main entry point for Movery
  3 | """
  4 | import os
  5 | import sys
  6 | import argparse
  7 | import logging
  8 | import json
  9 | from typing import List, Dict, Optional
 10 | import time
 11 | from pathlib import Path
 12 | import concurrent.futures
 13 | import traceback
 14 | 
 15 | from .config.config import config, MoveryConfig
 16 | from .utils.logging import setup_logging, get_logger
 17 | from .utils.memory import memory_monitor
 18 | from .utils.parallel import worker_pool
 19 | from .analyzers.language import LanguageAnalyzerFactory
 20 | from .detectors.vulnerability import detector
 21 | from .reporters.html import reporter
 22 | 
 23 | logger = get_logger(__name__)  # module-level logger named after this module
 24 | 
 25 | def parse_args():
 26 |     """Parse command line arguments"""
 27 |     parser = argparse.ArgumentParser(
 28 |         description="Movery - A tool for discovering modified vulnerable code clones"
 29 |     )
 30 |     
 31 |     parser.add_argument(
 32 |         "target",
 33 |         help="Target program or directory to analyze"
 34 |     )
 35 |     
 36 |     parser.add_argument(
 37 |         "-c", "--config",
 38 |         help="Path to configuration file",
 39 |         default="config.json"
 40 |     )
 41 |     
 42 |     parser.add_argument(
 43 |         "-s", "--signatures",
 44 |         help="Path to vulnerability signatures file",
 45 |         default="signatures.json"
 46 |     )
 47 |     
 48 |     parser.add_argument(
 49 |         "-o", "--output",
 50 |         help="Output directory for reports",
 51 |         default="reports"
 52 |     )
 53 |     
 54 |     parser.add_argument(
 55 |         "-j", "--jobs",
 56 |         help="Number of parallel jobs",
 57 |         type=int,
 58 |         default=None
 59 |     )
 60 |     
 61 |     parser.add_argument(
 62 |         "-v", "--verbose",
 63 |         help="Enable verbose output",
 64 |         action="store_true"
 65 |     )
 66 |     
 67 |     parser.add_argument(
 68 |         "--cache",
 69 |         help="Enable result caching",
 70 |         action="store_true"
 71 |     )
 72 |     
 73 |     return parser.parse_args()
 74 | 
 75 | def load_config(config_file: str) -> MoveryConfig:
 76 |     """Load configuration from file"""
 77 |     if os.path.exists(config_file):
 78 |         return MoveryConfig.from_file(config_file)
 79 |     return MoveryConfig()
 80 | 
 81 | def find_source_files(target: str) -> List[str]:
 82 |     """Find all source files in target"""
 83 |     source_files = []
 84 |     
 85 |     for root, _, files in os.walk(target):
 86 |         for file in files:
 87 |             file_path = os.path.join(root, file)
 88 |             
 89 |             # Skip files larger than limit
 90 |             if os.path.getsize(file_path) > config.security.max_file_size:
 91 |                 logger.warning(f"Skipping large file: {file_path}")
 92 |                 continue
 93 |                 
 94 |             # Skip files matching exclude patterns
 95 |             skip = False
 96 |             for pattern in config.detector.exclude_patterns:
 97 |                 if Path(file_path).match(pattern):
 98 |                     skip = True
 99 |                     break
100 |             if skip:
101 |                 continue
102 |                 
103 |             # Check if file is supported
104 |             if LanguageAnalyzerFactory.get_analyzer(file_path):
105 |                 source_files.append(file_path)
106 |                 
107 |     return source_files
108 | 
109 | def analyze_file(file: str) -> List[Dict]:
110 |     """Analyze single file for vulnerabilities"""
111 |     try:
112 |         matches = detector.detect(file)
113 |         return [match.to_dict() for match in matches]
114 |     except Exception as e:
115 |         logger.error(f"Error analyzing file {file}: {str(e)}")
116 |         logger.debug(traceback.format_exc())
117 |         return []
118 | 
119 | def main():
120 |     """Main entry point"""
121 |     start_time = time.time()
122 |     
123 |     # Parse arguments
124 |     args = parse_args()
125 |     
126 |     # Setup logging
127 |     log_level = logging.DEBUG if args.verbose else logging.INFO
128 |     setup_logging(log_level=log_level)
129 |     
130 |     logger.info("Starting Movery...")
131 |     
132 |     try:
133 |         # Load configuration
134 |         config = load_config(args.config)
135 |         if args.jobs:
136 |             config.processing.num_processes = args.jobs
137 |         config.processing.enable_cache = args.cache
138 |         
139 |         # Load vulnerability signatures
140 |         detector.load_signatures(args.signatures)
141 |         
142 |         # Find source files
143 |         target_path = os.path.abspath(args.target)
144 |         if not os.path.exists(target_path):
145 |             raise FileNotFoundError(f"Target not found: {target_path}")
146 |             
147 |         logger.info(f"Analyzing target: {target_path}")
148 |         source_files = find_source_files(target_path)
149 |         logger.info(f"Found {len(source_files)} source files")
150 |         
151 |         # Start worker pool
152 |         worker_pool.start()
153 |         
154 |         # Process files in parallel
155 |         all_matches = []
156 |         with concurrent.futures.ThreadPoolExecutor(
157 |             max_workers=config.processing.num_processes
158 |         ) as executor:
159 |             future_to_file = {
160 |                 executor.submit(analyze_file, file): file
161 |                 for file in source_files
162 |             }
163 |             
164 |             for future in concurrent.futures.as_completed(future_to_file):
165 |                 file = future_to_file[future]
166 |                 try:
167 |                     matches = future.result()
168 |                     if matches:
169 |                         all_matches.extend(matches)
170 |                         logger.info(
171 |                             f"Found {len(matches)} vulnerabilities in {file}")
172 |                 except Exception as e:
173 |                     logger.error(f"Error processing {file}: {str(e)}")
174 |                     
175 |         # Generate report
176 |         if all_matches:
177 |             os.makedirs(args.output, exist_ok=True)
178 |             report_file = os.path.join(
179 |                 args.output,
180 |                 f"report_{int(time.time())}.html"
181 |             )
182 |             reporter.generate_report(all_matches, report_file)
183 |             logger.info(f"Generated report: {report_file}")
184 |         else:
185 |             logger.info("No vulnerabilities found")
186 |             
187 |         elapsed_time = time.time() - start_time
188 |         logger.info(f"Analysis completed in {elapsed_time:.2f} seconds")
189 |         
190 |     except Exception as e:
191 |         logger.error(f"Error: {str(e)}")
192 |         logger.debug(traceback.format_exc())
193 |         sys.exit(1)
194 |     finally:
195 |         worker_pool.stop()
196 | 
197 | if __name__ == "__main__":  # run the CLI only when executed as a script
198 |     main() 


--------------------------------------------------------------------------------
/src/utils/logging.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Logging utilities for Movery
  3 | """
  4 | import logging
  5 | import sys
  6 | import os
  7 | import time
  8 | from typing import Optional
  9 | from datetime import datetime
 10 | from functools import wraps
 11 | import threading
 12 | from concurrent.futures import ThreadPoolExecutor
 13 | import queue
 14 | import json
 15 | 
 16 | from ..config.config import config
 17 | 
 18 | class AsyncLogHandler(logging.Handler):
 19 |     """Asynchronous log handler that processes logs in a separate thread"""
 20 |     
 21 |     def __init__(self, capacity: int = 1000):
 22 |         super().__init__()
 23 |         self.queue = queue.Queue(maxsize=capacity)
 24 |         self.executor = ThreadPoolExecutor(max_workers=1)
 25 |         self.running = True
 26 |         self.worker = threading.Thread(target=self._process_logs)
 27 |         self.worker.daemon = True
 28 |         self.worker.start()
 29 |         
 30 |     def emit(self, record: logging.LogRecord):
 31 |         try:
 32 |             self.queue.put_nowait(record)
 33 |         except queue.Full:
 34 |             sys.stderr.write(f"Log queue full, dropping message: {record.getMessage()}\n")
 35 |             
 36 |     def _process_logs(self):
 37 |         while self.running:
 38 |             try:
 39 |                 record = self.queue.get(timeout=0.1)
 40 |                 self.executor.submit(self._write_log, record)
 41 |             except queue.Empty:
 42 |                 continue
 43 |             except Exception as e:
 44 |                 sys.stderr.write(f"Error processing log: {str(e)}\n")
 45 |                 
 46 |     def _write_log(self, record: logging.LogRecord):
 47 |         try:
 48 |             message = self.format(record)
 49 |             with open(config.logging.log_file, "a", encoding="utf-8") as f:
 50 |                 f.write(message + "\n")
 51 |         except Exception as e:
 52 |             sys.stderr.write(f"Error writing log: {str(e)}\n")
 53 |             
 54 |     def close(self):
 55 |         self.running = False
 56 |         self.worker.join()
 57 |         self.executor.shutdown()
 58 |         super().close()
 59 | 
 60 | class ProgressLogger:
 61 |     """Logger for tracking and displaying progress"""
 62 |     
 63 |     def __init__(self, total: int, desc: str = "", interval: float = 0.1):
 64 |         self.total = total
 65 |         self.desc = desc
 66 |         self.interval = interval
 67 |         self.current = 0
 68 |         self.start_time = time.time()
 69 |         self.last_update = 0
 70 |         
 71 |     def update(self, n: int = 1):
 72 |         self.current += n
 73 |         now = time.time()
 74 |         if now - self.last_update >= self.interval:
 75 |             self._display_progress()
 76 |             self.last_update = now
 77 |             
 78 |     def _display_progress(self):
 79 |         percentage = (self.current / self.total) * 100
 80 |         elapsed = time.time() - self.start_time
 81 |         rate = self.current / elapsed if elapsed > 0 else 0
 82 |         eta = (self.total - self.current) / rate if rate > 0 else 0
 83 |         
 84 |         sys.stdout.write(f"\r{self.desc}: [{self.current}/{self.total}] "
 85 |                         f"{percentage:.1f}% Rate: {rate:.1f}/s ETA: {eta:.1f}s")
 86 |         sys.stdout.flush()
 87 |         
 88 |     def finish(self):
 89 |         self._display_progress()
 90 |         sys.stdout.write("\n")
 91 |         sys.stdout.flush()
 92 | 
 93 | class JsonFormatter(logging.Formatter):
 94 |     """Format logs as JSON for better parsing"""
 95 |     
 96 |     def format(self, record: logging.LogRecord) -> str:
 97 |         data = {
 98 |             "timestamp": datetime.fromtimestamp(record.created).isoformat(),
 99 |             "level": record.levelname,
100 |             "logger": record.name,
101 |             "message": record.getMessage(),
102 |             "module": record.module,
103 |             "function": record.funcName,
104 |             "line": record.lineno
105 |         }
106 |         
107 |         if record.exc_info:
108 |             data["exception"] = self.formatException(record.exc_info)
109 |             
110 |         if hasattr(record, "extra"):
111 |             data.update(record.extra)
112 |             
113 |         return json.dumps(data)
114 | 
115 | def setup_logging(log_file: Optional[str] = None):
116 |     """Setup logging configuration"""
117 |     if log_file:
118 |         config.logging.log_file = log_file
119 |         
120 |     # Create log directory if needed
121 |     os.makedirs(os.path.dirname(config.logging.log_file), exist_ok=True)
122 |     
123 |     # Setup root logger
124 |     root_logger = logging.getLogger()
125 |     root_logger.setLevel(config.logging.log_level)
126 |     
127 |     # Console handler
128 |     console_handler = logging.StreamHandler(sys.stdout)
129 |     console_handler.setLevel(logging.INFO)
130 |     console_formatter = logging.Formatter(config.logging.log_format)
131 |     console_handler.setFormatter(console_formatter)
132 |     root_logger.addHandler(console_handler)
133 |     
134 |     # File handler
135 |     file_handler = AsyncLogHandler()
136 |     file_handler.setLevel(logging.DEBUG)
137 |     file_formatter = JsonFormatter()
138 |     file_handler.setFormatter(file_formatter)
139 |     root_logger.addHandler(file_handler)
140 |     
141 | def log_execution_time(logger: Optional[logging.Logger] = None):
142 |     """Decorator to log function execution time"""
143 |     def decorator(func):
144 |         @wraps(func)
145 |         def wrapper(*args, **kwargs):
146 |             start_time = time.time()
147 |             result = func(*args, **kwargs)
148 |             elapsed_time = time.time() - start_time
149 |             
150 |             log = logger or logging.getLogger(func.__module__)
151 |             log.debug(f"{func.__name__} executed in {elapsed_time:.2f} seconds")
152 |             
153 |             return result
154 |         return wrapper
155 |     return decorator
156 | 
157 | def get_logger(name: str) -> logging.Logger:
158 |     """Get a logger instance with the given name"""
159 |     return logging.getLogger(name)
160 | 
161 | # Configure the root logger as a side effect of importing this module
162 | setup_logging() 


--------------------------------------------------------------------------------
/src/utils/memory.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Memory management utilities for Movery
  3 | """
  4 | import os
  5 | import mmap
  6 | import psutil
  7 | import gc
  8 | from typing import Optional, Generator, Any
  9 | from contextlib import contextmanager
 10 | import threading
 11 | import weakref
 12 | from collections import OrderedDict
 13 | import logging
 14 | 
 15 | from ..config.config import config
 16 | 
 17 | logger = logging.getLogger(__name__)  # module-level logger named after this module
 18 | 
 19 | class MemoryMonitor:
 20 |     """Monitor memory usage and enforce limits"""
 21 |     
 22 |     def __init__(self, max_memory: Optional[int] = None):
 23 |         self.max_memory = max_memory or config.processing.max_memory_usage
 24 |         self.process = psutil.Process()
 25 |         self._lock = threading.Lock()
 26 |         self._last_check = 0
 27 |         
 28 |     def get_memory_usage(self) -> int:
 29 |         """Get current memory usage in bytes"""
 30 |         return self.process.memory_info().rss
 31 |         
 32 |     def check_memory(self) -> bool:
 33 |         """Check if memory usage is within limits"""
 34 |         with self._lock:
 35 |             current_usage = self.get_memory_usage()
 36 |             if current_usage > self.max_memory:
 37 |                 logger.warning(f"Memory usage ({current_usage} bytes) exceeds limit "
 38 |                              f"({self.max_memory} bytes)")
 39 |                 return False
 40 |             return True
 41 |             
 42 |     def force_garbage_collection(self):
 43 |         """Force garbage collection"""
 44 |         gc.collect()
 45 |         
 46 |     @contextmanager
 47 |     def monitor_operation(self, operation_name: str):
 48 |         """Context manager to monitor memory during an operation"""
 49 |         start_usage = self.get_memory_usage()
 50 |         try:
 51 |             yield
 52 |         finally:
 53 |             end_usage = self.get_memory_usage()
 54 |             delta = end_usage - start_usage
 55 |             logger.debug(f"Memory delta for {operation_name}: {delta} bytes")
 56 |             if not self.check_memory():
 57 |                 self.force_garbage_collection()
 58 | 
 59 | class LRUCache:
 60 |     """Least Recently Used Cache with memory limit"""
 61 |     
 62 |     def __init__(self, max_size: Optional[int] = None):
 63 |         self.max_size = max_size or config.processing.cache_max_size
 64 |         self._cache = OrderedDict()
 65 |         self._size = 0
 66 |         self._lock = threading.Lock()
 67 |         
 68 |     def get(self, key: str) -> Optional[Any]:
 69 |         """Get item from cache"""
 70 |         with self._lock:
 71 |             if key in self._cache:
 72 |                 value = self._cache.pop(key)
 73 |                 self._cache[key] = value
 74 |                 return value
 75 |             return None
 76 |             
 77 |     def put(self, key: str, value: Any, size: Optional[int] = None):
 78 |         """Put item in cache"""
 79 |         if not size:
 80 |             size = sys.getsizeof(value)
 81 |             
 82 |         if size > self.max_size:
 83 |             logger.warning(f"Item size ({size} bytes) exceeds cache limit "
 84 |                          f"({self.max_size} bytes)")
 85 |             return
 86 |             
 87 |         with self._lock:
 88 |             if key in self._cache:
 89 |                 self._size -= sys.getsizeof(self._cache[key])
 90 |                 
 91 |             while self._size + size > self.max_size and self._cache:
 92 |                 _, removed = self._cache.popitem(last=False)
 93 |                 self._size -= sys.getsizeof(removed)
 94 |                 
 95 |             self._cache[key] = value
 96 |             self._size += size
 97 |             
 98 |     def clear(self):
 99 |         """Clear cache"""
100 |         with self._lock:
101 |             self._cache.clear()
102 |             self._size = 0
103 | 
104 | class MemoryMappedFile:
105 |     """Memory mapped file for efficient large file handling"""
106 |     
107 |     def __init__(self, filename: str, mode: str = "r"):
108 |         self.filename = filename
109 |         self.mode = mode
110 |         self._file = None
111 |         self._mmap = None
112 |         
113 |     def __enter__(self):
114 |         access = mmap.ACCESS_READ
115 |         if "w" in self.mode:
116 |             access = mmap.ACCESS_WRITE
117 |             
118 |         self._file = open(self.filename, mode=self.mode + "b")
119 |         self._mmap = mmap.mmap(self._file.fileno(), 0, access=access)
120 |         return self
121 |         
122 |     def __exit__(self, exc_type, exc_val, exc_tb):
123 |         if self._mmap:
124 |             self._mmap.close()
125 |         if self._file:
126 |             self._file.close()
127 |             
128 |     def read(self, size: int = -1) -> bytes:
129 |         """Read from memory mapped file"""
130 |         if size == -1:
131 |             return self._mmap[:]
132 |         return self._mmap[:size]
133 |         
134 |     def write(self, data: bytes):
135 |         """Write to memory mapped file"""
136 |         if "w" not in self.mode:
137 |             raise IOError("File not opened for writing")
138 |         self._mmap.write(data)
139 |         
140 |     def seek(self, offset: int):
141 |         """Seek to position in file"""
142 |         self._mmap.seek(offset)
143 | 
144 | def chunk_iterator(data: Any, chunk_size: Optional[int] = None) -> Generator:
145 |     """Iterator that yields chunks of data"""
146 |     if not chunk_size:
147 |         chunk_size = config.processing.chunk_size
148 |         
149 |     if isinstance(data, (bytes, str)):
150 |         for i in range(0, len(data), chunk_size):
151 |             yield data[i:i + chunk_size]
152 |     elif hasattr(data, "__iter__"):
153 |         chunk = []
154 |         for item in data:
155 |             chunk.append(item)
156 |             if len(chunk) >= chunk_size:
157 |                 yield chunk
158 |                 chunk = []
159 |         if chunk:
160 |             yield chunk
161 |     else:
162 |         raise TypeError(f"Unsupported data type: {type(data)}")
163 | 
164 | # Global memory monitor instance; limit comes from config.processing.max_memory_usage
165 | memory_monitor = MemoryMonitor()
166 | 
167 | # Global cache instance; capacity comes from config.processing.cache_max_size
168 | cache = LRUCache() 


--------------------------------------------------------------------------------