├── fastsymapi ├── sql_db │ ├── __init__.py │ ├── database.py │ ├── models.py │ └── crud.py ├── logging.py ├── __init__.py └── symbols.py ├── .gitignore ├── setup.py ├── requirements.txt ├── fastsymapi_tests.py ├── CONFIGURATION.md ├── README.md ├── .github └── workflows │ └── main.yml └── test_symbols_improved.py /fastsymapi/sql_db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /fastsymapi/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger("uvicorn") 4 | 5 | logger.setLevel("DEBUG") -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .fsymapi/** 2 | *.pdb 3 | *.dll 4 | *.pd_ 5 | *.pyc 6 | *.db 7 | .fsa 8 | .fastsymapi 9 | .fapi 10 | fastsymapi.egg-info 11 | dist 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='fastsymapi', 5 | version='1.2', 6 | packages=find_packages() 7 | ) 8 | -------------------------------------------------------------------------------- /fastsymapi/sql_db/database.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from sqlalchemy.ext.declarative import declarative_base 3 | from sqlalchemy.orm import sessionmaker 4 | 5 | SQLALCHEMY_DATABASE_URL = "sqlite:///./fsymapi.db" 6 | 7 | engine = create_engine( 8 | SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread":False}, 9 | pool_size=20, 10 | max_overflow=30 11 | ) 12 | 13 | session_local = sessionmaker(autocommit=False, autoflush=False, bind=engine) 14 | 15 | base = declarative_base() 16 | 17 | def get_db() -> sessionmaker: 18 | db = session_local() 19 | try: 20 | yield db 21 | finally: 22 | db.close() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.6.0 3 | certifi==2024.8.30 4 | charset-normalizer==3.4.0 5 | click==8.1.7 6 | colorama==0.4.6 7 | construct==2.10.70 8 | exceptiongroup==1.2.2 9 | fastapi==0.100.0 10 | greenlet==3.1.1 11 | h11==0.14.0 12 | httpcore==1.0.6 13 | httpx==0.27.2 14 | idna==3.4 15 | iniconfig==2.0.0 16 | packaging==24.1 17 | pip-review==1.3.0 18 | pipdeptree==2.23.4 19 | pluggy==1.5.0 20 | pydantic==2.1.1 21 | pydantic_core==2.4.0 22 | pytest==8.3.3 23 | requests==2.31.0 24 | setuptools==75.1.0 25 | sniffio==1.3.0 26 | SQLAlchemy==2.0.19 27 | starlette==0.27.0 28 | typing_extensions==4.7.1 29 | urllib3==2.2.3 30 | uvicorn==0.31.1 31 | -------------------------------------------------------------------------------- /fastsymapi/sql_db/models.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, Integer, String, Boolean, UniqueConstraint 2 | from fastsymapi.sql_db.database import base 3 | 4 | class SymbolEntry(base): 5 | __tablename__ = "symbolentry" 6 | id = Column(Integer, primary_key=True, index=True, unique=True) 7 | guid = Column(String, index=True) 8 | pdbname = Column(String, index=True) 9 | pdbfile = Column(String, index=True) 10 | downloading = Column(Boolean, index=True, default=False) 11 | found = Column(Boolean, index=True, default=False) 12 | 13 | # Adds a unique constraint on the guid, pdbfile 14 | __table_args__ = (UniqueConstraint('guid', 'pdbfile'),) 15 | 16 | -------------------------------------------------------------------------------- /fastsymapi/__init__.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastsymapi.sql_db import models 3 | from fastsymapi.logging import logger 4 | from fastsymapi.sql_db.database import engine 5 | from fastsymapi.symbols import sym 6 | 7 | 8 | def create_app(): 9 | """ Create the application context """ 10 | 11 | # Create the database tables 12 | models.base.metadata.create_all(bind=engine) 13 | 14 | # instantiate FastAPI 15 | app = FastAPI() 16 | 17 | # Symbol API 18 | app.include_router(sym) 19 | 20 | logger.info("Starting FastSymApi server...") 21 | 22 | return app 23 | 24 | 25 | app = create_app() 26 | 27 | 28 | @app.get("/health") 29 | def health_check(): 30 | return {"status": "ok"} 31 | -------------------------------------------------------------------------------- /fastsymapi/sql_db/crud.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.orm import Session 2 | from fastsymapi.sql_db import models 3 | 4 | def find_pdb_entry(db: Session, guid: str, pdbfile: str): 5 | """ Find a PDB entry """ 6 | return db.query(models.SymbolEntry).filter(models.SymbolEntry.guid == guid, 7 | models.SymbolEntry.pdbfile == pdbfile).first() 8 | 9 | def find_still_downloading(db: Session): 10 | """ Return all still downloading PDB entries """ 11 | return db.query(models.SymbolEntry).filter(models.SymbolEntry.downloading == True).all() 12 | 13 | def create_pdb_entry(db: Session, guid: str, pdbname: str, pdbfile: str, found: bool = False): 14 | """ Create a new PDB entry """ 15 | pdb_entry = models.SymbolEntry(pdbname=pdbname, guid=guid, pdbfile=pdbfile, found=found) 16 | db.add(pdb_entry) 17 | db.commit() 18 | db.refresh(pdb_entry) 19 | return pdb_entry 20 | 21 | def modify_pdb_entry(db: Session, pdbentry: models.SymbolEntry) -> None: 22 | """ Modify a PDB entry """ 23 | db.add(pdbentry) 24 | db.commit() 25 | db.refresh(pdbentry) -------------------------------------------------------------------------------- /fastsymapi_tests.py: -------------------------------------------------------------------------------- 1 | from fastsymapi.symbols import download_symbol 2 | from fastsymapi.sql_db import models 3 | from unittest.mock import patch, MagicMock 4 | from fastapi.testclient import TestClient 5 | from fastsymapi import app 6 | import pytest 7 | 8 | client = TestClient(app) 9 | 10 | 11 | @pytest.fixture 12 | def mock_gzip_open(): 13 | return MagicMock() 14 | 15 | 16 | def test_fail_get_symbol_api(): 17 | """Test a failed symbol retrieval""" 18 | 19 | response = client.get("/download/symbols/notreal/notreal/pdbfile") 20 | 21 | assert response.status_code == 404 # or whatever status code you expect 22 | 23 | 24 | @patch("gzip.open") 25 | @patch("fastsymapi.symbols.requests.get") 26 | @patch("fastsymapi.symbols.crud.modify_pdb_entry") 27 | @patch("fastsymapi.symbols.crud.create_pdb_entry") 28 | @patch("fastsymapi.symbols.os.path.exists") 29 | @patch("fastsymapi.symbols.os.makedirs") 30 | @patch("fastsymapi.symbols.open", new_callable=MagicMock) 31 | @patch("fastsymapi.symbols.shutil.move") 32 | def test_successful_pdb_download( 33 | self, 34 | mock_move, 35 | mock_open, 36 | mock_makedirs, 37 | mock_exists, 38 | mock_create_pdb_entry, 39 | mock_modify_pdb_entry, 40 | mock_get, 41 | mock_gzip_open, 42 | ): 43 | # Arrange 44 | mock_response = MagicMock() 45 | mock_response.status_code = 200 46 | mock_get.return_value = mock_response 47 | mock_exists.return_value = False 48 | mock_open.return_value.__enter__.return_value = MagicMock() 49 | mock_gzip_open.return_value.__enter__.return_value = MagicMock() 50 | pdbentry = models.SymbolEntry(pdbname="test", guid="test", pdbfile="test") 51 | db = MagicMock() 52 | 53 | # Act 54 | download_symbol(pdbentry, db) 55 | -------------------------------------------------------------------------------- /CONFIGURATION.md: -------------------------------------------------------------------------------- 1 | # FastSymApi Configuration 2 | 3 | This document describes the environment variables that can be used to configure FastSymApi behavior. 4 | 5 | ## Performance Configuration 6 | 7 | ### FASTSYM_CHUNK_SIZE 8 | - **Description**: Size of chunks used for file downloads and streaming (in bytes) 9 | - **Default**: 2097152 (2MB) 10 | - **Example**: `FASTSYM_CHUNK_SIZE=1048576` (1MB) 11 | 12 | ### FASTSYM_MAX_MEMORY_MB 13 | - **Description**: Maximum memory usage limit for streaming operations (in MB) 14 | - **Default**: 100 15 | - **Example**: `FASTSYM_MAX_MEMORY_MB=200` 16 | 17 | ## Network Reliability Configuration 18 | 19 | ### FASTSYM_MAX_RETRIES 20 | - **Description**: Maximum number of retry attempts for failed network requests 21 | - **Default**: 3 22 | - **Example**: `FASTSYM_MAX_RETRIES=5` 23 | 24 | ### FASTSYM_RETRY_BACKOFF 25 | - **Description**: Backoff factor for exponential retry delays 26 | - **Default**: 0.3 27 | - **Example**: `FASTSYM_RETRY_BACKOFF=0.5` 28 | 29 | ## Usage Examples 30 | 31 | ### Development Environment 32 | ```bash 33 | export FASTSYM_CHUNK_SIZE=1048576 # 1MB chunks for faster testing 34 | export FASTSYM_MAX_RETRIES=1 # Fewer retries for faster feedback 35 | export FASTSYM_MAX_MEMORY_MB=50 # Lower memory limit 36 | ``` 37 | 38 | ### Production Environment 39 | ```bash 40 | export FASTSYM_CHUNK_SIZE=4194304 # 4MB chunks for efficiency 41 | export FASTSYM_MAX_RETRIES=5 # More retries for reliability 42 | export FASTSYM_MAX_MEMORY_MB=500 # Higher memory limit 43 | export FASTSYM_RETRY_BACKOFF=1.0 # Longer backoff to avoid overwhelming servers 44 | ``` 45 | 46 | ### High-Performance Environment 47 | ```bash 48 | export FASTSYM_CHUNK_SIZE=8388608 # 8MB chunks 49 | export FASTSYM_MAX_MEMORY_MB=1000 # 1GB memory limit 50 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastSymApi 2 | 3 | The FastSymApi server is a Fast API server designed for debugging and development environments. It allows users to download and cache symbols from Microsoft, Google, and Mozilla symbol servers. Additionally, users can easily add support for other symbol servers. 4 | 5 | When clients connect to FastSymApi and attempt to download a symbol, the server first checks if the symbol exists within its `./fastsymapi/symbols` cache. If found, the server returns the symbol; otherwise, it responds with a status `404` and proceeds to download the symbol. On subsequent requests, if the symbol is already downloaded and cached, the server returns it, either compressed using GZIP or decompressed based on the presence of the Accept-Encoding: gzip header. GZIP compression reduces bandwidth usage and improves download speed for clients. 6 | 7 | ## Security and Robustness Improvements 8 | 9 | FastSymApi includes comprehensive security and robustness features: 10 | 11 | - **Path Sanitization**: Prevents directory traversal attacks by validating all path components 12 | - **Input Validation**: Validates all PDB entry fields to prevent injection attacks 13 | - **File Locking**: Thread-safe file operations prevent race conditions during concurrent downloads 14 | - **Retry Logic**: Automatic retry with exponential backoff for network requests 15 | - **Memory Management**: Configurable memory limits for streaming operations 16 | - **Error Handling**: Comprehensive error logging and graceful failure handling 17 | - **Configurable Performance**: Environment variables for tuning chunk sizes and retry behavior 18 | 19 | See [CONFIGURATION.md](CONFIGURATION.md) for detailed configuration options. 20 | 21 | FastSymApi has been tested and works with the following tools: 22 | 23 | - x64dbg 24 | - WinDbg 25 | - Symchk 26 | 27 | Supports the following symbol servers: 28 | 29 | - 30 | - 31 | - 32 | 33 | ## Setup FastSymApi 34 | 35 | Clone the repository 36 | 37 | ``` 38 | git clone https://github.com/P1tt1cus/FastSymApi 39 | ``` 40 | 41 | Install the requirements 42 | 43 | ``` 44 | pip install requirements.txt 45 | ``` 46 | 47 | Start the server 48 | 49 | ``` 50 | uvicorn fastsymapi:app --host 0.0.0.0 --port 80 51 | ``` 52 | 53 | Debug Mode 54 | 55 | ``` 56 | uvicorn fastsymapi:app --reload 57 | ``` 58 | 59 | ## Run Tests 60 | 61 | Run the original tests: 62 | ``` 63 | pytest fastsymapi_tests.py 64 | ``` 65 | 66 | Run comprehensive robustness tests: 67 | ``` 68 | pytest test_symbols_improved.py 69 | ``` 70 | 71 | Run all tests: 72 | ``` 73 | pytest 74 | ``` 75 | 76 | ## Configure x64dbg 77 | 78 | **options** >> **preferences** >> **misc** 79 | 80 | Symbol store 81 | 82 | ``` 83 | http://FastSymApiServerIp/ 84 | ``` 85 | 86 | ## Configure WinDbg 87 | 88 | ``` 89 | .sympath srv*C:\symbols*http://FastSymApiServerIp/ 90 | ``` 91 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: FastSymApi CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - "**" 7 | pull_request: 8 | branches: 9 | - main 10 | types: 11 | - closed 12 | 13 | permissions: 14 | contents: write # Grant write permissions to the contents 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Python 3.12 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: 3.12 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install -r requirements.txt 29 | - name: Run tests 30 | run: | 31 | pytest fastsymapi_tests.py 32 | - name: Build project 33 | run: | 34 | python setup.py bdist_wheel 35 | - name: Start Application 36 | run: | 37 | nohup uvicorn fastsymapi:app --host 0.0.0.0 --port 8000 & 38 | timeout-minutes: 1 39 | - name: Health Check 40 | run: | 41 | for i in {1..10}; do 42 | if curl -s http://localhost:8000/health | grep "ok"; then 43 | echo "Application started successfully" 44 | exit 0 45 | fi 46 | sleep 3 47 | done 48 | echo "Application failed to start" 49 | exit 1 50 | release: 51 | if: github.ref == 'refs/heads/main' 52 | needs: build 53 | runs-on: ubuntu-latest 54 | steps: 55 | - uses: actions/checkout@v2 56 | - name: Set up Python 3.12 57 | uses: actions/setup-python@v2 58 | with: 59 | python-version: 3.12 60 | - name: Install dependencies 61 | run: | 62 | python -m pip install --upgrade pip 63 | pip install -r requirements.txt 64 | - name: Build project 65 | run: | 66 | python setup.py bdist_wheel 67 | - name: Extract version 68 | id: extract_version 69 | run: | 70 | echo "VERSION=$(python setup.py --version)" >> $GITHUB_ENV 71 | - name: Check if Tag Exists 72 | id: check_tag 73 | run: | 74 | if git rev-parse "v${{ env.VERSION }}" >/dev/null 2>&1; then 75 | echo "TAG_EXISTS=true" >> $GITHUB_ENV 76 | else 77 | echo "TAG_EXISTS=false" >> $GITHUB_ENV 78 | fi 79 | - name: Create Tag 80 | if: ${{ env.TAG_EXISTS == false }} 81 | continue-on-error: true 82 | run: | 83 | git config --global user.email "30161177+P1tt1cus@users.noreply.github.com" 84 | git config --global user.name "P1tt1cus" 85 | git tag v${{ env.VERSION }} 86 | git push origin v${{ env.VERSION }} 87 | - name: Create GitHub Release 88 | if: ${{ env.TAG_EXISTS == false }} 89 | id: create_release 90 | uses: actions/create-release@v1 91 | env: 92 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 93 | with: 94 | tag_name: v${{ env.VERSION }} 95 | release_name: Release v${{ env.VERSION }} 96 | draft: false 97 | prerelease: false 98 | -------------------------------------------------------------------------------- /test_symbols_improved.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comprehensive unit tests for the improved symbols.py module. 3 | Tests cover all the robustness improvements including: 4 | - Path sanitization and security 5 | - Input validation 6 | - Error handling 7 | - File locking 8 | - Memory management 9 | - Retry logic 10 | """ 11 | 12 | import pytest 13 | import os 14 | import tempfile 15 | import threading 16 | import time 17 | from unittest.mock import patch, MagicMock, mock_open 18 | from fastsymapi.symbols import ( 19 | sanitize_path_component, 20 | validate_pdb_entry_fields, 21 | get_file_lock, 22 | create_requests_session, 23 | create_or_find_pdb_entry, 24 | download_symbol, 25 | download_and_save_symbol, 26 | get_symbol 27 | ) 28 | from fastsymapi.sql_db import models 29 | from fastapi import Response 30 | from requests.exceptions import RequestException 31 | 32 | 33 | class TestPathSanitization: 34 | """Test path sanitization functions.""" 35 | 36 | def test_sanitize_valid_path_component(self): 37 | """Test that valid path components pass through unchanged.""" 38 | valid_components = ["test.pdb", "valid_file", "normal-file", "file123"] 39 | for component in valid_components: 40 | assert sanitize_path_component(component) == component 41 | 42 | def test_sanitize_prevents_directory_traversal(self): 43 | """Test that directory traversal attempts are blocked.""" 44 | with pytest.raises(ValueError): 45 | sanitize_path_component("../evil") 46 | 47 | with pytest.raises(ValueError): 48 | sanitize_path_component("..\\evil") 49 | 50 | with pytest.raises(ValueError): 51 | sanitize_path_component("normal/../traversal") 52 | 53 | def test_sanitize_prevents_path_separators(self): 54 | """Test that path separators are blocked.""" 55 | with pytest.raises(ValueError): 56 | sanitize_path_component("path/with/slashes") 57 | 58 | with pytest.raises(ValueError): 59 | sanitize_path_component("path\\with\\backslashes") 60 | 61 | def test_sanitize_prevents_invalid_characters(self): 62 | """Test that invalid characters are blocked.""" 63 | with pytest.raises(ValueError): 64 | sanitize_path_component("fileinvalid") 65 | 66 | with pytest.raises(ValueError): 67 | sanitize_path_component("file|with|pipes") 68 | 69 | with pytest.raises(ValueError): 70 | sanitize_path_component("file*with*wildcards") 71 | 72 | def test_sanitize_empty_component(self): 73 | """Test that empty components are rejected.""" 74 | with pytest.raises(ValueError): 75 | sanitize_path_component("") 76 | 77 | with pytest.raises(ValueError): 78 | sanitize_path_component(None) 79 | 80 | 81 | class TestInputValidation: 82 | """Test input validation functions.""" 83 | 84 | def test_validate_pdb_entry_fields_valid(self): 85 | """Test validation with valid inputs.""" 86 | # Should not raise any exception 87 | validate_pdb_entry_fields("test.pdb", "guid123", "file.pdb") 88 | 89 | def test_validate_pdb_entry_fields_empty(self): 90 | """Test validation rejects empty fields.""" 91 | with pytest.raises(ValueError): 92 | validate_pdb_entry_fields("", "guid", "file") 93 | 94 | with pytest.raises(ValueError): 95 | validate_pdb_entry_fields("name", "", "file") 96 | 97 | with pytest.raises(ValueError): 98 | validate_pdb_entry_fields("name", "guid", "") 99 | 100 | def test_validate_pdb_entry_fields_too_long(self): 101 | """Test validation rejects fields that are too long.""" 102 | long_string = "a" * 256 103 | 104 | with pytest.raises(ValueError): 105 | validate_pdb_entry_fields(long_string, "guid", "file") 106 | 107 | with pytest.raises(ValueError): 108 | validate_pdb_entry_fields("name", long_string, "file") 109 | 110 | with pytest.raises(ValueError): 111 | validate_pdb_entry_fields("name", "guid", long_string) 112 | 113 | def test_validate_pdb_entry_fields_invalid_characters(self): 114 | """Test validation rejects invalid characters.""" 115 | with pytest.raises(ValueError): 116 | validate_pdb_entry_fields("../evil", "guid", "file") 117 | 118 | with pytest.raises(ValueError): 119 | validate_pdb_entry_fields("name", "guid|invalid", "file") 120 | 121 | 122 | class TestFileLocking: 123 | """Test file locking mechanisms.""" 124 | 125 | def test_get_file_lock_same_path(self): 126 | """Test that the same path returns the same lock.""" 127 | lock1 = get_file_lock("/test/path") 128 | lock2 = get_file_lock("/test/path") 129 | assert lock1 is lock2 130 | 131 | def test_get_file_lock_different_paths(self): 132 | """Test that different paths return different locks.""" 133 | lock1 = get_file_lock("/test/path1") 134 | lock2 = get_file_lock("/test/path2") 135 | assert lock1 is not lock2 136 | 137 | def test_file_lock_concurrency(self): 138 | """Test that file locks prevent concurrent access.""" 139 | results = [] 140 | 141 | def worker(worker_id): 142 | lock = get_file_lock("/test/concurrent") 143 | with lock: 144 | results.append(f"start_{worker_id}") 145 | time.sleep(0.1) # Simulate work 146 | results.append(f"end_{worker_id}") 147 | 148 | threads = [threading.Thread(target=worker, args=(i,)) for i in range(2)] 149 | for t in threads: 150 | t.start() 151 | for t in threads: 152 | t.join() 153 | 154 | # Results should show that one worker completes entirely before the other starts 155 | assert len(results) == 4 156 | # Either worker 0 completes first or worker 1 completes first 157 | assert (results == ["start_0", "end_0", "start_1", "end_1"] or 158 | results == ["start_1", "end_1", "start_0", "end_0"]) 159 | 160 | 161 | class TestRetryLogic: 162 | """Test retry logic in requests session.""" 163 | 164 | def test_create_requests_session(self): 165 | """Test that requests session is created with retry strategy.""" 166 | session = create_requests_session() 167 | assert session is not None 168 | 169 | # Check that adapters are mounted 170 | assert "http://" in session.adapters 171 | assert "https://" in session.adapters 172 | 173 | 174 | class TestHelperFunctions: 175 | """Test helper functions for reducing code duplication.""" 176 | 177 | @patch('fastsymapi.symbols.crud.find_pdb_entry') 178 | @patch('fastsymapi.symbols.crud.create_pdb_entry') 179 | def test_create_or_find_pdb_entry_existing(self, mock_create, mock_find): 180 | """Test helper function when entry already exists.""" 181 | mock_db = MagicMock() 182 | mock_entry = MagicMock() 183 | mock_find.return_value = mock_entry 184 | 185 | result = create_or_find_pdb_entry(mock_db, "guid", "name", "file") 186 | 187 | assert result == mock_entry 188 | mock_find.assert_called_once_with(mock_db, "guid", "file") 189 | mock_create.assert_not_called() 190 | 191 | @patch('fastsymapi.symbols.crud.find_pdb_entry') 192 | @patch('fastsymapi.symbols.crud.create_pdb_entry') 193 | def test_create_or_find_pdb_entry_new(self, mock_create, mock_find): 194 | """Test helper function when entry doesn't exist.""" 195 | mock_db = MagicMock() 196 | mock_entry = MagicMock() 197 | mock_find.return_value = None 198 | mock_create.return_value = mock_entry 199 | 200 | result = create_or_find_pdb_entry(mock_db, "guid", "name", "file", True) 201 | 202 | assert result == mock_entry 203 | mock_find.assert_called_once_with(mock_db, "guid", "file") 204 | mock_create.assert_called_once_with(mock_db, "guid", "name", "file", True) 205 | 206 | 207 | class TestDownloadSymbol: 208 | """Test the improved download_symbol function.""" 209 | 210 | @patch('fastsymapi.symbols.create_requests_session') 211 | @patch('fastsymapi.symbols.download_and_save_symbol') 212 | @patch('fastsymapi.symbols.crud.modify_pdb_entry') 213 | def test_download_symbol_success(self, mock_modify, mock_download_save, mock_session): 214 | """Test successful symbol download.""" 215 | # Setup mocks 216 | mock_session_obj = MagicMock() 217 | mock_session.return_value = mock_session_obj 218 | mock_response = MagicMock() 219 | mock_response.status_code = 200 220 | mock_session_obj.get.return_value = mock_response 221 | 222 | # Create test PDB entry 223 | pdbentry = models.SymbolEntry(pdbname="test", guid="guid123", pdbfile="test.pdb") 224 | mock_db = MagicMock() 225 | 226 | # Call function 227 | download_symbol(pdbentry, mock_db) 228 | 229 | # Verify calls 230 | assert pdbentry.found == True 231 | assert pdbentry.downloading == False 232 | mock_download_save.assert_called_once() 233 | mock_modify.assert_called() 234 | 235 | @patch('fastsymapi.symbols.create_requests_session') 236 | @patch('fastsymapi.symbols.crud.modify_pdb_entry') 237 | def test_download_symbol_all_servers_fail(self, mock_modify, mock_session): 238 | """Test when all symbol servers fail.""" 239 | # Setup mocks 240 | mock_session_obj = MagicMock() 241 | mock_session.return_value = mock_session_obj 242 | mock_response = MagicMock() 243 | mock_response.status_code = 404 244 | mock_session_obj.get.return_value = mock_response 245 | 246 | # Create test PDB entry 247 | pdbentry = models.SymbolEntry(pdbname="test", guid="guid123", pdbfile="test.pdb") 248 | mock_db = MagicMock() 249 | 250 | # Call function 251 | download_symbol(pdbentry, mock_db) 252 | 253 | # Verify error handling 254 | assert pdbentry.found == False 255 | assert pdbentry.downloading == False 256 | mock_modify.assert_called() 257 | 258 | @patch('fastsymapi.symbols.create_requests_session') 259 | @patch('fastsymapi.symbols.crud.modify_pdb_entry') 260 | def test_download_symbol_network_error(self, mock_modify, mock_session): 261 | """Test network error handling.""" 262 | # Setup mocks 263 | mock_session_obj = MagicMock() 264 | mock_session.return_value = mock_session_obj 265 | mock_session_obj.get.side_effect = RequestException("Network error") 266 | 267 | # Create test PDB entry 268 | pdbentry = models.SymbolEntry(pdbname="test", guid="guid123", pdbfile="test.pdb") 269 | mock_db = MagicMock() 270 | 271 | # Call function 272 | download_symbol(pdbentry, mock_db) 273 | 274 | # Verify error handling 275 | assert pdbentry.downloading == False 276 | mock_modify.assert_called() 277 | 278 | @patch('fastsymapi.symbols.crud.modify_pdb_entry') 279 | def test_download_symbol_invalid_input(self, mock_modify): 280 | """Test invalid input handling.""" 281 | # Create test PDB entry with invalid characters 282 | pdbentry = models.SymbolEntry(pdbname="../evil", guid="guid123", pdbfile="test.pdb") 283 | mock_db = MagicMock() 284 | 285 | # Call function 286 | download_symbol(pdbentry, mock_db) 287 | 288 | # Verify error handling 289 | assert pdbentry.downloading == False 290 | mock_modify.assert_called() 291 | 292 | 293 | class TestMemoryManagement: 294 | """Test memory management in streaming functions.""" 295 | 296 | @patch('gzip.open') 297 | def test_stream_memory_limit(self, mock_gzip_open): 298 | """Test that streaming respects memory limits.""" 299 | # Mock gzip file that returns large chunks 300 | mock_file = MagicMock() 301 | large_chunk = b"x" * (50 * 1024 * 1024) # 50MB chunk 302 | mock_file.read.side_effect = [large_chunk, large_chunk, b""] # Return large chunks then EOF 303 | mock_gzip_open.return_value.__enter__.return_value = mock_file 304 | 305 | # Create a temporary file for testing 306 | with tempfile.NamedTemporaryFile() as temp_file: 307 | from fastsymapi.symbols import get_symbol 308 | 309 | # Mock the necessary components 310 | with patch('fastsymapi.symbols.os.path.isfile', return_value=True), \ 311 | patch('fastsymapi.symbols.create_or_find_pdb_entry'): 312 | 313 | mock_db = MagicMock() 314 | mock_background_tasks = MagicMock() 315 | 316 | # This should work without running out of memory 317 | response = get_symbol("test", "test.pdb", "guid", mock_background_tasks, mock_db, False) 318 | 319 | # Verify it's a streaming response 320 | from fastapi.responses import StreamingResponse 321 | assert isinstance(response, StreamingResponse) 322 | 323 | 324 | class TestErrorHandling: 325 | """Test comprehensive error handling.""" 326 | 327 | def test_get_symbol_invalid_parameters(self): 328 | """Test get_symbol with invalid parameters.""" 329 | from fastsymapi.symbols import get_symbol 330 | 331 | mock_db = MagicMock() 332 | mock_background_tasks = MagicMock() 333 | 334 | # Test with invalid characters 335 | response = get_symbol("../evil", "test.pdb", "guid", mock_background_tasks, mock_db, False) 336 | 337 | # Should return 400 error 338 | assert isinstance(response, Response) 339 | assert response.status_code == 400 340 | 341 | 342 | if __name__ == "__main__": 343 | pytest.main([__file__]) -------------------------------------------------------------------------------- /fastsymapi/symbols.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, Response, BackgroundTasks, Request 2 | from fastapi.responses import FileResponse, StreamingResponse 3 | from fastapi.encoders import jsonable_encoder 4 | from fastsymapi.sql_db.database import get_db, session_local 5 | from fastsymapi.sql_db import crud, models 6 | from fastsymapi.logging import logger 7 | import requests 8 | import click 9 | from sqlalchemy.orm import Session 10 | import os 11 | import shutil 12 | import gzip 13 | import threading 14 | import time 15 | import re 16 | from urllib.parse import quote 17 | from requests.adapters import HTTPAdapter 18 | from urllib3.util.retry import Retry 19 | 20 | sym = APIRouter() 21 | 22 | # Make CHUNK_SIZE configurable via environment variable 23 | CHUNK_SIZE = int(os.environ.get('FASTSYM_CHUNK_SIZE', 1024*1024*2)) 24 | MAX_RETRY_ATTEMPTS = int(os.environ.get('FASTSYM_MAX_RETRIES', 3)) 25 | RETRY_BACKOFF_FACTOR = float(os.environ.get('FASTSYM_RETRY_BACKOFF', 0.3)) 26 | MAX_MEMORY_USAGE = int(os.environ.get('FASTSYM_MAX_MEMORY_MB', 100)) * 1024 * 1024 27 | 28 | SYMBOL_PATH = os.path.join(os.path.dirname(__file__), "symbols") 29 | 30 | # File lock for concurrent downloads 31 | _download_locks = {} 32 | _locks_lock = threading.Lock() 33 | 34 | SYM_URLS = [ 35 | "http://msdl.microsoft.com/download/symbols", 36 | "http://chromium-browser-symsrv.commondatastorage.googleapis.com", 37 | "http://symbols.mozilla.org", 38 | "http://symbols.mozilla.org/try" 39 | ] 40 | 41 | 42 | def sanitize_path_component(component: str) -> str: 43 | """Sanitize a path component to prevent directory traversal attacks.""" 44 | if not component: 45 | raise ValueError("Path component cannot be empty") 46 | 47 | # Remove any path traversal sequences and path separators 48 | if '..' in component or '/' in component or '\\' in component: 49 | raise ValueError(f"Path traversal or separator characters not allowed: {component}") 50 | 51 | # Only allow alphanumeric characters, hyphens, underscores, and dots 52 | if not re.match(r'^[a-zA-Z0-9._-]+$', component): 53 | raise ValueError(f"Invalid characters in path component: {component}") 54 | 55 | return component 56 | 57 | 58 | def validate_pdb_entry_fields(pdbname: str, guid: str, pdbfile: str) -> None: 59 | """Validate PDB entry fields to prevent injection attacks.""" 60 | if not pdbname or len(pdbname) > 255: 61 | raise ValueError("Invalid pdbname: must be non-empty and <= 255 characters") 62 | 63 | if not guid or len(guid) > 255: 64 | raise ValueError("Invalid guid: must be non-empty and <= 255 characters") 65 | 66 | if not pdbfile or len(pdbfile) > 255: 67 | raise ValueError("Invalid pdbfile: must be non-empty and <= 255 characters") 68 | 69 | # Sanitize each component 70 | sanitize_path_component(pdbname) 71 | sanitize_path_component(guid) 72 | sanitize_path_component(pdbfile) 73 | 74 | 75 | def get_file_lock(file_path: str) -> threading.Lock: 76 | """Get or create a lock for a specific file path.""" 77 | with _locks_lock: 78 | if file_path not in _download_locks: 79 | _download_locks[file_path] = threading.Lock() 80 | return _download_locks[file_path] 81 | 82 | 83 | def create_requests_session() -> requests.Session: 84 | """Create a requests session with retry logic.""" 85 | session = requests.Session() 86 | 87 | retry_strategy = Retry( 88 | total=MAX_RETRY_ATTEMPTS, 89 | status_forcelist=[429, 500, 502, 503, 504], 90 | allowed_methods=["HEAD", "GET", "OPTIONS"], # Updated parameter name 91 | backoff_factor=RETRY_BACKOFF_FACTOR 92 | ) 93 | 94 | adapter = HTTPAdapter(max_retries=retry_strategy) 95 | session.mount("http://", adapter) 96 | session.mount("https://", adapter) 97 | 98 | return session 99 | 100 | 101 | def create_or_find_pdb_entry(db: Session, guid: str, pdbname: str, pdbfile: str, found: bool = False) -> models.SymbolEntry: 102 | """Helper function to create or find a PDB entry, reducing code duplication.""" 103 | validate_pdb_entry_fields(pdbname, guid, pdbfile) 104 | 105 | pdbentry = crud.find_pdb_entry(db, guid, pdbfile) 106 | if not pdbentry: 107 | pdbentry = crud.create_pdb_entry(db, guid, pdbname, pdbfile, found) 108 | return pdbentry 109 | 110 | 111 | def download_symbol(pdbentry: models.SymbolEntry, db: Session) -> None: 112 | """ Iterate over SYM_URLs looking for the requested PDB file """ 113 | 114 | # Validate PDB entry fields 115 | try: 116 | validate_pdb_entry_fields(pdbentry.pdbname, pdbentry.guid, pdbentry.pdbfile) 117 | except ValueError as e: 118 | logger.error(f"Invalid PDB entry fields: {e}") 119 | pdbentry.downloading = False 120 | crud.modify_pdb_entry(db, pdbentry) 121 | return 122 | 123 | session = create_requests_session() 124 | found = False 125 | 126 | # Iterate over the symbol server URLs 127 | for sym_url in SYM_URLS: 128 | try: 129 | # Check if symbol exists on the server 130 | symbol_url = sym_url + \ 131 | f"/{quote(pdbentry.pdbname)}/{quote(pdbentry.guid)}/{quote(pdbentry.pdbfile)}" 132 | 133 | logger.debug(f"Trying to download from: {symbol_url}") 134 | resp = session.get(symbol_url, stream=True, timeout=30) 135 | 136 | # If the symbol was found download it 137 | if resp.status_code == 200: 138 | pdbentry.found = True 139 | download_and_save_symbol(pdbentry, resp, db) 140 | found = True 141 | break 142 | 143 | # Unable to find PDB at this Symbol Server 144 | else: 145 | logger.debug(f"Could not find symbol: {symbol_url} {resp.status_code}") 146 | 147 | except requests.exceptions.RequestException as e: 148 | logger.warning(f"Network error while downloading from {sym_url}: {e}") 149 | continue 150 | except Exception as e: 151 | logger.error(f"Unexpected error while downloading from {sym_url}: {e}") 152 | continue 153 | 154 | # If no symbol server had the file, log an explicit error 155 | if not found: 156 | logger.error(f"Failed to download symbol {pdbentry.pdbname}/{pdbentry.guid}/{pdbentry.pdbfile} from all available servers") 157 | pdbentry.found = False 158 | 159 | # Set the PDB entry to 'finished' downloading 160 | pdbentry.downloading = False 161 | crud.modify_pdb_entry(db, pdbentry) 162 | 163 | 164 | def download_and_save_symbol(pdbentry, resp, db): 165 | """ Download the symbol and save it to disk """ 166 | 167 | # Notify that the download is beginning 168 | logger.warning(f"Downloading... {pdbentry.guid} {pdbentry.pdbfile}") 169 | 170 | # Create the PDB directory with GUID if it does not exist 171 | pdb_file_path = os.path.join(SYMBOL_PATH, 172 | sanitize_path_component(pdbentry.pdbname), 173 | sanitize_path_component(pdbentry.guid)) 174 | 175 | # Get file lock to prevent race conditions 176 | file_lock = get_file_lock(pdb_file_path) 177 | 178 | with file_lock: 179 | try: 180 | if not os.path.exists(pdb_file_path): 181 | os.makedirs(pdb_file_path, mode=0o755) 182 | 183 | # Logic that identifies whether its a gzip or not 184 | content_encoding = resp.headers.get("Content-Encoding", "") 185 | is_gzip_supported = "gzip" in content_encoding.lower() 186 | 187 | # Create the PDB file and iterate over it writing the chunks 188 | pdb_tmp_file_path = os.path.join( 189 | pdb_file_path, f"tmp_{sanitize_path_component(pdbentry.pdbfile)}.gzip") 190 | 191 | # if the file is already compressed, just write the raw bytes 192 | if is_gzip_supported: 193 | pdbfile_handle = open(pdb_tmp_file_path, 'wb') 194 | # else, we must compress it ourselves 195 | else: 196 | pdbfile_handle = gzip.open(pdb_tmp_file_path, 'wb') 197 | 198 | # Get the size of the PDB buffer being downloaded 199 | pdb_size = get_pdb_size(resp) 200 | if pdb_size is None: 201 | pdbentry.downloading = False 202 | crud.modify_pdb_entry(db, pdbentry) 203 | if pdbfile_handle: 204 | pdbfile_handle.close() 205 | if os.path.exists(pdb_tmp_file_path): 206 | os.remove(pdb_tmp_file_path) 207 | return 208 | 209 | # Download with memory usage monitoring 210 | downloaded = 0 211 | percent = 0 212 | last_logged_percent = -1 213 | chunks_in_memory = 0 214 | 215 | while downloaded < pdb_size: 216 | remaining = pdb_size - downloaded 217 | chunk_size = min(CHUNK_SIZE, remaining) 218 | chunk = resp.raw.read(chunk_size) 219 | 220 | if not chunk: 221 | break 222 | 223 | pdbfile_handle.write(chunk) 224 | downloaded += len(chunk) 225 | chunks_in_memory += 1 226 | 227 | # Monitor memory usage 228 | if chunks_in_memory * CHUNK_SIZE > MAX_MEMORY_USAGE: 229 | pdbfile_handle.flush() 230 | chunks_in_memory = 0 231 | 232 | percent = int((downloaded / pdb_size) * 100) 233 | if percent // 5 > last_logged_percent: # Log every 5% 234 | last_logged_percent = percent // 5 235 | logger.warning(f"Downloading... {pdbentry.guid} {pdbentry.pdbfile} {percent}%") 236 | 237 | # Close the file handle 238 | pdbfile_handle.close() 239 | 240 | # Finished downloading PDB 241 | logger.info(f"Successfully downloaded... {pdbentry.guid} {pdbentry.pdbfile}") 242 | 243 | # Move the temporary file to final location 244 | final_pdb_file_path = os.path.join(pdb_file_path, f"{sanitize_path_component(pdbentry.pdbfile)}.gzip") 245 | shutil.move(pdb_tmp_file_path, final_pdb_file_path) 246 | 247 | except Exception as e: 248 | logger.error(f"Error downloading symbol {pdbentry.guid}/{pdbentry.pdbfile}: {e}") 249 | pdbentry.downloading = False 250 | crud.modify_pdb_entry(db, pdbentry) 251 | # Clean up temporary file 252 | if os.path.exists(pdb_tmp_file_path): 253 | try: 254 | os.remove(pdb_tmp_file_path) 255 | except OSError: 256 | pass 257 | raise 258 | 259 | 260 | def get_pdb_size(resp): 261 | """ Get the size of the PDB buffer being downloaded """ 262 | 263 | for header in ["Content-Length", "x-goog-stored-content-length"]: 264 | if resp.headers.get(header): 265 | return int(resp.headers[header]) 266 | 267 | # Output an error stating the content-length could not be found. 268 | logger.error(f"Could not get content length from server: { 269 | resp.status_code}") 270 | return None 271 | 272 | 273 | def get_symbol(pdbname: str, pdbfile: str, guid: str, background_tasks: BackgroundTasks, db: Session, is_gzip_supported: bool): 274 | # Validate input parameters first, before any file system operations 275 | try: 276 | validate_pdb_entry_fields(pdbname, guid, pdbfile) 277 | except ValueError as e: 278 | logger.error(f"Invalid parameters in get_symbol: {e}") 279 | return Response(status_code=400, content=f"Invalid parameters: {e}") 280 | 281 | try: 282 | pdb_file_path = os.path.join(SYMBOL_PATH, 283 | sanitize_path_component(pdbname), 284 | sanitize_path_component(guid), 285 | f"{sanitize_path_component(pdbfile)}.gzip") 286 | 287 | if not os.path.isfile(pdb_file_path): 288 | # Use helper function to reduce code duplication 289 | pdbentry = create_or_find_pdb_entry(db, guid, pdbname, pdbfile) 290 | 291 | if pdbentry.downloading: 292 | return Response(status_code=404) 293 | 294 | pdbentry.downloading = True 295 | crud.modify_pdb_entry(db, pdbentry) 296 | background_tasks.add_task(download_symbol, pdbentry, db) 297 | return Response(status_code=404) 298 | 299 | # Use helper function to reduce code duplication 300 | pdbentry = create_or_find_pdb_entry(db, guid, pdbname, pdbfile, True) 301 | 302 | if is_gzip_supported: 303 | logger.debug("Returning gzip compressed stream...") 304 | return FileResponse(pdb_file_path, headers={"content-encoding": "gzip"}, media_type="application/octet-stream") 305 | 306 | def stream_decompressed_data(chunk_size=CHUNK_SIZE): 307 | """Stream decompressed data with memory usage monitoring.""" 308 | bytes_streamed = 0 309 | with gzip.open(pdb_file_path, 'rb') as gzip_file: 310 | while True: 311 | # Monitor memory usage 312 | if bytes_streamed > MAX_MEMORY_USAGE: 313 | logger.warning(f"Memory usage limit reached while streaming {pdbfile}") 314 | break 315 | 316 | chunk = gzip_file.read(chunk_size) 317 | if not chunk: 318 | break 319 | 320 | bytes_streamed += len(chunk) 321 | yield chunk 322 | 323 | logger.debug("Returning decompressed stream...") 324 | return StreamingResponse(stream_decompressed_data(), media_type="application/octet-stream") 325 | 326 | except ValueError as e: 327 | logger.error(f"Validation error in get_symbol: {e}") 328 | return Response(status_code=400, content=f"Invalid parameters: {e}") 329 | except Exception as e: 330 | logger.error(f"Unexpected error in get_symbol: {e}") 331 | return Response(status_code=500, content="Internal server error") 332 | 333 | 334 | @sym.get("/{pdbname}/{guid}/{pdbfile}") 335 | @sym.get("/download/symbols/{pdbname}/{guid}/{pdbfile}") 336 | async def get_symbol_api(pdbname: str, guid: str, pdbfile: str, request: Request, background_tasks: BackgroundTasks, db: Session = Depends(get_db)): 337 | try: 338 | accept_encoding = request.headers.get("Accept-Encoding", "") 339 | is_gzip_supported = "gzip" in accept_encoding.lower() 340 | return get_symbol(pdbname, pdbfile, guid, background_tasks, db, is_gzip_supported) 341 | except ValueError as e: 342 | logger.error(f"Validation error in get_symbol_api: {e}") 343 | return Response(status_code=400, content=f"Invalid parameters: {e}") 344 | except Exception as e: 345 | logger.error(f"Unexpected error in get_symbol_api: {e}") 346 | return Response(status_code=500, content="Internal server error") 347 | 348 | 349 | @sym.get("/symbols") 350 | def get_symbol_entries(db: Session = Depends(get_db)) -> list: 351 | return jsonable_encoder(db.query(models.SymbolEntry).all()) 352 | 353 | 354 | @sym.on_event("startup") 355 | def fastsym_init(): 356 | db = session_local() 357 | downloads = crud.find_still_downloading(db) 358 | for download in downloads: 359 | try: 360 | # Use path sanitization for security 361 | failed_tmp_download = os.path.join( 362 | SYMBOL_PATH, 363 | sanitize_path_component(download.pdbname), 364 | sanitize_path_component(download.guid), 365 | f"tmp_{sanitize_path_component(download.pdbfile)}.gzip") 366 | if os.path.exists(failed_tmp_download): 367 | os.remove(failed_tmp_download) 368 | except ValueError as e: 369 | logger.warning(f"Invalid path components in existing download entry: {e}") 370 | except Exception as e: 371 | logger.error(f"Error cleaning up failed download: {e}") 372 | 373 | download.downloading = False 374 | crud.modify_pdb_entry(db, download) 375 | --------------------------------------------------------------------------------