├── commitlint.config.js ├── src └── binaryai │ ├── cve.py │ ├── client_stub │ ├── file_size.py │ ├── filename.py │ ├── sha_256.py │ ├── mime_type.py │ ├── download_link.py │ ├── license_short_name.py │ ├── create_file.py │ ├── cve_name.py │ ├── sca.py │ ├── base_model.py │ ├── function_list.py │ ├── reanalyze.py │ ├── file_malware_probability.py │ ├── overview.py │ ├── file_k_hash.py │ ├── compressed_file.py │ ├── function_match.py │ ├── function_info.py │ ├── functions_info.py │ ├── check_state.py │ ├── check_or_upload.py │ ├── ascii_string.py │ ├── license.py │ ├── exceptions.py │ ├── enums.py │ ├── input_types.py │ ├── base_client.py │ ├── __init__.py │ └── client.py │ ├── compressed_file.py │ ├── component.py │ ├── exceptions.py │ ├── __init__.py │ ├── function.py │ ├── license.py │ ├── upload.py │ ├── binaryai_file.py │ ├── query.graphql │ └── utils.py ├── docs ├── index.md ├── make.bat ├── locale │ └── zh_CN │ │ └── LC_MESSAGES │ │ ├── autoapi │ │ ├── binaryai │ │ │ ├── index.po │ │ │ ├── cve │ │ │ │ └── index.po │ │ │ ├── component │ │ │ │ └── index.po │ │ │ ├── exceptions │ │ │ │ └── index.po │ │ │ ├── compressed_file │ │ │ │ └── index.po │ │ │ ├── upload │ │ │ │ └── index.po │ │ │ ├── license │ │ │ │ └── index.po │ │ │ ├── function │ │ │ │ └── index.po │ │ │ ├── utils │ │ │ │ └── index.po │ │ │ ├── binaryai_file │ │ │ │ └── index.po │ │ │ └── client │ │ │ │ └── index.po │ │ └── index.po │ │ ├── CONTRIBUTING.po │ │ ├── quick_start.po │ │ └── index.po ├── Makefile ├── conf.py └── quick_start.ipynb ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── examples ├── md5_sha256.py ├── binaryai_file.py ├── sca.py ├── strings.py ├── decompress.py ├── upload_and_analysis.py ├── multithreads.py ├── info.py └── decompile.py ├── .flake8 ├── CITATION.cff ├── .readthedocs.yml ├── .github └── workflows │ ├── linter.yml │ └── publish.yml ├── .editorconfig ├── CONTRIBUTING.md ├── pyproject.toml ├── .pylintrc ├── .gitignore ├── README.md └── tests └── test_utils.py /commitlint.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { extends: ['@commitlint/config-conventional'] }; 2 | -------------------------------------------------------------------------------- /src/binaryai/cve.py: -------------------------------------------------------------------------------- 1 | class CVE(object): 2 | """A CVE entity.""" 3 | 4 | def __init__(self, name: str) -> None: 5 | super().__init__() 6 | self.name = name 7 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../README.md 2 | ``` 3 | 4 | ```{toctree} 5 | :maxdepth: 1 6 | :hidden: 7 | 8 | ./quick_start.ipynb 9 | ../CONTRIBUTING.md 10 | autoapi/index 11 | ``` -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/devcontainers/python:1-3.9-bullseye 2 | 3 | # Replace pypi source to tencent source 4 | RUN apt update && apt install -yy pandoc 5 | 6 | # Add devcontainer user 7 | RUN groupdel users ; useradd -mUs /bin/bash devcontainer 8 | USER devcontainer 9 | 10 | ENV LC_ALL=C.UTF-8 11 | ENV LANG=C.UTF-8 12 | ENV LANGUAGE=C.UTF-8 13 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/file_size.py: 
-------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from .base_model import BaseModel 7 | 8 | 9 | class FileSize(BaseModel): 10 | file: Optional["FileSizeFile"] 11 | 12 | 13 | class FileSizeFile(BaseModel): 14 | size: int 15 | 16 | 17 | FileSize.model_rebuild() 18 | -------------------------------------------------------------------------------- /src/binaryai/compressed_file.py: -------------------------------------------------------------------------------- 1 | class CompressedFile(object): 2 | """A compressed file entity. 3 | Note that a file may have no sha256 which should be empty string, 4 | e.g. /dev/console and /dev/null. 5 | """ 6 | 7 | def __init__(self, path: str, sha256: str) -> None: 8 | super().__init__() 9 | self.path = path 10 | self.sha256 = sha256 11 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/filename.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Optional 5 | 6 | from .base_model import BaseModel 7 | 8 | 9 | class Filename(BaseModel): 10 | file: Optional["FilenameFile"] 11 | 12 | 13 | class FilenameFile(BaseModel): 14 | name: List[str] 15 | 16 | 17 | Filename.model_rebuild() 18 | -------------------------------------------------------------------------------- /examples/md5_sha256.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from binaryai import BinaryAI 5 | 6 | # sha256 and md5 of the same file 7 | DEFAULT_MD5 = "c46b449d5460d45ecec2bb88a1975b3b" 8 | 9 | 10 | def main(): 11 | bai = BinaryAI() 12 | 13 | sha256 = bai.get_sha256(DEFAULT_MD5) 14 | print(sha256) 15 | print("done") 16 | 17 | 18 | if __name__ == "__main__": 19 | main() 20 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | ;W503 line break before binary operator 4 | W503, 5 | ;E203 whitespace before ':' 6 | E203, 7 | 8 | ; exclude file 9 | exclude = 10 | .tox, 11 | .git, 12 | __pycache__, 13 | build, 14 | dist, 15 | *.pyc, 16 | *.egg-info, 17 | .cache, 18 | .eggs 19 | .venv 20 | venv, 21 | src/binaryai/client_stub/* 22 | 23 | max-line-length = 120 24 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/sha_256.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class Sha256(BaseModel): 12 | file: Optional["Sha256File"] 13 | 14 | 15 | class Sha256File(BaseModel): 16 | sha_256: str = Field(alias="sha256") 17 | 18 | 19 | Sha256.model_rebuild() 20 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/mime_type.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model 
import BaseModel 9 | 10 | 11 | class MIMEType(BaseModel): 12 | file: Optional["MIMETypeFile"] 13 | 14 | 15 | class MIMETypeFile(BaseModel): 16 | mime_type: str = Field(alias="mimeType") 17 | 18 | 19 | MIMEType.model_rebuild() 20 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/download_link.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class DownloadLink(BaseModel): 12 | file: Optional["DownloadLinkFile"] 13 | 14 | 15 | class DownloadLinkFile(BaseModel): 16 | download_link: Optional[str] = Field(alias="downloadLink") 17 | 18 | 19 | DownloadLink.model_rebuild() 20 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: BinaryAI 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 9 | type: software 10 | authors: 11 | - name: Tencent Security Keen Lab 12 | website: 'https://keenlab.tencent.com' 13 | email: KeenSecurityLab@tencent.com 14 | url: 'https://www.binaryai.cn' 15 | date-released: '2021-08-11' 16 | -------------------------------------------------------------------------------- /src/binaryai/component.py: -------------------------------------------------------------------------------- 1 | class Component(object): 2 | """A component entity that represents a SCA result.""" 3 | 4 | def __init__( 5 | self, 6 | name: str, 7 | version: str, 8 | description: str, 9 | summary: str, 10 | source_code_url: str, 11 | ) -> None: 12 | super().__init__() 13 | self.name = name 14 | self.version = version 15 | self.description = description 16 | self.summary = summary 17 | self.source_code_url = source_code_url 18 | -------------------------------------------------------------------------------- /src/binaryai/exceptions.py: -------------------------------------------------------------------------------- 1 | class FileNotExistError(Exception): 2 | """ 3 | FileNotExistError means the sha256 just uploaded is not found. 4 | Normally this error does not occur. If it does, it means that there is 5 | a problem with the server 6 | """ 7 | 8 | pass 9 | 10 | 11 | class FileRequiredError(Exception): 12 | """ 13 | FileRequiredError means BinaryAI requires the file, but you are not providing it. 14 | This error might occur if you are only providing hash to the BinaryAI. Consider provide the original file as well. 
15 | """ 16 | 17 | pass 18 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/license_short_name.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Optional 5 | 6 | from .base_model import BaseModel 7 | 8 | 9 | class LicenseShortName(BaseModel): 10 | file: Optional["LicenseShortNameFile"] 11 | 12 | 13 | class LicenseShortNameFile(BaseModel): 14 | scainfo: Optional[List["LicenseShortNameFileScainfo"]] 15 | 16 | 17 | class LicenseShortNameFileScainfo(BaseModel): 18 | license: Optional[str] 19 | 20 | 21 | LicenseShortName.model_rebuild() 22 | LicenseShortNameFile.model_rebuild() 23 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/create_file.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class CreateFile(BaseModel): 12 | create_file: "CreateFileCreateFile" = Field(alias="createFile") 13 | 14 | 15 | class CreateFileCreateFile(BaseModel): 16 | sha_256: str = Field(alias="sha256") 17 | md_5: str = Field(alias="md5") 18 | name: List[str] 19 | size: int 20 | mime_type: str = Field(alias="mimeType") 21 | 22 | 23 | CreateFile.model_rebuild() 24 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/cve_name.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Optional 5 | 6 | from .base_model import BaseModel 7 | 8 | 9 | class CVEName(BaseModel): 10 | file: Optional["CVENameFile"] 11 | 12 | 13 | class CVENameFile(BaseModel): 14 | scainfo: Optional[List["CVENameFileScainfo"]] 15 | 16 | 17 | class CVENameFileScainfo(BaseModel): 18 | cves: Optional[List["CVENameFileScainfoCves"]] 19 | 20 | 21 | class CVENameFileScainfoCves(BaseModel): 22 | name: str 23 | 24 | 25 | CVEName.model_rebuild() 26 | CVENameFile.model_rebuild() 27 | CVENameFileScainfo.model_rebuild() 28 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/sca.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class SCA(BaseModel): 12 | file: Optional["SCAFile"] 13 | 14 | 15 | class SCAFile(BaseModel): 16 | scainfo: Optional[List["SCAFileScainfo"]] 17 | 18 | 19 | class SCAFileScainfo(BaseModel): 20 | name: str 21 | version: Optional[str] 22 | description: Optional[str] 23 | source_code_url: Optional[str] = Field(alias="sourceCodeURL") 24 | summary: Optional[str] 25 | 26 | 27 | SCA.model_rebuild() 28 | SCAFile.model_rebuild() 29 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "3.10" 7 | jobs: 8 | post_create_environment: 9 | # Install poetry 10 | # 
https://python-poetry.org/docs/#installing-manually 11 | - pip install poetry 12 | post_install: 13 | # Install dependencies with 'docs' dependency group 14 | # https://python-poetry.org/docs/managing-dependencies/#dependency-groups 15 | # VIRTUAL_ENV needs to be set manually for now. 16 | # See https://github.com/readthedocs/readthedocs.org/pull/11152/ 17 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with docs 18 | 19 | sphinx: 20 | configuration: docs/conf.py -------------------------------------------------------------------------------- /.github/workflows/linter.yml: -------------------------------------------------------------------------------- 1 | name: FLAKE8 CHECK 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Set up Python 11 | uses: actions/setup-python@v4 12 | with: 13 | python-version: '3.9' 14 | - name: Set up poetry 15 | uses: abatilo/actions-poetry@v2 16 | with: 17 | poetry-version: '1.6' 18 | - name: Install dependencies 19 | run: | 20 | poetry install --with=dev 21 | - name: Perform linting 22 | run: | 23 | poetry run flake8 . 24 | poetry run black --check --line-length=120 . 25 | poetry run isort --profile=black --line-length=120 -c . 26 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/base_model.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | 3 | from io import IOBase 4 | 5 | from pydantic import BaseModel as PydanticBaseModel, ConfigDict 6 | 7 | 8 | class UnsetType: 9 | def __bool__(self) -> bool: 10 | return False 11 | 12 | 13 | UNSET = UnsetType() 14 | 15 | 16 | class BaseModel(PydanticBaseModel): 17 | model_config = ConfigDict( 18 | populate_by_name=True, 19 | validate_assignment=True, 20 | arbitrary_types_allowed=True, 21 | protected_namespaces=(), 22 | ) 23 | 24 | 25 | class Upload: 26 | def __init__(self, filename: str, content: IOBase, content_type: str): 27 | self.filename = filename 28 | self.content = content 29 | self.content_type = content_type 30 | -------------------------------------------------------------------------------- /src/binaryai/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from .binaryai_file import BinaryAIFile 4 | from .client import SDK_VERSION, BinaryAI 5 | from .component import Component 6 | from .compressed_file import CompressedFile 7 | from .cve import CVE 8 | from .exceptions import FileNotExistError, FileRequiredError 9 | from .function import Function 10 | from .license import License 11 | 12 | __version__ = SDK_VERSION 13 | 14 | # Add deprecation warnings 15 | warnings.filterwarnings("default", category=DeprecationWarning) 16 | warnings.filterwarnings("default", category=PendingDeprecationWarning) 17 | 18 | __all__ = [ 19 | BinaryAI, 20 | BinaryAIFile, 21 | Component, 22 | CompressedFile, 23 | CVE, 24 | FileNotExistError, 25 | FileRequiredError, 26 | Function, 27 | License, 28 | ] 29 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 4 9 | end_of_line = lf 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | 
insert_final_newline = true 13 | 14 | [*.{yml,yaml,toml}] 15 | indent_style = space 16 | indent_size = 2 17 | 18 | [*.json] 19 | indent_style = space 20 | indent_size = 2 21 | 22 | [{Makefile,makefile,*.mk}] 23 | indent_style = tab 24 | indent_size = 8 25 | 26 | [*.go] 27 | indent_style = tab 28 | indent_size = 4 29 | 30 | [*.py] 31 | indent_style = space 32 | indent_size = 4 33 | max_line_length = 120 34 | 35 | [*.{ts,tsx}] 36 | indent_style = space 37 | indent_size = 2 38 | 39 | [*.bat] 40 | indent_style = tab 41 | end_of_line = crlf 42 | 43 | [LICENSE] 44 | insert_final_newline = false 45 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/function_list.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Any, List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class FunctionList(BaseModel): 12 | file: Optional["FunctionListFile"] 13 | 14 | 15 | class FunctionListFile(BaseModel): 16 | decompile_result: Optional["FunctionListFileDecompileResult"] = Field( 17 | alias="decompileResult" 18 | ) 19 | 20 | 21 | class FunctionListFileDecompileResult(BaseModel): 22 | functions: Optional[List["FunctionListFileDecompileResultFunctions"]] 23 | 24 | 25 | class FunctionListFileDecompileResultFunctions(BaseModel): 26 | offset: Any 27 | 28 | 29 | FunctionList.model_rebuild() 30 | FunctionListFile.model_rebuild() 31 | FunctionListFileDecompileResult.model_rebuild() 32 | -------------------------------------------------------------------------------- /examples/binaryai_file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from binaryai import BinaryAI, BinaryAIFile 4 | 5 | # sha256 and md5 of the same file 6 | DEFAULT_SHA256 = "bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30" 7 | DEFAULT_MD5 = "c46b449d5460d45ecec2bb88a1975b3b" 8 | 9 | 10 | def main(): 11 | # Initialize a BinaryAIFile 12 | # The sha256 and md5 parameters cannot both be empty 13 | bf1 = BinaryAIFile(BinaryAI(), sha256=DEFAULT_SHA256) 14 | bf2 = BinaryAIFile(BinaryAI(), md5=DEFAULT_MD5) 15 | 16 | # bf1 and bf2 represent the same file 17 | bf1_files = bf1.get_filenames() 18 | bf2_files = bf2.get_filenames() 19 | assert bf1_files == bf2_files 20 | 21 | print(bf1_files) 22 | print(bf2_files) 23 | print(bf1.get_khash_info()) 24 | 25 | print("done") 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /examples/sca.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | 5 | from binaryai import BinaryAI 6 | 7 | DEFAULT_SHA256 = "bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30" 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser(description="get SCA results of a given file") 12 | parser.add_argument("--sha256", required=False, default=DEFAULT_SHA256) 13 | args = parser.parse_args() 14 | 15 | sha256 = args.sha256 16 | 17 | # Initialize the BinaryAI client 18 | bai = BinaryAI() 19 | 20 | # Analyze the file just in case it's not been analyzed. 
21 | bai.wait_until_analysis_done(sha256) 22 | 23 | # Get sca result 24 | component_list = bai.get_sca_result(sha256) 25 | for component in component_list: 26 | print(component.__dict__) 27 | 28 | print("done") 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /examples/strings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | 5 | from binaryai import BinaryAI 6 | 7 | DEFAULT_SHA256 = "bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30" 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser(description="get a list of strings found in a given file") 12 | parser.add_argument("--sha256", required=False, default=DEFAULT_SHA256) 13 | args = parser.parse_args() 14 | 15 | sha256 = args.sha256 16 | 17 | # Initial BinaryAI client 18 | bai = BinaryAI() 19 | 20 | # Analyze the file just in case it's not been analyzed. 21 | bai.wait_until_analysis_done(sha256) 22 | 23 | # Get all ASCII strings 24 | ascii_strings = bai.get_all_ascii_strings(sha256) 25 | for s in ascii_strings: 26 | print(s) 27 | 28 | print("done") 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/reanalyze.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | from .enums import NoopReason, Status 10 | 11 | 12 | class Reanalyze(BaseModel): 13 | reanalyze: "ReanalyzeReanalyze" 14 | 15 | 16 | class ReanalyzeReanalyze(BaseModel): 17 | noop_reason: Optional[NoopReason] = Field(alias="noopReason") 18 | file: "ReanalyzeReanalyzeFile" 19 | 20 | 21 | class ReanalyzeReanalyzeFile(BaseModel): 22 | analyze_status: Optional["ReanalyzeReanalyzeFileAnalyzeStatus"] = Field( 23 | alias="analyzeStatus" 24 | ) 25 | 26 | 27 | class ReanalyzeReanalyzeFileAnalyzeStatus(BaseModel): 28 | status: Status 29 | 30 | 31 | Reanalyze.model_rebuild() 32 | ReanalyzeReanalyze.model_rebuild() 33 | ReanalyzeReanalyzeFile.model_rebuild() 34 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /examples/decompress.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | 5 | from binaryai import BinaryAI 6 | 7 | DEFAULT_SHA256 = "472aa646840dda3036dd1a2ec2c3f8383fbda1b3c588b079b757fa0522cc16c3" 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser(description="get a list of files from a compressed file") 12 | parser.add_argument("--sha256", required=False, default=DEFAULT_SHA256) 13 | args = parser.parse_args() 14 | 15 | sha256 = args.sha256 16 | 17 | # Initialize the BinaryAI client 18 | bai = BinaryAI() 19 | 20 | # Analyze the file just in case it's not been analyzed. 21 | bai.wait_until_analysis_done(sha256) 22 | 23 | # Get all compressed files 24 | compressed_files = bai.get_compressed_files(sha256) 25 | for compressed_file in compressed_files: 26 | print(compressed_file.__dict__) 27 | 28 | print("done") 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/index.rst:2 24 | msgid ":py:mod:`binaryai`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/index.rst:8 28 | msgid "Submodules" 29 | msgstr "" 30 | 31 | -------------------------------------------------------------------------------- /src/binaryai/function.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | 4 | class Function(object): 5 | """A function entity that represents a decompiled function.""" 6 | 7 | def __init__(self, name: str, offset: int, pseudocode: str, embedding: Optional[List[float]] = None): 8 | self.name = name 9 | self.offset = offset 10 | self.pseudocode = pseudocode 11 | self.embedding = embedding 12 | 13 | 14 | class MatchedFunction(object): 15 | """A matched function entity returned by similarity search. 16 | Unlike the Function class, this class is a matched-function result 17 | with only a score, code and other fields, but no bytes or file offset 18 | tying it to a function in an executable binary. 19 | So it is not an actual decompiled function. 20 | 21 | Note that this class is experimental and may change in the future. 
22 | """ 23 | 24 | def __init__(self, score: float, code: str) -> None: 25 | super().__init__() 26 | self.score = score 27 | self.code = code 28 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/file_malware_probability.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | from .enums import Status 10 | 11 | 12 | class FileMalwareProbability(BaseModel): 13 | file: Optional["FileMalwareProbabilityFile"] 14 | 15 | 16 | class FileMalwareProbabilityFile(BaseModel): 17 | decompile_result: Optional["FileMalwareProbabilityFileDecompileResult"] = Field( 18 | alias="decompileResult" 19 | ) 20 | analyze_status: Optional["FileMalwareProbabilityFileAnalyzeStatus"] = Field( 21 | alias="analyzeStatus" 22 | ) 23 | 24 | 25 | class FileMalwareProbabilityFileDecompileResult(BaseModel): 26 | malware_probability: Optional[float] = Field(alias="malwareProbability") 27 | 28 | 29 | class FileMalwareProbabilityFileAnalyzeStatus(BaseModel): 30 | status: Status 31 | 32 | 33 | FileMalwareProbability.model_rebuild() 34 | FileMalwareProbabilityFile.model_rebuild() 35 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/overview.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Any, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class Overview(BaseModel): 12 | file: Optional["OverviewFile"] 13 | 14 | 15 | class OverviewFile(BaseModel): 16 | decompile_result: Optional["OverviewFileDecompileResult"] = Field( 17 | alias="decompileResult" 18 | ) 19 | 20 | 21 | class OverviewFileDecompileResult(BaseModel): 22 | basic_info: "OverviewFileDecompileResultBasicInfo" = Field(alias="basicInfo") 23 | 24 | 25 | class OverviewFileDecompileResultBasicInfo(BaseModel): 26 | file_type: str = Field(alias="fileType") 27 | machine: str 28 | platform: str 29 | endian: str 30 | loader: str 31 | entry_point: Optional[Any] = Field(alias="entryPoint") 32 | base_address: Optional[Any] = Field(alias="baseAddress") 33 | 34 | 35 | Overview.model_rebuild() 36 | OverviewFile.model_rebuild() 37 | OverviewFileDecompileResult.model_rebuild() 38 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Install dependencies 4 | 5 | Install development dependencies: 6 | 7 | ```bash 8 | poetry install --with=dev 9 | ``` 10 | 11 | If you also want to modify documents, install related documents: 12 | 13 | ```bash 14 | poetry install --with=dev --with=docs 15 | ``` 16 | 17 | ## (Optional) Re-generate query file 18 | 19 | If the client is modified for a new verion's schema, run the code generator: 20 | 21 | ```bash 22 | rm -r src/binaryai/client_stub && \ 23 | poetry run ariadne-codegen 24 | ``` 25 | 26 | ## Document and translation generation 27 | 28 | We use Sphinx for document generation. After modified texts, you should re-generate the LOCALE files: 29 | 30 | ```bash 31 | cd docs/ 32 | make getpo 33 | ``` 34 | 35 | And you can modify `*.po` files. 
To have a preview: 36 | 37 | ```bash 38 | make all 39 | ``` 40 | 41 | ## Formatter and linter 42 | 43 | Run following commands before commit: 44 | 45 | ```sh 46 | poetry run flake8 . && \ 47 | poetry run black --line-length=120 . && \ 48 | poetry run isort --profile=black --line-length=120 . 49 | ``` 50 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/file_k_hash.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Any, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class FileKHash(BaseModel): 12 | file: Optional["FileKHashFile"] 13 | 14 | 15 | class FileKHashFile(BaseModel): 16 | decompile_result: Optional["FileKHashFileDecompileResult"] = Field( 17 | alias="decompileResult" 18 | ) 19 | 20 | 21 | class FileKHashFileDecompileResult(BaseModel): 22 | k_hash_info: Optional["FileKHashFileDecompileResultKHashInfo"] = Field( 23 | alias="kHashInfo" 24 | ) 25 | 26 | 27 | class FileKHashFileDecompileResultKHashInfo(BaseModel): 28 | hash: "FileKHashFileDecompileResultKHashInfoHash" 29 | 30 | 31 | class FileKHashFileDecompileResultKHashInfoHash(BaseModel): 32 | hash: Any 33 | version: str 34 | 35 | 36 | FileKHash.model_rebuild() 37 | FileKHashFile.model_rebuild() 38 | FileKHashFileDecompileResult.model_rebuild() 39 | FileKHashFileDecompileResultKHashInfo.model_rebuild() 40 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= poetry run sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | 18 | gettext: 19 | @$(SPHINXBUILD) -M gettext "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | 21 | getpo: gettext 22 | poetry run sphinx-intl update -p _build/gettext -l zh_CN 23 | 24 | en: 25 | @$(SPHINXBUILD) -D language='en' -b html "$(SOURCEDIR)" "$(BUILDDIR)"/html $(SPHINXOPTS) $(O) 26 | 27 | cn: 28 | @$(SPHINXBUILD) -D language='zh_CN' -b html "$(SOURCEDIR)" "$(BUILDDIR)"/html/zh_CN $(SPHINXOPTS) $(O) 29 | 30 | all: en cn 31 | @ 32 | 33 | # Catch-all target: route all unknown targets to Sphinx using the new 34 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
35 | %: Makefile 36 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 37 | -------------------------------------------------------------------------------- /examples/upload_and_analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | 5 | from binaryai import BinaryAI 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser(description="upload a file and analyze it") 10 | parser.add_argument("--file", "-f", required=True) 11 | parser.add_argument("--public-upload", required=False, action=argparse.BooleanOptionalAction, default=False) 12 | args = parser.parse_args() 13 | 14 | # Initialize the BinaryAI client 15 | bai = BinaryAI() 16 | 17 | # Upload a file by its path 18 | # If the file already exists on the server, it will not 19 | # actually be uploaded again; otherwise, it will be 20 | # uploaded to the server. 21 | sha256 = bai.upload(args.file, is_private=(not args.public_upload)) 22 | print(f"uploaded file: {sha256}") 23 | 24 | # Analyze a file identified by a sha256 25 | # If this is the first time the file is analyzed, it may take 26 | # some time; otherwise it should be a very quick call. 27 | # Wait without a time limit 28 | bai.wait_until_analysis_done(sha256, timeout=-1) 29 | 30 | # Retrieve analysis results of this file 31 | # ..... 32 | 33 | print("done") 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/compressed_file.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Annotated, List, Literal, Optional, Union 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class CompressedFile(BaseModel): 12 | file: Optional["CompressedFileFile"] 13 | 14 | 15 | class CompressedFileFile(BaseModel): 16 | decompressed: Optional[ 17 | List[ 18 | Annotated[ 19 | Union[ 20 | "CompressedFileFileDecompressedCompressedFile", 21 | "CompressedFileFileDecompressedCompressedDirectory", 22 | ], 23 | Field(discriminator="typename__"), 24 | ] 25 | ] 26 | ] 27 | 28 | 29 | class CompressedFileFileDecompressedCompressedFile(BaseModel): 30 | typename__: Literal["CompressedFile"] = Field(alias="__typename") 31 | path: str 32 | sha_256: str = Field(alias="sha256") 33 | 34 | 35 | class CompressedFileFileDecompressedCompressedDirectory(BaseModel): 36 | typename__: Literal["CompressedDirectory"] = Field(alias="__typename") 37 | 38 | 39 | CompressedFile.model_rebuild() 40 | CompressedFileFile.model_rebuild() 41 | -------------------------------------------------------------------------------- /examples/multithreads.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from multiprocessing.pool import ThreadPool 4 | from typing import List, Tuple 5 | 6 | from binaryai import BinaryAI, Function 7 | 8 | DEFAULT_SHA256 = "bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30" 9 | DEFAULT_SHA256_1 = "289616b59a145e2033baddb8a8a9b5a8fb01bdbba1b8cf9acadcdd92e6cc0562" 10 | 11 | 12 | def run_thread(sha256: str) -> Tuple[str, List[Function]]: 13 | # Initialize the BinaryAI client 14 | bai = BinaryAI() 15 | 16 | result: List[Function] = [] 17 | count = 3 18 | print(f"list funcs for sha256: {sha256}") 19 | for func in bai.list_funcs(sha256): 20 | 
if count <= 0: 21 | break 22 | result.append(func) 23 | count -= 1 24 | print("done") 25 | return (sha256, result) 26 | 27 | 28 | def main(): 29 | sha256_list = [DEFAULT_SHA256, DEFAULT_SHA256_1] 30 | params = [(x,) for x in sha256_list] 31 | with ThreadPool(2) as pool: 32 | all_results = pool.starmap(run_thread, params) 33 | for result in all_results: 34 | print(result[0]) 35 | for func in result[1]: 36 | print(func.name) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/index.rst:2 24 | msgid "API Reference" 25 | msgstr "SDK API文档" 26 | 27 | #: ../../autoapi/index.rst:4 28 | msgid "This page contains auto-generated API reference documentation [#f1]_." 29 | msgstr "本页包含自动生成的API文档(不含翻译) [#f1]_ 。" 30 | 31 | #: ../../autoapi/index.rst:11 32 | msgid "" 33 | "Created with `sphinx-autoapi `_" 35 | msgstr "使用 `sphinx-autoapi `_ 生成" 37 | 38 | -------------------------------------------------------------------------------- /examples/info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | 5 | from binaryai import BinaryAI 6 | 7 | DEFAULT_SHA256 = "bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30" 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser(description="fetch file infos and analysis overview") 12 | parser.add_argument("--sha256", required=False, default=DEFAULT_SHA256) 13 | args = parser.parse_args() 14 | 15 | sha256 = args.sha256 16 | 17 | # Initial BinaryAI client 18 | bai = BinaryAI() 19 | 20 | # Analyze the file just in case it's not been analyzed. 
21 | bai.wait_until_analysis_done(sha256) 22 | 23 | # Get all uploaded filenames 24 | print("get all uploaded filenames") 25 | filenames = bai.get_filenames(sha256) 26 | print(filenames) 27 | 28 | # Get MIME type 29 | print("get MIME type") 30 | mime_type = bai.get_mime_type(sha256) 31 | print(mime_type) 32 | 33 | # Get size in bytes 34 | print("get size in bytes") 35 | size = bai.get_size(sha256) 36 | print(size) 37 | 38 | # Get analysis overview 39 | print("get analysis overview") 40 | overview = bai.get_overview(sha256) 41 | print(overview) 42 | 43 | print("done") 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/function_match.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class FunctionMatch(BaseModel): 12 | file: Optional["FunctionMatchFile"] 13 | 14 | 15 | class FunctionMatchFile(BaseModel): 16 | decompile_result: Optional["FunctionMatchFileDecompileResult"] = Field( 17 | alias="decompileResult" 18 | ) 19 | 20 | 21 | class FunctionMatchFileDecompileResult(BaseModel): 22 | function: Optional["FunctionMatchFileDecompileResultFunction"] 23 | 24 | 25 | class FunctionMatchFileDecompileResultFunction(BaseModel): 26 | match: Optional[List["FunctionMatchFileDecompileResultFunctionMatch"]] 27 | 28 | 29 | class FunctionMatchFileDecompileResultFunctionMatch(BaseModel): 30 | score: float 31 | function: "FunctionMatchFileDecompileResultFunctionMatchFunction" 32 | 33 | 34 | class FunctionMatchFileDecompileResultFunctionMatchFunction(BaseModel): 35 | code: str 36 | 37 | 38 | FunctionMatch.model_rebuild() 39 | FunctionMatchFile.model_rebuild() 40 | FunctionMatchFileDecompileResult.model_rebuild() 41 | FunctionMatchFileDecompileResultFunction.model_rebuild() 42 | FunctionMatchFileDecompileResultFunctionMatch.model_rebuild() 43 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/function_info.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Any, List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class FunctionInfo(BaseModel): 12 | file: Optional["FunctionInfoFile"] 13 | 14 | 15 | class FunctionInfoFile(BaseModel): 16 | decompile_result: Optional["FunctionInfoFileDecompileResult"] = Field( 17 | alias="decompileResult" 18 | ) 19 | 20 | 21 | class FunctionInfoFileDecompileResult(BaseModel): 22 | function: Optional["FunctionInfoFileDecompileResultFunction"] 23 | 24 | 25 | class FunctionInfoFileDecompileResultFunction(BaseModel): 26 | offset: Any 27 | name: str 28 | embedding: Optional["FunctionInfoFileDecompileResultFunctionEmbedding"] = None 29 | pseudo_code: Optional["FunctionInfoFileDecompileResultFunctionPseudoCode"] = Field( 30 | alias="pseudoCode" 31 | ) 32 | 33 | 34 | class FunctionInfoFileDecompileResultFunctionEmbedding(BaseModel): 35 | vector: List[float] 36 | version: str 37 | 38 | 39 | class FunctionInfoFileDecompileResultFunctionPseudoCode(BaseModel): 40 | code: str 41 | 42 | 43 | FunctionInfo.model_rebuild() 44 | FunctionInfoFile.model_rebuild() 45 | 
FunctionInfoFileDecompileResult.model_rebuild() 46 | FunctionInfoFileDecompileResultFunction.model_rebuild() 47 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: PUBLISH 2 | 3 | on: 4 | release: 5 | types: [ published ] 6 | 7 | jobs: 8 | lintesting: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: '3.9' 16 | - name: Set up poetry 17 | uses: abatilo/actions-poetry@v2 18 | with: 19 | poetry-version: '1.6' 20 | - name: Install dependencies 21 | run: | 22 | poetry install --with=dev 23 | - name: Perform linting 24 | run: | 25 | poetry run flake8 . 26 | poetry run black --check --line-length=120 . 27 | poetry run isort --profile=black --line-length=120 -c . 28 | - name: Perform local testing 29 | run: | 30 | poetry run pytest 31 | 32 | deploy: 33 | needs: [ lintesting ] 34 | runs-on: ubuntu-latest 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Set up Python 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: '3.9' 42 | - name: Set up poetry 43 | uses: abatilo/actions-poetry@v2 44 | with: 45 | poetry-version: '1.6' 46 | - name: Build and publish 47 | env: 48 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.BINARYAI_SDK_PYPI_TOKEN }} 49 | run: | 50 | poetry publish --build 51 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/functions_info.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Any, List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class FunctionsInfo(BaseModel): 12 | file: Optional["FunctionsInfoFile"] 13 | 14 | 15 | class FunctionsInfoFile(BaseModel): 16 | decompile_result: Optional["FunctionsInfoFileDecompileResult"] = Field( 17 | alias="decompileResult" 18 | ) 19 | 20 | 21 | class FunctionsInfoFileDecompileResult(BaseModel): 22 | functions: Optional[List["FunctionsInfoFileDecompileResultFunctions"]] 23 | 24 | 25 | class FunctionsInfoFileDecompileResultFunctions(BaseModel): 26 | offset: Any 27 | name: str 28 | embedding: Optional["FunctionsInfoFileDecompileResultFunctionsEmbedding"] = None 29 | pseudo_code: Optional["FunctionsInfoFileDecompileResultFunctionsPseudoCode"] = ( 30 | Field(alias="pseudoCode") 31 | ) 32 | 33 | 34 | class FunctionsInfoFileDecompileResultFunctionsEmbedding(BaseModel): 35 | vector: List[float] 36 | version: str 37 | 38 | 39 | class FunctionsInfoFileDecompileResultFunctionsPseudoCode(BaseModel): 40 | code: str 41 | 42 | 43 | FunctionsInfo.model_rebuild() 44 | FunctionsInfoFile.model_rebuild() 45 | FunctionsInfoFileDecompileResult.model_rebuild() 46 | FunctionsInfoFileDecompileResultFunctions.model_rebuild() 47 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/check_state.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | from .enums import Status 10 | 11 | 12 | class CheckState(BaseModel): 13 | file: Optional["CheckStateFile"] 14 | 15 | 16 
| class CheckStateFile(BaseModel): 17 | smart_binary_status: Optional["CheckStateFileSmartBinaryStatus"] = Field( 18 | alias="smartBinaryStatus" 19 | ) 20 | smart_beat_status: Optional["CheckStateFileSmartBeatStatus"] = Field( 21 | alias="smartBeatStatus" 22 | ) 23 | text: Optional["CheckStateFileText"] 24 | decompile_result: Optional["CheckStateFileDecompileResult"] = Field( 25 | alias="decompileResult" 26 | ) 27 | 28 | 29 | class CheckStateFileSmartBinaryStatus(BaseModel): 30 | status: Status 31 | 32 | 33 | class CheckStateFileSmartBeatStatus(BaseModel): 34 | status: Status 35 | 36 | 37 | class CheckStateFileText(BaseModel): 38 | content: Optional[str] 39 | 40 | 41 | class CheckStateFileDecompileResult(BaseModel): 42 | basic_info: "CheckStateFileDecompileResultBasicInfo" = Field(alias="basicInfo") 43 | 44 | 45 | class CheckStateFileDecompileResultBasicInfo(BaseModel): 46 | file_type: str = Field(alias="fileType") 47 | 48 | 49 | CheckState.model_rebuild() 50 | CheckStateFile.model_rebuild() 51 | CheckStateFileDecompileResult.model_rebuild() 52 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/cve/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/cve/index.rst:2 24 | msgid ":py:mod:`binaryai.cve`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/cve/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/cve/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/cve/index.rst:19::1 36 | msgid ":py:obj:`CVE `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/cve/index.rst:25 40 | #: ../../autoapi/binaryai/cve/index.rst:19::1 41 | msgid "A CVE entity." 42 | msgstr "" 43 | 44 | #: ../../autoapi/binaryai/cve/index.rst:23 45 | msgid "Bases: :py:obj:`object`" 46 | msgstr "" 47 | 48 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/CONTRIBUTING.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 
6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../CONTRIBUTING.md:1 24 | msgid "Contributing" 25 | msgstr "" 26 | 27 | #: ../../CONTRIBUTING.md:3 28 | msgid "Install dependencies" 29 | msgstr "" 30 | 31 | #: ../../CONTRIBUTING.md:5 32 | msgid "Install development dependencies:" 33 | msgstr "" 34 | 35 | #: ../../CONTRIBUTING.md:11 36 | msgid "(Optional) Re-generate query file" 37 | msgstr "" 38 | 39 | #: ../../CONTRIBUTING.md:13 40 | msgid "" 41 | "If the client is modified for a new verion's schema, run the code " 42 | "generator:" 43 | msgstr "" 44 | 45 | #: ../../CONTRIBUTING.md:20 46 | msgid "Formatter and linter" 47 | msgstr "" 48 | 49 | #: ../../CONTRIBUTING.md:22 50 | msgid "Recommend to run following commands" 51 | msgstr "" 52 | 53 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "BinaryAI SDK" 10 | copyright = "2020-2023, binaryai " 11 | author = "binaryai " 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = [ 17 | "nbsphinx", 18 | "myst_parser", 19 | "autoapi.extension", 20 | "sphinx.ext.napoleon", 21 | "sphinx.ext.viewcode", 22 | ] 23 | autoapi_dirs = ["../src"] # location to parse for API reference 24 | autoapi_ignore = ["*client_stub*"] # ignore auto generated stub code 25 | 26 | templates_path = ["_templates"] 27 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 28 | 29 | # -- i18n 30 | 31 | locale_dirs = ["locale/"] # path is example but recommended. 32 | gettext_compact = False # optional. 33 | 34 | 35 | # -- Options for HTML output ------------------------------------------------- 36 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 37 | 38 | html_theme = "sphinx_rtd_theme" 39 | html_static_path = ["_static"] 40 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "binaryai" 7 | version = "0.8.5.dev2" 8 | description = "BinaryAI-SDK is a library provides an abstracted client module to simplify the procedure of uploading file for analysis." 
9 | license = "GPLv3" 10 | authors = ["binaryai "] 11 | readme = "README.md" 12 | homepage = "https://www.binaryai.cn/" 13 | documentation = "https://www.binaryai.cn/doc/" 14 | repository = "https://github.com/binaryai/sdk/" 15 | exclude = ["examples"] 16 | 17 | [tool.poetry.dependencies] 18 | python = "^3.9" 19 | deprecated = "^1.2.14" 20 | httpx = ">=0.25" 21 | pydantic = "^2.3" 22 | 23 | [tool.poetry.group.dev.dependencies] 24 | flake8 = "4.0.1" 25 | black = "24.3.0" 26 | isort = "5.10.1" 27 | ariadne-codegen = "^0.13.0" 28 | pytest = "^7.4.2" 29 | scipy = "^1.13.0" 30 | 31 | [tool.poetry.group.docs.dependencies] 32 | sphinx-autoapi = "^3.0.0" 33 | sphinx-rtd-theme = "^2.0.0" 34 | sphinx = "^7.3.7" 35 | myst-parser = "^3.0.1" 36 | sphinx-intl = "^2.2.0" 37 | nbsphinx = "^0.9.3" 38 | ipykernel = "^6.25.2" 39 | 40 | [tool.ariadne-codegen] 41 | remote_schema_url = "https://api.binaryai.cn/v1/endpoint" 42 | queries_path = "./src/binaryai/query.graphql" 43 | target_package_name = "client_stub" 44 | target_package_path = "./src/binaryai" 45 | async_client = false 46 | 47 | [tool.black] 48 | exclude = "src/binaryai/client_stub/*" 49 | 50 | [tool.isort] 51 | skip_glob = "src/binaryai/client_stub/*" 52 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/component/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/component/index.rst:2 24 | msgid ":py:mod:`binaryai.component`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/component/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/component/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/component/index.rst:19::1 36 | msgid ":py:obj:`Component `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/component/index.rst:25 40 | #: ../../autoapi/binaryai/component/index.rst:19::1 41 | msgid "A component entity that represents a SCA result." 
42 | msgstr "" 43 | 44 | #: ../../autoapi/binaryai/component/index.rst:23 45 | msgid "Bases: :py:obj:`object`" 46 | msgstr "" 47 | 48 | -------------------------------------------------------------------------------- /src/binaryai/license.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List 3 | 4 | 5 | class LicenseTagItem(json.JSONDecoder): 6 | """A license tag item entity.""" 7 | 8 | def __init__(self, tag_name: str, description: str) -> None: 9 | super().__init__() 10 | self.tag_name = tag_name 11 | self.description = description 12 | 13 | 14 | class LicenseTags(json.JSONDecoder): 15 | """A license tag entity.""" 16 | 17 | def __init__( 18 | self, 19 | permission: List[LicenseTagItem], 20 | condition: List[LicenseTagItem], 21 | forbidden: List[LicenseTagItem], 22 | ) -> None: 23 | super().__init__() 24 | self.permission = permission 25 | self.condition = condition 26 | self.forbidden = forbidden 27 | 28 | 29 | class License(json.JSONDecoder): 30 | """A license entity.""" 31 | 32 | def __init__( 33 | self, 34 | short_name: str, 35 | full_name: str, 36 | content: str, 37 | url: str, 38 | source: str, 39 | tags: LicenseTags = None, 40 | risk: str = None, 41 | extra: str = None, 42 | is_pass: bool = None, 43 | check_reason: str = None, 44 | ) -> None: 45 | super().__init__() 46 | self.full_name = full_name 47 | self.short_name = short_name 48 | self.content = content 49 | self.risk = risk 50 | self.tags = tags 51 | self.source = source 52 | self.url = url 53 | self.extra = extra 54 | self.is_pass = is_pass 55 | self.check_reason = check_reason 56 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/check_or_upload.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Literal, Union 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class CheckOrUpload(BaseModel): 12 | create_upload_ticket: Union[ 13 | "CheckOrUploadCreateUploadTicketUploadTicket", 14 | "CheckOrUploadCreateUploadTicketOwnershipTicket", 15 | "CheckOrUploadCreateUploadTicketFile", 16 | ] = Field(alias="createUploadTicket", discriminator="typename__") 17 | 18 | 19 | class CheckOrUploadCreateUploadTicketUploadTicket(BaseModel): 20 | typename__: Literal["UploadTicket"] = Field(alias="__typename") 21 | ticket_id: str = Field(alias="ticketID") 22 | url: str 23 | request_headers: List[ 24 | "CheckOrUploadCreateUploadTicketUploadTicketRequestHeaders" 25 | ] = Field(alias="requestHeaders") 26 | 27 | 28 | class CheckOrUploadCreateUploadTicketUploadTicketRequestHeaders(BaseModel): 29 | key: str 30 | value: str 31 | 32 | 33 | class CheckOrUploadCreateUploadTicketOwnershipTicket(BaseModel): 34 | typename__: Literal["OwnershipTicket"] = Field(alias="__typename") 35 | ticket_id: str = Field(alias="ticketID") 36 | secret_prepend: str = Field(alias="secretPrepend") 37 | secret_append: str = Field(alias="secretAppend") 38 | 39 | 40 | class CheckOrUploadCreateUploadTicketFile(BaseModel): 41 | typename__: Literal["File"] = Field(alias="__typename") 42 | sha_256: str = Field(alias="sha256") 43 | 44 | 45 | CheckOrUpload.model_rebuild() 46 | CheckOrUploadCreateUploadTicketUploadTicket.model_rebuild() 47 | -------------------------------------------------------------------------------- /.pylintrc: 
-------------------------------------------------------------------------------- 1 | [MASTER] 2 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 3 | # number of processors available to use. 4 | jobs=1 5 | 6 | 7 | [MESSAGES CONTROL] 8 | 9 | # Disable the message, report, category or checker with the given id(s). 10 | disable=all 11 | 12 | # Enable the message, report, category or checker with the given id(s). 13 | enable=c-extension-no-member, 14 | bad-indentation, 15 | bare-except, 16 | broad-except, 17 | dangerous-default-value, 18 | function-redefined, 19 | len-as-condition, 20 | line-too-long, 21 | misplaced-future, 22 | missing-final-newline, 23 | mixed-line-endings, 24 | multiple-imports, 25 | multiple-statements, 26 | singleton-comparison, 27 | trailing-comma-tuple, 28 | trailing-newlines, 29 | trailing-whitespace, 30 | unexpected-line-ending-format, 31 | unused-import, 32 | unused-variable, 33 | wildcard-import, 34 | wrong-import-order 35 | 36 | 37 | [FORMAT] 38 | 39 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 40 | expected-line-ending-format=LF 41 | 42 | # Regexp for a line that is allowed to be longer than the limit. 43 | ignore-long-lines=^\s*(# )??$ 44 | 45 | # Maximum number of characters on a single line. 46 | max-line-length=120 47 | 48 | # Maximum number of lines in a module. 49 | max-module-lines=2000 50 | 51 | 52 | [EXCEPTIONS] 53 | 54 | # Exceptions that will emit a warning when being caught. Defaults to 55 | # "BaseException, Exception". 56 | overgeneral-exceptions=BaseException, 57 | Exception 58 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/ascii_string.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Annotated, List, Literal, Optional, Union 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class ASCIIString(BaseModel): 12 | file: Optional["ASCIIStringFile"] 13 | 14 | 15 | class ASCIIStringFile(BaseModel): 16 | executable: Optional[ 17 | Annotated[ 18 | Union[ 19 | "ASCIIStringFileExecutableELFInfo", 20 | "ASCIIStringFileExecutablePEInfo", 21 | "ASCIIStringFileExecutableMachoInfo", 22 | "ASCIIStringFileExecutableCOFFInfo", 23 | ], 24 | Field(discriminator="typename__"), 25 | ] 26 | ] 27 | 28 | 29 | class ASCIIStringFileExecutableELFInfo(BaseModel): 30 | typename__: Literal["ELFInfo"] = Field(alias="__typename") 31 | ascii_strings: Optional[List[str]] = Field(alias="asciiStrings") 32 | 33 | 34 | class ASCIIStringFileExecutablePEInfo(BaseModel): 35 | typename__: Literal["PEInfo"] = Field(alias="__typename") 36 | ascii_strings: Optional[List[str]] = Field(alias="asciiStrings") 37 | 38 | 39 | class ASCIIStringFileExecutableMachoInfo(BaseModel): 40 | typename__: Literal["MachoInfo"] = Field(alias="__typename") 41 | ascii_strings: Optional[List[str]] = Field(alias="asciiStrings") 42 | 43 | 44 | class ASCIIStringFileExecutableCOFFInfo(BaseModel): 45 | typename__: Literal["COFFInfo"] = Field(alias="__typename") 46 | ascii_strings: Optional[List[str]] = Field(alias="asciiStrings") 47 | 48 | 49 | ASCIIString.model_rebuild() 50 | ASCIIStringFile.model_rebuild() 51 | -------------------------------------------------------------------------------- /examples/decompile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | 5 | from binaryai import BinaryAI 6 | 7 | DEFAULT_SHA256 = "bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30" 8 | 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser(description="decompile binary and do similarity search for a function") 12 | parser.add_argument("--sha256", required=False, default=DEFAULT_SHA256) 13 | args = parser.parse_args() 14 | 15 | sha256 = args.sha256 16 | 17 | # Initial BinaryAI client 18 | bai = BinaryAI() 19 | 20 | # Analyze the file just in case it's not been analyzed. 21 | bai.wait_until_analysis_done(sha256) 22 | 23 | # Get all functions' offset 24 | print("list function offset list") 25 | func_offset_list = bai.list_func_offset(sha256) 26 | print(func_offset_list) 27 | 28 | # Or you can get a list of functions (in interator) directly 29 | for func in bai.list_funcs(sha256): 30 | print("show one function pseudocode") 31 | print(func.name) 32 | # print(func.pseudocode) 33 | break 34 | 35 | # Batch operation 36 | target_offsets = func_offset_list[:3] 37 | target_funcs = bai.get_funcs_info(sha256, target_offsets) 38 | for target_func in target_funcs: 39 | print(target_func.name) 40 | 41 | # Similar search topk function for given function 42 | for func_offset in func_offset_list: 43 | matched_func_list = bai.get_func_match(sha256, func_offset) 44 | for matched_func in matched_func_list or []: 45 | print(matched_func.score) 46 | # print(matched_func.code) 47 | break 48 | 49 | print("done") 50 | 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/exceptions/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/exceptions/index.rst:2 24 | msgid ":py:mod:`binaryai.exceptions`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/exceptions/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/exceptions/index.rst:13 32 | #: ../../autoapi/binaryai/exceptions/index.rst:23 33 | msgid "Bases: :py:obj:`Exception`" 34 | msgstr "" 35 | 36 | #: ../../autoapi/binaryai/exceptions/index.rst:15 37 | msgid "" 38 | "FileNotExistError means the sha256 just uploaded is not found. Normally " 39 | "this error does not occur. If it does, it means that there is a problem " 40 | "with the server" 41 | msgstr "" 42 | 43 | #: ../../autoapi/binaryai/exceptions/index.rst:25 44 | msgid "" 45 | "FileRequiredError means BinaryAI requires the file, but you are not " 46 | "providing it. This error might occur if you are only providing hash to " 47 | "the BinaryAI. Consider provide the original file as well." 
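The `FileRequiredError` text above covers the hash-only upload case: the server has never seen the file, so a hash alone is not enough. Here is a hedged sketch of handling it; the `sha256=` and `filepath=` keywords on `BinaryAI.upload` are assumptions modelled on `binaryai.upload.Uploader`, not a documented signature.

```python
# Hedged sketch: fall back to a real upload when only a hash was provided.
# Assumption: BinaryAI.upload() accepts sha256=/filepath= keywords the same
# way binaryai.upload.Uploader does; check the SDK reference for the
# authoritative signature.
from binaryai import BinaryAI
from binaryai.exceptions import FileRequiredError

bai = BinaryAI()
try:
    sha256 = bai.upload(sha256="bbe34331e5068d7dc5b990fbef10002358b4ef8e07ab92c0d5620ed60fc36b30")
except FileRequiredError:
    # The server does not know this hash yet, so provide the file itself.
    sha256 = bai.upload(filepath="./sample.bin")
```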
48 | msgstr "" 49 | 50 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/compressed_file/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/compressed_file/index.rst:2 24 | msgid ":py:mod:`binaryai.compressed_file`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/compressed_file/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/compressed_file/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/compressed_file/index.rst:19::1 36 | msgid ":py:obj:`CompressedFile `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/compressed_file/index.rst:19::1 40 | msgid "A compressed file entity." 41 | msgstr "" 42 | 43 | #: ../../autoapi/binaryai/compressed_file/index.rst:23 44 | msgid "Bases: :py:obj:`object`" 45 | msgstr "" 46 | 47 | #: ../../autoapi/binaryai/compressed_file/index.rst:25 48 | msgid "" 49 | "A compressed file entity. Note that a file may have no sha256 which " 50 | "should be empty string, e.g. /dev/console and /dev/null." 
51 | msgstr "" 52 | 53 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/license.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | 10 | 11 | class License(BaseModel): 12 | file: Optional["LicenseFile"] 13 | 14 | 15 | class LicenseFile(BaseModel): 16 | scainfo: Optional[List["LicenseFileScainfo"]] 17 | 18 | 19 | class LicenseFileScainfo(BaseModel): 20 | licenselist: Optional[List["LicenseFileScainfoLicenselist"]] 21 | 22 | 23 | class LicenseFileScainfoLicenselist(BaseModel): 24 | checkreason: Optional[str] 25 | content: str 26 | extra: Optional[str] 27 | full_name: str = Field(alias="fullName") 28 | pass_: Optional[bool] = Field(alias="pass") 29 | risk: Optional[str] 30 | short_name: str = Field(alias="shortName") 31 | source: str 32 | url: str 33 | tags: Optional["LicenseFileScainfoLicenselistTags"] 34 | 35 | 36 | class LicenseFileScainfoLicenselistTags(BaseModel): 37 | permission: Optional[List["LicenseFileScainfoLicenselistTagsPermission"]] 38 | condition: Optional[List["LicenseFileScainfoLicenselistTagsCondition"]] 39 | forbidden: Optional[List["LicenseFileScainfoLicenselistTagsForbidden"]] 40 | 41 | 42 | class LicenseFileScainfoLicenselistTagsPermission(BaseModel): 43 | tag_name: str = Field(alias="tagName") 44 | description: str 45 | 46 | 47 | class LicenseFileScainfoLicenselistTagsCondition(BaseModel): 48 | tag_name: str = Field(alias="tagName") 49 | description: str 50 | 51 | 52 | class LicenseFileScainfoLicenselistTagsForbidden(BaseModel): 53 | tag_name: str = Field(alias="tagName") 54 | description: str 55 | 56 | 57 | License.model_rebuild() 58 | LicenseFile.model_rebuild() 59 | LicenseFileScainfo.model_rebuild() 60 | LicenseFileScainfoLicenselist.model_rebuild() 61 | LicenseFileScainfoLicenselistTags.model_rebuild() 62 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/upload/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/upload/index.rst:2 24 | msgid ":py:mod:`binaryai.upload`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/upload/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/upload/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/upload/index.rst:19::1 36 | msgid ":py:obj:`Uploader `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/upload/index.rst:37 40 | #: ../../autoapi/binaryai/upload/index.rst:19::1 41 | msgid "Uploads a file to server. 
See `binaryai.BinaryAI.upload` for detail." 42 | msgstr "" 43 | 44 | #: ../../autoapi/binaryai/upload/index.rst:21 45 | msgid "Attributes" 46 | msgstr "" 47 | 48 | #: ../../autoapi/binaryai/upload/index.rst:27::1 49 | msgid ":py:obj:`logger `\\" 50 | msgstr "" 51 | 52 | #: ../../autoapi/binaryai/upload/index.rst:35 53 | msgid "Bases: :py:obj:`object`" 54 | msgstr "" 55 | 56 | #: ../../autoapi/binaryai/upload/index.rst:41 57 | msgid "Starts the upload sequence." 58 | msgstr "" 59 | 60 | #: ../../autoapi/binaryai/upload/index.rst:46 61 | msgid "Checks if file exists on FileManager with filename and file's hashsum." 62 | msgstr "" 63 | 64 | #: ../../autoapi/binaryai/upload/index.rst:51 65 | msgid "Calculate the POS argument" 66 | msgstr "" 67 | 68 | #: ../../autoapi/binaryai/upload/index.rst:56 69 | msgid "Uploads file to FileManager." 70 | msgstr "" 71 | 72 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/license/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/license/index.rst:2 24 | msgid ":py:mod:`binaryai.license`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/license/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/license/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/license/index.rst:21::1 36 | msgid ":py:obj:`LicenseTagItem `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/license/index.rst:27 40 | #: ../../autoapi/binaryai/license/index.rst:21::1 41 | msgid "A license tag item entity." 42 | msgstr "" 43 | 44 | #: ../../autoapi/binaryai/license/index.rst:21::1 45 | msgid ":py:obj:`LicenseTags `\\" 46 | msgstr "" 47 | 48 | #: ../../autoapi/binaryai/license/index.rst:35 49 | #: ../../autoapi/binaryai/license/index.rst:21::1 50 | msgid "A license tag entity." 51 | msgstr "" 52 | 53 | #: ../../autoapi/binaryai/license/index.rst:21::1 54 | msgid ":py:obj:`License `\\" 55 | msgstr "" 56 | 57 | #: ../../autoapi/binaryai/license/index.rst:43 58 | #: ../../autoapi/binaryai/license/index.rst:21::1 59 | msgid "A license entity." 60 | msgstr "" 61 | 62 | #: ../../autoapi/binaryai/license/index.rst:25 63 | #: ../../autoapi/binaryai/license/index.rst:33 64 | #: ../../autoapi/binaryai/license/index.rst:41 65 | msgid "Bases: :py:obj:`json.JSONDecoder`" 66 | msgstr "" 67 | 68 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/python 3 | { 4 | "name": "Python 3", 5 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 6 | "build": { 7 | // Sets the run context to one level up instead of the .devcontainer folder. 8 | "context": "..", 9 | // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename. 10 | "dockerfile": "./Dockerfile" 11 | }, 12 | "containerEnv": { 13 | "BINARYAI_SECRET_ID": "${localEnv:BINARYAI_SECRET_ID}", 14 | "BINARYAI_SECRET_KEY": "${localEnv:BINARYAI_SECRET_KEY}", 15 | "BINARYAI_ENDPOINT": "${localEnv:BINARYAI_ENDPOINT}", 16 | }, 17 | "features": { 18 | "ghcr.io/devcontainers/features/python:1": { 19 | "version": "none" // do not touch os python 20 | }, 21 | "ghcr.io/devcontainers-extra/features/black:2": {}, 22 | "ghcr.io/devcontainers-extra/features/flake8:2": {}, 23 | "ghcr.io/devcontainers-extra/features/pylint:2": {}, 24 | "ghcr.io/devcontainers-extra/features/poetry:2": { 25 | "version": "1.8.5" 26 | } 27 | }, 28 | "customizations": { 29 | "vscode": { 30 | "extensions": [ 31 | "ms-python.vscode-pylance", 32 | "ms-python.pylint", 33 | "ms-python.python", 34 | "ms-python.flake8", 35 | "ms-python.black-formatter", 36 | "ms-toolsai.jupyter", 37 | "GraphQL.vscode-graphql", 38 | "esbenp.prettier-vscode" 39 | ], 40 | "settings": { 41 | "[python]": { 42 | "editor.formatOnSave": true, 43 | "editor.defaultFormatter": "ms-python.black-formatter" 44 | }, 45 | "black-formatter.args": [ 46 | "--line-length=120" 47 | ] 48 | } 49 | } 50 | }, 51 | // Features to add to the dev container. More info: https://containers.dev/features. 52 | // "features": {}, 53 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 54 | // "forwardPorts": [], 55 | // Use 'postCreateCommand' to run commands after the container is created. 56 | "postCreateCommand": "poetry install --with=dev --with=docs --no-interaction", 57 | // Configure tool-specific properties. 58 | // "customizations": {}, 59 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 60 | // "remoteUser": "root" 61 | } -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/function/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 
6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/function/index.rst:2 24 | msgid ":py:mod:`binaryai.function`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/function/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/function/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/function/index.rst:20::1 36 | msgid ":py:obj:`Function `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/function/index.rst:26 40 | #: ../../autoapi/binaryai/function/index.rst:20::1 41 | msgid "A function entity that represents a decompiled function." 42 | msgstr "" 43 | 44 | #: ../../autoapi/binaryai/function/index.rst:20::1 45 | msgid ":py:obj:`MatchedFunction `\\" 46 | msgstr "" 47 | 48 | #: ../../autoapi/binaryai/function/index.rst:20::1 49 | msgid "Matched function entity by using similarity search." 50 | msgstr "" 51 | 52 | #: ../../autoapi/binaryai/function/index.rst:24 53 | #: ../../autoapi/binaryai/function/index.rst:32 54 | msgid "Bases: :py:obj:`object`" 55 | msgstr "" 56 | 57 | #: ../../autoapi/binaryai/function/index.rst:34 58 | msgid "" 59 | "Matched function entity by using similarity search. Differ from Function " 60 | "class, this class is a matched function result with only score, code and " 61 | "other fields, but no bytes and fileoffset which represents an function in" 62 | " a executable binary. So this is rather not an actual decompiled " 63 | "function." 64 | msgstr "" 65 | 66 | #: ../../autoapi/binaryai/function/index.rst:40 67 | msgid "Note that this class is experiment and maybe changed in the future." 68 | msgstr "" 69 | 70 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/exceptions.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | 3 | from typing import Any, Dict, List, Optional, Union 4 | 5 | import httpx 6 | 7 | 8 | class GraphQLClientError(Exception): 9 | """Base exception.""" 10 | 11 | 12 | class GraphQLClientHttpError(GraphQLClientError): 13 | def __init__(self, status_code: int, response: httpx.Response) -> None: 14 | self.status_code = status_code 15 | self.response = response 16 | 17 | def __str__(self) -> str: 18 | return f"HTTP status code: {self.status_code}" 19 | 20 | 21 | class GraphQLClientInvalidResponseError(GraphQLClientError): 22 | def __init__(self, response: httpx.Response) -> None: 23 | self.response = response 24 | 25 | def __str__(self) -> str: 26 | return "Invalid response format." 
27 | 28 | 29 | class GraphQLClientGraphQLError(GraphQLClientError): 30 | def __init__( 31 | self, 32 | message: str, 33 | locations: Optional[List[Dict[str, int]]] = None, 34 | path: Optional[List[str]] = None, 35 | extensions: Optional[Dict[str, object]] = None, 36 | orginal: Optional[Dict[str, object]] = None, 37 | ): 38 | self.message = message 39 | self.locations = locations 40 | self.path = path 41 | self.extensions = extensions 42 | self.orginal = orginal 43 | 44 | def __str__(self) -> str: 45 | return self.message 46 | 47 | @classmethod 48 | def from_dict(cls, error: Dict[str, Any]) -> "GraphQLClientGraphQLError": 49 | return cls( 50 | message=error["message"], 51 | locations=error.get("locations"), 52 | path=error.get("path"), 53 | extensions=error.get("extensions"), 54 | orginal=error, 55 | ) 56 | 57 | 58 | class GraphQLClientGraphQLMultiError(GraphQLClientError): 59 | def __init__( 60 | self, 61 | errors: List[GraphQLClientGraphQLError], 62 | data: Optional[Dict[str, Any]] = None, 63 | ): 64 | self.errors = errors 65 | self.data = data 66 | 67 | def __str__(self) -> str: 68 | return "; ".join(str(e) for e in self.errors) 69 | 70 | @classmethod 71 | def from_errors_dicts( 72 | cls, errors_dicts: List[Dict[str, Any]], data: Optional[Dict[str, Any]] = None 73 | ) -> "GraphQLClientGraphQLMultiError": 74 | return cls( 75 | errors=[GraphQLClientGraphQLError.from_dict(e) for e in errors_dicts], 76 | data=data, 77 | ) 78 | 79 | 80 | class GraphQLClientInvalidMessageFormat(GraphQLClientError): 81 | def __init__(self, message: Union[str, bytes]) -> None: 82 | self.message = message 83 | 84 | def __str__(self) -> str: 85 | return "Invalid message format." 86 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .vscode/* 132 | !.vscode/settings.json 133 | !.vscode/tasks.json 134 | !.vscode/launch.json 135 | !.vscode/extensions.json 136 | !.vscode/*.code-snippets 137 | 138 | # Local History for Visual Studio Code 139 | .history/ 140 | 141 | # Built Visual Studio Code Extensions 142 | *.vsix 143 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/enums.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: https://api.binaryai.cn/v1/endpoint 3 | 4 | from enum import Enum 5 | 6 | 7 | class AnalyzeProgressStep(str, Enum): 8 | Begin = "Begin" 9 | Done = "Done" 10 | AutoAnalysis = "AutoAnalysis" 11 | Decompile = "Decompile" 12 | FeatureForward = "FeatureForward" 13 | EmbedRecall = "EmbedRecall" 14 | EmbedRank = "EmbedRank" 15 | ResultUpload = "ResultUpload" 16 | 17 | 18 | class AntivirusSafeLevel(str, Enum): 19 | UNKNOWN = "UNKNOWN" 20 | SAFE_FORCE = "SAFE_FORCE" 21 | SAFE = "SAFE" 22 | SAFE_WEAK = "SAFE_WEAK" 23 | UNDETECTED = "UNDETECTED" 24 | MALICIOUS_WEAK = "MALICIOUS_WEAK" 25 | MALICIOUS = "MALICIOUS" 26 | MALICIOUS_FORCE = "MALICIOUS_FORCE" 27 | INFECTED = "INFECTED" 28 | 29 | 30 | class ExecuteType(str, Enum): 31 | SmartBinary = "SmartBinary" 32 | SmartExtractor = "SmartExtractor" 33 | FileMeta = "FileMeta" 34 | SmartBeat = "SmartBeat" 35 | 36 | 37 | class FileType(str, Enum): 38 | ELF = "ELF" 39 | PE = "PE" 40 | Macho = "Macho" 41 | COFF = "COFF" 42 | 43 | 44 | class MalwareFamilyTag(str, Enum): 45 | TROJAN = "TROJAN" 46 | VIRUS = "VIRUS" 47 | WORM = "WORM" 48 | DOWNLOADER = "DOWNLOADER" 49 | C2 = "C2" 50 | RANSOM = "RANSOM" 51 | COINMINER = "COINMINER" 52 | ADWARE = "ADWARE" 53 | BOTNET = "BOTNET" 54 | 55 | 56 | class MatchAlgorithm(str, Enum): 57 | Normal = "Normal" 58 | Professional = "Professional" 59 | 60 | 61 | class NoopReason(str, Enum): 62 | AlreadyRunning = "AlreadyRunning" 63 | WouldNotChange = "WouldNotChange" 64 | RateLimited = "RateLimited" 65 | 66 | 67 | class PseudoCodeAnnotationType(str, Enum): 68 | HAS_FUNC_DEF = "HAS_FUNC_DEF" 69 | HAS_XREF = "HAS_XREF" 70 | 71 | 72 | class RelroLevel(str, Enum): 73 | No = "No" 74 | Full = "Full" 75 | Partial = "Partial" 76 | 77 | 78 | class SCAAlgo(str, Enum): 79 | NORMAL = "NORMAL" 80 | PROFESSIONAL = "PROFESSIONAL" 81 | 82 | 83 | class SearchBinaryStatisticKey(str, Enum): 84 | THIRD_LIB_NAME = "THIRD_LIB_NAME" 85 | THIRD_LIB_VERSION = "THIRD_LIB_VERSION" 86 | THIRD_LIB_CVE_NAME = "THIRD_LIB_CVE_NAME" 87 | 88 | 89 | class Status(str, Enum): 90 | Ready = "Ready" 91 | Waiting = "Waiting" 92 | Running = "Running" 93 | Success = "Success" 
94 | Fail = "Fail" 95 | Timeout = "Timeout" 96 | 97 | 98 | class SymbolType(str, Enum): 99 | FUNCTION = "FUNCTION" 100 | THUNK_FUNCTION = "THUNK_FUNCTION" 101 | DATA_LABEL = "DATA_LABEL" 102 | 103 | 104 | class VulnerabilitySourceStatus(str, Enum): 105 | NO_SOURCE_IDENTIFIED = "NO_SOURCE_IDENTIFIED" 106 | UNPATCHED = "UNPATCHED" 107 | PATCHED = "PATCHED" 108 | 109 | 110 | class XRefType(str, Enum): 111 | CALL = "CALL" 112 | DATA = "DATA" 113 | STRING = "STRING" 114 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/input_types.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: https://api.binaryai.cn/v1/endpoint 3 | 4 | from typing import List, Optional 5 | 6 | from pydantic import Field 7 | 8 | from .base_model import BaseModel 9 | from .enums import FileType, SearchBinaryStatisticKey, SymbolType 10 | 11 | 12 | class BindiffMatchInput(BaseModel): 13 | sha_256: str = Field(alias="sha256") 14 | 15 | 16 | class CreateFileInput(BaseModel): 17 | ticket_id: str = Field(alias="ticketID") 18 | 19 | 20 | class CreateMatchInput(BaseModel): 21 | sha_256: str = Field(alias="sha256") 22 | target: "MatchTargetInput" 23 | 24 | 25 | class CreateUploadTicketInput(BaseModel): 26 | sha_256: Optional[str] = Field(alias="sha256", default=None) 27 | md_5: Optional[str] = Field(alias="md5", default=None) 28 | name: Optional[str] = None 29 | captcha_ticket: Optional[str] = Field(alias="captchaTicket", default=None) 30 | captcha_random_string: Optional[str] = Field( 31 | alias="captchaRandomString", default=None 32 | ) 33 | is_private_upload: Optional[bool] = Field(alias="isPrivateUpload", default=None) 34 | 35 | 36 | class KHashInput(BaseModel): 37 | hash_hexlified_string: str = Field(alias="hashHexlifiedString") 38 | version: str 39 | bit_size: int = Field(alias="bitSize") 40 | 41 | 42 | class MatchTargetInput(BaseModel): 43 | bindiff: Optional["BindiffMatchInput"] = None 44 | oss: Optional["OSSMatchInput"] = None 45 | 46 | 47 | class OSSMatchInput(BaseModel): 48 | repo_ur_ls: Optional[List[str]] = Field(alias="repoURLs", default=None) 49 | 50 | 51 | class ReanalyzeInput(BaseModel): 52 | sha_256: str = Field(alias="sha256") 53 | skip_version_check: Optional[bool] = Field(alias="skipVersionCheck", default=None) 54 | 55 | 56 | class SearchBinaryInput(BaseModel): 57 | offset: Optional[int] = None 58 | limit: Optional[int] = None 59 | keyword: Optional[str] = None 60 | include_type: Optional[List[FileType]] = Field(alias="includeType", default=None) 61 | third_lib: Optional[List["SearchThirdLib"]] = Field(alias="thirdLib", default=None) 62 | statistic: Optional[List["SearchBinaryStatisticInput"]] = None 63 | 64 | 65 | class SearchBinaryStatisticInput(BaseModel): 66 | keys: List[SearchBinaryStatisticKey] 67 | 68 | 69 | class SearchCVESec(BaseModel): 70 | name: str 71 | 72 | 73 | class SearchFileInput(BaseModel): 74 | sha_256: Optional[str] = Field(alias="sha256", default=None) 75 | md_5: Optional[str] = Field(alias="md5", default=None) 76 | 77 | 78 | class SearchThirdLib(BaseModel): 79 | name: Optional[str] = None 80 | version: Optional[List[str]] = None 81 | cves: Optional[List["SearchCVESec"]] = None 82 | 83 | 84 | class SessionLoginInput(BaseModel): 85 | previous_session_token: Optional[str] = Field( 86 | alias="previousSessionToken", default=None 87 | ) 88 | weixin: Optional["WeixinSessionLoginInput"] = None 89 | 90 | 91 | class SymbolTableFilter(BaseModel): 92 | 
symbol_type: Optional[List[SymbolType]] = Field(alias="symbolType", default=None) 93 | 94 | 95 | class UpdateAccessKeyInput(BaseModel): 96 | enabled: Optional[bool] = None 97 | notes: Optional[str] = None 98 | 99 | 100 | class WeixinSessionLoginInput(BaseModel): 101 | code: str 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BinaryAI Python SDK 2 | 3 | ![PUBLISH](https://github.com/binaryai/sdk/workflows/PUBLISH/badge.svg) 4 | [![readthedocs](https://readthedocs.org/projects/binaryai/badge/?version=stable&style=flat)](https://binaryai.readthedocs.io/) 5 | [![Downloads](https://pepy.tech/badge/binaryai/month)](https://pepy.tech/project/binaryai/month) 6 | [![Gitter](https://badges.gitter.im/binaryai/community.svg)](https://gitter.im/binaryai/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 7 | 8 | [BinaryAI](https://www.binaryai.cn) is a binary file security analysis platform. This SDK aims at providing 9 | a simple client to upload file and get analysis result. It also works as 10 | a demo on calling BinaryAI's GraphQL API directly. 11 | 12 | To use SDK, you need a valid credential. Read [BinaryAI docs](https://www.binaryai.cn/doc/) about detailed instructions. 13 | 14 | ## Dependency 15 | 16 | Python >= 3.9 17 | 18 | ## Download and install 19 | 20 | ```bash 21 | python3 -m pip install binaryai 22 | ``` 23 | 24 | ## Quick start 25 | 26 | See the [SDK document](https://binaryai.readthedocs.io) for guide. 27 | 28 | ## Internals 29 | 30 | ### Endpoints 31 | 32 | The default endpoint is `https://api.binaryai.cn/v1/endpoint`. 33 | 34 | ### API Credentials 35 | 36 | API Credentials are used for signing requests. We suggest you using our SDK or our library to sign it, but you can also 37 | have your own implementation. We are using the signing method `TC3-HMAC-SHA256`, same with the Tencent Cloud. You can 38 | read their [document](https://cloud.tencent.com/document/product/213/30654) about how to sign requests. BinaryAI would 39 | require following fields: 40 | 41 | ```toml 42 | Region = "ap-shanghai" 43 | service = "binaryai" 44 | Action = "BinaryAI" 45 | Version = "2023-04-15" 46 | ``` 47 | 48 | ## Additional Reading 49 | 50 | Read the [Changelog](https://www.binaryai.cn/doc/zh/releasenotes/) of our product, and hope you can also have fun reading papers related to our job: 51 | 52 | 1. Yu, Zeping, et al. "Codecmr: Cross-modal retrieval for function-level binary source code matching." Advances in Neural Information Processing Systems 33 (2020): 3872-3883. 53 | 2. Yu, Zeping, et al. "Order matters: Semantic-aware neural networks for binary code similarity detection." Proceedings of the AAAI conference on artificial intelligence. Vol. 34. No. 01. 2020. 54 | 3. Li, Zongjie, et al. "Unleashing the power of compiler intermediate representation to enhance neural program embeddings." Proceedings of the 44th International Conference on Software Engineering. 2022. 55 | 4. Wong, Wai Kin, et al. "Deceiving Deep Neural Networks-Based Binary Code Matching with Adversarial Programs." 2022 IEEE International Conference on Software Maintenance and Evolution (ICSME). IEEE, 2022. 56 | 5. Wang, Huaijin, et al. "Enhancing DNN-Based Binary Code Function Search With Low-Cost Equivalence Checking." IEEE Transactions on Software Engineering 49.1 (2022): 226-250. 57 | 6. Jia, Ang, et al. "1-to-1 or 1-to-n? 
Investigating the Effect of Function Inlining on Binary Similarity Analysis." ACM Transactions on Software Engineering and Methodology 32.4 (2023): 1-26. 58 | 7. Wang, Huaijin, et al. "sem2vec: Semantics-aware Assembly Tracelet Embedding." ACM Transactions on Software Engineering and Methodology 32.4 (2023): 1-34. 59 | 8. Jiang, Ling, et al. "Third-Party Library Dependency for Large-Scale SCA in the C/C++ Ecosystem: How Far Are We?." Proceedings of the 32nd ACM SIGSOFT International Symposium on Software Testing and Analysis. 2023. 60 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/quick_start.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2024-08-23 07:36+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.14.0\n" 22 | 23 | #: ../../quick_start.ipynb:9 24 | msgid "Quick start" 25 | msgstr "快速入门" 26 | 27 | #: ../../quick_start.ipynb:20 28 | msgid "" 29 | "This notebook gives an example on how to use this SDK to upload, start " 30 | "analysis and get the analysis result of a file." 31 | msgstr "下面给出了使用SDK上传和分析文件,并获取文件分析结果的示例。" 32 | 33 | #: ../../quick_start.ipynb:32 34 | msgid "Initialize" 35 | msgstr "初始化" 36 | 37 | #: ../../quick_start.ipynb:34 38 | msgid "" 39 | "To initialize the SDK, please prepare your Secret ID and Secret Key. " 40 | "Please `apply from us `__ if you don't have" 41 | " one." 42 | msgstr "" 43 | "初始化SDK需要使用 Secret ID 和 Secret Key。如果你目前没有,请 `联系我们 " 44 | "`__ 获取一个。" 45 | 46 | #: ../../quick_start.ipynb:36 47 | msgid "" 48 | "The Secret ID & Key is the *only* credential to access API, so please " 49 | "keep it safely. We recommend you read your keys to environment variable, " 50 | "instead of saving in your code:" 51 | msgstr "Secret ID 和 Secret Key 是访问API的**唯一凭据**,请妥善保存。我们建议将这些凭据放入环境变量中,而不是硬编码在代码里:" 52 | 53 | #: ../../quick_start.ipynb:47 54 | msgid "Once those environment variables are set, our SDK can read them directly." 55 | msgstr "环境变量设置好后,SDK会自动读取它们。" 56 | 57 | #: ../../quick_start.ipynb:49 58 | msgid "To initialize the SDK:" 59 | msgstr "要初始化SDK:" 60 | 61 | #: ../../quick_start.ipynb:77 62 | msgid "Great! If no exceptions raised, the client is initialized." 63 | msgstr "赞!如果没有异常发生,客户端就初始化好了。" 64 | 65 | #: ../../quick_start.ipynb:89 66 | msgid "Upload and analyze file" 67 | msgstr "上传并分析文件" 68 | 69 | #: ../../quick_start.ipynb:91 70 | msgid "" 71 | "Note: file upload might be rejected if file is too big or upload is too " 72 | "quick." 
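The quick-start entries above walk through credentials, upload, and analysis. A hedged sketch of that flow follows, assuming `BINARYAI_SECRET_ID` / `BINARYAI_SECRET_KEY` are already exported and that `BinaryAI.upload(filepath=...)` returns the file's sha256 (the exact upload signature is not shown in this dump); the other calls match `examples/decompile.py`.

```python
# Hedged quick-start sketch based on the notebook text above.
from binaryai import BinaryAI

bai = BinaryAI()                              # credentials are read from the environment

sha256 = bai.upload(filepath="./sample.bin")  # may be rejected if too big or too frequent
bai.wait_until_analysis_done(sha256)          # block until the analysis finishes

print(bai.list_func_offset(sha256))           # offsets of the recognized functions
```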
73 | msgstr "注意:如果文件过大或上传过于频繁,上传请求可能被拒绝。" 74 | 75 | #: ../../quick_start.ipynb:93 76 | msgid "Now you can upload by the file path:" 77 | msgstr "现在你可以使用本地文件路径上传文件:" 78 | 79 | #: ../../quick_start.ipynb:145 80 | msgid "Get analysis result" 81 | msgstr "获得分析结果" 82 | 83 | #: ../../quick_start.ipynb:147 84 | msgid "You can get analysis result by giving hash of a file for each method:" 85 | msgstr "调用SDK的各个方法,并给出文件的sha256,即可访问分析结果:" 86 | 87 | #: ../../quick_start.ipynb:282 88 | msgid "Or initialize a file object and call it:" 89 | msgstr "或者,你也可以初始化一个文件对象并调用:" 90 | 91 | #: ../../quick_start.ipynb:343 92 | msgid "" 93 | "You can also get a file's KHash, which can be used to compare " 94 | "similarities:" 95 | msgstr "通过获取一个文件的KHash,你可以计算文件之间的相似程度:" 96 | 97 | #: ../../quick_start.ipynb:412 98 | msgid "" 99 | "In August 2024, we introduced a new feature to calculate a file's risky " 100 | "probability. A value ranged at ``[0, 1]`` might returned." 101 | msgstr "在2024年8月,我们推出了一个新特性,可用于计算文件的恶意程度,取值区间为``[0, 1]``。" 102 | 103 | #: ../../quick_start.ipynb:461 104 | msgid "" 105 | "As shown above, you can always give a file hash (md5 or sha256) to get " 106 | "its analysis result." 107 | msgstr "如上所示,只要给出文件的哈希,即可访问结果。" 108 | 109 | #: ../../quick_start.ipynb:463 110 | msgid "" 111 | "Read ``examples/`` in SDK repository or read the SDK API document for " 112 | "more info." 113 | msgstr "你可以查看SDK代码仓库中的 ``examples/`` 文件夹,或浏览文档的其他部分来获取更多信息。" 114 | 115 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/utils/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2023-09-18 19:49+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.12.1\n" 22 | 23 | #: ../../autoapi/binaryai/utils/index.rst:2 24 | msgid ":py:mod:`binaryai.utils`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/utils/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/utils/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/utils/index.rst:18::1 36 | msgid ":py:obj:`QCloudHttpxAuth `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/utils/index.rst:54 40 | #: ../../autoapi/binaryai/utils/index.rst:18::1 41 | msgid "Auth class that allows us to connect to QCloud services" 42 | msgstr "" 43 | 44 | #: ../../autoapi/binaryai/utils/index.rst:20 45 | msgid "Functions" 46 | msgstr "" 47 | 48 | #: ../../autoapi/binaryai/utils/index.rst:29::1 49 | msgid ":py:obj:`sha256sum `\\ \\(→ str\\)" 50 | msgstr "" 51 | 52 | #: ../../autoapi/binaryai/utils/index.rst:32 53 | #: ../../autoapi/binaryai/utils/index.rst:29::1 54 | msgid "Computes sha256 hash sum of a file." 
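The `sha256sum` helper documented above is also how `binaryai.upload.Uploader` derives the hash it sends to the server. A minimal usage sketch:

```python
# Minimal sketch: hash a file on disk with the documented helper.
from binaryai.utils import sha256sum

digest = sha256sum("./sample.bin")  # hex digest of the file's sha256
print(len(digest), digest)          # 64 hex characters
```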
55 | msgstr "" 56 | 57 | #: ../../autoapi/binaryai/utils/index.rst:29::1 58 | msgid ":py:obj:`sign `\\ \\(key\\, msg\\)" 59 | msgstr "" 60 | 61 | #: ../../autoapi/binaryai/utils/index.rst:41 62 | #: ../../autoapi/binaryai/utils/index.rst:46 63 | #: ../../autoapi/binaryai/utils/index.rst:29::1 64 | msgid "" 65 | "Modified from https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-" 66 | "request-examples.html" 67 | msgstr "" 68 | 69 | #: ../../autoapi/binaryai/utils/index.rst:29::1 70 | msgid "" 71 | ":py:obj:`getSignatureKey `\\ \\(key\\, " 72 | "dateStamp\\, serviceName\\)" 73 | msgstr "" 74 | 75 | #: ../../autoapi/binaryai/utils/index.rst 76 | msgid "Parameters" 77 | msgstr "" 78 | 79 | #: ../../autoapi/binaryai/utils/index.rst:34 80 | msgid "path of file" 81 | msgstr "" 82 | 83 | #: ../../autoapi/binaryai/utils/index.rst 84 | msgid "Returns" 85 | msgstr "" 86 | 87 | #: ../../autoapi/binaryai/utils/index.rst:36 88 | msgid "hex digest of sha256" 89 | msgstr "" 90 | 91 | #: ../../autoapi/binaryai/utils/index.rst:52 92 | msgid "Bases: :py:obj:`httpx.Auth`" 93 | msgstr "" 94 | 95 | #: ../../autoapi/binaryai/utils/index.rst:63 96 | msgid "Adds the authorization headers required by QCloud Signature v3." 97 | msgstr "" 98 | 99 | #: ../../autoapi/binaryai/utils/index.rst:68 100 | msgid "" 101 | "Override get_qcloud_request_headers_handler() if you have a subclass that" 102 | " needs to call get_qcloud_request_headers() with an arbitrary set of " 103 | "QCloud credentials. The default implementation calls " 104 | "get_qcloud_request_headers() with self.qcloud_access_key, and " 105 | "self.qcloud_secret_access_key" 106 | msgstr "" 107 | 108 | #: ../../autoapi/binaryai/utils/index.rst:77 109 | msgid "" 110 | "Returns a dictionary containing the necessary headers for Amazon's " 111 | "signature version 4 signing process. An example return value might look " 112 | "like" 113 | msgstr "" 114 | 115 | #: ../../autoapi/binaryai/utils/index.rst:82 116 | msgid "{" 117 | msgstr "" 118 | 119 | #: ../../autoapi/binaryai/utils/index.rst:82 120 | msgid "'Authorization': '...', '...'," 121 | msgstr "" 122 | 123 | #: ../../autoapi/binaryai/utils/index.rst:84 124 | msgid "}" 125 | msgstr "" 126 | 127 | #: ../../autoapi/binaryai/utils/index.rst:90 128 | msgid "Create canonical path. According to QCloud, this should always be \"/\"" 129 | msgstr "" 130 | 131 | #: ../../autoapi/binaryai/utils/index.rst:96 132 | msgid "" 133 | "Create the canonical query string. According to QCloud, by the end of " 134 | "this function our query string values must be URL-encoded (space=%20) and" 135 | " the parameters must be sorted by name." 136 | msgstr "" 137 | 138 | #: ../../autoapi/binaryai/utils/index.rst:101 139 | msgid "" 140 | "This method assumes that the query params in `r` are *already* url " 141 | "encoded. If they are not url encoded by the time they make it to this " 142 | "function, QCloud may complain that the signature for your request is " 143 | "incorrect." 
144 | msgstr "" 145 | 146 | #: ../../autoapi/binaryai/utils/index.rst:107 147 | msgid "It appears elasticsearc-py url encodes query paramaters on its own:" 148 | msgstr "" 149 | 150 | #: ../../autoapi/binaryai/utils/index.rst:107 151 | msgid "" 152 | "https://github.com/elastic/elasticsearch-" 153 | "py/blob/5dfd6985e5d32ea353d2b37d01c2521b2089ac2b/elasticsearch/connection/http_requests.py#L64" 154 | msgstr "" 155 | 156 | #: ../../autoapi/binaryai/utils/index.rst:109 157 | msgid "" 158 | "If you are using a different client than elasticsearch-py, it will be " 159 | "your responsibility to urleconde your query params before this method is " 160 | "called." 161 | msgstr "" 162 | 163 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import unittest 3 | import unittest.mock as mock 4 | import zoneinfo 5 | 6 | import httpx 7 | 8 | from binaryai.utils import QCloudHttpxAuth 9 | 10 | 11 | class TestQCloudRequestsAuth(unittest.TestCase): 12 | """ 13 | Tests for QCloudHttpxAuth 14 | """ 15 | 16 | def test_no_query_params(self): 17 | """ 18 | Assert we generate the 'correct' cannonical query string 19 | and canonical path for a request with no query params 20 | 21 | Correct is relative here b/c 'correct' simply means what 22 | the QCloud CVM service expects 23 | """ 24 | url = "http://cvm.tencentcloudapi.com:80/" 25 | mock_request = mock.Mock() 26 | mock_request.url = httpx.URL(url) 27 | self.assertEqual("/", QCloudHttpxAuth.get_canonical_path(mock_request)) 28 | self.assertEqual("", QCloudHttpxAuth.get_canonical_querystring(mock_request)) 29 | 30 | def test_characters_escaped_in_path(self): 31 | """ 32 | Assert we generate the 'correct' cannonical query string 33 | and path a request with characters that need to be escaped 34 | """ 35 | url = "http://cvm.tencentcloudapi.com:80/+foo.*/_stats" 36 | mock_request = mock.Mock() 37 | mock_request.url = httpx.URL(url) 38 | self.assertEqual("/", QCloudHttpxAuth.get_canonical_path(mock_request)) 39 | self.assertEqual("", QCloudHttpxAuth.get_canonical_querystring(mock_request)) 40 | 41 | def test_path_with_querystring(self): 42 | """ 43 | Assert we generate the 'correct' cannonical query string 44 | and path for request that includes a query stirng 45 | """ 46 | url = "http://cvm.tencentcloudapi.com:80/my_index/?pretty=True" 47 | mock_request = mock.Mock() 48 | mock_request.url = httpx.URL(url) 49 | self.assertEqual("/", QCloudHttpxAuth.get_canonical_path(mock_request)) 50 | self.assertEqual("pretty=True", QCloudHttpxAuth.get_canonical_querystring(mock_request)) 51 | 52 | def test_multiple_get_params(self): 53 | """ 54 | Assert we generate the 'correct' cannonical query string 55 | for request that includes more than one query parameter 56 | """ 57 | url = "http://cvm.tencentcloudapi.com:80/index/type/_search?scroll=5m&search_type=scan" 58 | mock_request = mock.Mock() 59 | mock_request.url = httpx.URL(url) 60 | self.assertEqual("scroll=5m&search_type=scan", QCloudHttpxAuth.get_canonical_querystring(mock_request)) 61 | 62 | def test_post_request_with_get_param(self): 63 | """ 64 | Assert we generate the 'correct' cannonical query string 65 | for a post request that includes GET-parameters 66 | """ 67 | url = "http://cvm.tencentcloudapi.com:80/index/type/1/_update?version=1" 68 | mock_request = mock.Mock() 69 | mock_request.url = httpx.URL(url) 70 | mock_request.method = "POST" 71 | 
self.assertEqual("version=1", QCloudHttpxAuth.get_canonical_querystring(mock_request)) 72 | 73 | def test_auth_for_get(self): 74 | auth = QCloudHttpxAuth( 75 | qcloud_secret_id="YOURKEY", 76 | qcloud_secret_key="YOURSECRET", 77 | qcloud_host="cvm.tencentcloudapi.com", 78 | qcloud_region="ap-shanghai", 79 | qcloud_service="cvm", 80 | qcloud_action="DescribeInstances", 81 | qcloud_apiversion="2017-03-12", 82 | ) 83 | url = "http://cvm.tencentcloudapi.com:80/" 84 | mock_request = httpx.Request(method="GET", url=url) 85 | 86 | frozen_datetime = datetime.datetime(2016, 6, 18, 22, 4, 5, tzinfo=zoneinfo.ZoneInfo("Asia/Shanghai")) 87 | with mock.patch("datetime.datetime") as mock_datetime: 88 | mock_datetime.now.return_value = frozen_datetime 89 | mock_request = auth.auth_flow(mock_request).__next__() 90 | print(mock_request.headers) 91 | self.assertEqual( 92 | { 93 | "host": "cvm.tencentcloudapi.com", 94 | "content-type": "application/x-www-form-urlencoded", 95 | "authorization": "TC3-HMAC-SHA256 Credential=YOURKEY/2016-06-18/cvm/tc3_request" 96 | ", SignedHeaders=content-type;host, " 97 | "Signature=1827327c7138a0193e2883c6f865cffe94b5b4444818eda77324898cc73a37ad", 98 | "x-tc-timestamp": "1466258645", 99 | "x-tc-action": "DescribeInstances", 100 | "x-tc-region": "ap-shanghai", 101 | "x-tc-version": "2017-03-12", 102 | }, 103 | mock_request.headers, 104 | ) 105 | 106 | def test_auth_for_post_with_json_body(self): 107 | auth = QCloudHttpxAuth( 108 | qcloud_secret_id="YOURKEY", 109 | qcloud_secret_key="YOURSECRET", 110 | qcloud_host="cvm.tencentcloudapi.com", 111 | qcloud_region="ap-shanghai", 112 | qcloud_service="cvm", 113 | qcloud_action="DescribeInstances", 114 | qcloud_apiversion="2017-03-12", 115 | ) 116 | url = "http://cvm.tencentcloudapi.com:80/" 117 | mock_request = httpx.Request(method="POST", url=url, json={"Limit": 10}) 118 | 119 | frozen_datetime = datetime.datetime(2016, 6, 18, 22, 4, 5, tzinfo=zoneinfo.ZoneInfo("Asia/Shanghai")) 120 | with mock.patch("datetime.datetime") as mock_datetime: 121 | mock_datetime.now.return_value = frozen_datetime 122 | mock_request = auth.auth_flow(mock_request).__next__() 123 | print(mock_request.headers) 124 | self.assertEqual( 125 | httpx.Headers( 126 | { 127 | "host": "cvm.tencentcloudapi.com", 128 | "content-length": "13", 129 | "content-type": "application/json", 130 | "authorization": "TC3-HMAC-SHA256 Credential=YOURKEY/2016-06-18/cvm/tc3_request" 131 | ", SignedHeaders=content-type;host, " 132 | "Signature=51ed57e4b544a988b76ebd522a9df26273c370c411be3bd83911a24312dfbae5", 133 | "x-tc-timestamp": "1466258645", 134 | "x-tc-action": "DescribeInstances", 135 | "x-tc-region": "ap-shanghai", 136 | "x-tc-version": "2017-03-12", 137 | } 138 | ), 139 | mock_request.headers, 140 | ) 141 | -------------------------------------------------------------------------------- /src/binaryai/upload.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import logging 3 | import os 4 | from typing import Dict, Optional 5 | 6 | import httpx 7 | 8 | from binaryai import client_stub 9 | from binaryai.exceptions import FileRequiredError 10 | from binaryai.utils import sha256sum 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class Uploader(object): 16 | """ 17 | Uploads a file to server. See `binaryai.BinaryAI.upload` for detail. 
18 | """ 19 | 20 | def __init__( 21 | self, 22 | client: client_stub.Client, 23 | *, 24 | filepath: Optional[str] = None, 25 | mem: Optional[bytes] = None, 26 | hooks: Optional[Dict] = None, 27 | sha256: Optional[str] = None, 28 | md5: Optional[str] = None, 29 | ) -> None: 30 | """ 31 | Initialize an uploader instance. Detail usage are listed in `binaryai.BinaryAI.upload`. 32 | 33 | Params: 34 | client: gql Client 35 | """ 36 | self._client = client 37 | self._hooks: Dict = hooks or {} 38 | 39 | self._sha256 = sha256 40 | self._md5 = md5 if not self._sha256 else None 41 | 42 | self._filename: Optional[str] = None 43 | self._filepath = filepath 44 | self._mem = mem 45 | 46 | if filepath and mem: 47 | raise ValueError("providing both filepath and mem is nonsense") 48 | 49 | if filepath: 50 | if not self._filename: 51 | self._filename = os.path.split(self._filepath)[-1] 52 | self._sha256 = sha256sum(filepath) 53 | self._md5 = None 54 | elif mem: 55 | self._sha256 = hashlib.sha256(self._mem).hexdigest() 56 | self._md5 = None 57 | 58 | if not self._sha256 and not self._md5: 59 | raise ValueError("no info provided, at least have one meaningful value") 60 | 61 | def upload(self, *, is_private: Optional[bool] = True) -> str: 62 | """ 63 | Starts the upload sequence. 64 | """ 65 | ticket = self.__create_ticket( 66 | filename=self._filename, sha256=self._sha256, md5=self._md5, is_private=is_private 67 | ) 68 | ticket_type = ticket.typename__ 69 | 70 | if ticket_type == "File": 71 | return ticket.sha_256 72 | 73 | reply_pos = None 74 | 75 | if ticket_type == "OwnershipTicket": 76 | logger.info("calculate pos") 77 | reply_pos = self.__reply_ticket_pos(ticket) 78 | elif ticket_type == "UploadTicket": 79 | logger.info("uploading file") 80 | self.__reply_ticket_upload(ticket) 81 | else: 82 | raise ValueError("unknown upload type, upgrade SDK or contact developers") 83 | 84 | ticket_id = ticket.ticket_id 85 | logger.info("creating file") 86 | req = client_stub.CreateFileInput(ticketID=ticket_id, ownershipPoS=reply_pos) 87 | verify_response = self._client.create_file(req) 88 | 89 | return verify_response.create_file.sha_256 90 | 91 | def __create_ticket( 92 | self, 93 | *, 94 | filename: Optional[str] = None, 95 | sha256: Optional[str] = None, 96 | md5: Optional[str] = None, 97 | is_private: Optional[bool] = True, 98 | ): 99 | """ 100 | Checks if file exists on FileManager with filename and file's hashsum. 
101 | """ 102 | # is_private_upload is marked as internal use, so just don't even use it for public upload 103 | if is_private: 104 | req = client_stub.CreateUploadTicketInput( 105 | name=filename, sha256=sha256, md5=md5, is_private_upload=is_private 106 | ) 107 | else: 108 | req = client_stub.CreateUploadTicketInput(name=filename, sha256=sha256, md5=md5) 109 | try: 110 | response = self._client.check_or_upload(req) 111 | except client_stub.GraphQLClientGraphQLMultiError as err: 112 | # If only md5 is provided, the error is hash missing 113 | is_hash_missing = False 114 | for e in err.errors: 115 | if "No hash provided" in e.message: 116 | is_hash_missing = True 117 | break 118 | if is_hash_missing: 119 | raise FileRequiredError("File upload need a file to continue") from None 120 | raise 121 | 122 | return response.create_upload_ticket 123 | 124 | def __reply_ticket_pos(self, ticket: client_stub.CheckOrUploadCreateUploadTicketOwnershipTicket): 125 | """ 126 | Calculate the POS argument 127 | """ 128 | assert ticket.typename__ == "OwnershipTicket" 129 | secret_prepend = ticket.secret_prepend 130 | secret_append = ticket.secret_append 131 | assert secret_prepend and secret_append 132 | if not self._filepath and not self._mem: 133 | raise FileRequiredError("PoS verify need a file to continue") 134 | 135 | hasher = hashlib.sha256() 136 | hasher.update(secret_prepend.encode()) 137 | if self._mem: 138 | hasher.update(self._mem) 139 | else: 140 | with open(self._filepath, "rb", buffering=0) as upload_file: 141 | file_size = upload_file.seek(0, os.SEEK_END) 142 | upload_file.seek(0, os.SEEK_SET) 143 | if file_size < 16: 144 | hasher.update(b"\x04" * min(16 - file_size, 8)) 145 | while True: 146 | chunk = upload_file.read(hasher.block_size) 147 | if not chunk: 148 | break 149 | hasher.update(chunk) 150 | if file_size < 8: 151 | hasher.update(b"\x95" * min(8 - file_size, 8)) 152 | hasher.update(secret_append.encode()) 153 | return hasher.hexdigest().lower() 154 | 155 | def __reply_ticket_upload(self, ticket: client_stub.CheckOrUploadCreateUploadTicketUploadTicket): 156 | """ 157 | Uploads file to FileManager. 158 | """ 159 | assert ticket.typename__ == "UploadTicket" 160 | if not self._filepath and not self._mem: 161 | raise FileRequiredError("File upload need a file to continue") 162 | 163 | if self._hooks.get("upload_ticket"): 164 | ticket = self._hooks["upload_ticket"](ticket) 165 | auth_header = {kv.key: kv.value for kv in ticket.request_headers} 166 | 167 | with httpx.Client() as upload_client: 168 | if self._mem: 169 | upload_client.put(url=ticket.url, headers=auth_header, content=self._mem) 170 | else: 171 | with open(self._filepath, "rb") as upload_file: 172 | upload_client.put(url=ticket.url, headers=auth_header, content=upload_file) 173 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 
6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2024-08-23 07:36+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.14.0\n" 22 | 23 | #: ../../../README.md:2 24 | msgid "BinaryAI Python SDK" 25 | msgstr "" 26 | 27 | #: ../../../README.md:4 28 | msgid "" 29 | "![PUBLISH](https://github.com/binaryai/sdk/workflows/PUBLISH/badge.svg) " 30 | "[![readthedocs](https://readthedocs.org/projects/binaryai/badge/?version=stable&style=flat)](https://binaryai.readthedocs.io/)" 31 | " " 32 | "[![Downloads](https://pepy.tech/badge/binaryai/month)](https://pepy.tech/project/binaryai/month)" 33 | " " 34 | "[![Gitter](https://badges.gitter.im/binaryai/community.svg)](https://gitter.im/binaryai/community?utm_source=badge&utm_medium=badge&utm_campaign" 35 | "=pr-badge)" 36 | msgstr "" 37 | 38 | #: ../../../README.md:4 39 | msgid "PUBLISH" 40 | msgstr "" 41 | 42 | #: ../../../README.md:4 43 | msgid "readthedocs" 44 | msgstr "" 45 | 46 | #: ../../../README.md:4 47 | msgid "Downloads" 48 | msgstr "" 49 | 50 | #: ../../../README.md:4 51 | msgid "Gitter" 52 | msgstr "" 53 | 54 | #: ../../../README.md:9 55 | msgid "" 56 | "[BinaryAI](https://www.binaryai.cn) is a binary file security analysis " 57 | "platform. This SDK aims at providing a simple client to upload file and " 58 | "get analysis result. It also works as a demo on calling BinaryAI's " 59 | "GraphQL API directly." 60 | msgstr "" 61 | "[BinaryAI](https://www.binaryai.cn)是腾讯安全科恩实验室基于在静态分析和AI安全领域的经验研发的二进制安全智能分析平台。本SDK旨在帮助用户上传文件并获取分析结果,也可以作为调用BinaryAI" 62 | " API的一个参考" 63 | 64 | #: ../../../README.md:13 65 | msgid "" 66 | "To use SDK, you need a valid credential. Read [BinaryAI " 67 | "docs](https://www.binaryai.cn/doc/) about detailed instructions." 68 | msgstr "要使用SDK,你需要相应的凭据。查看[BinaryAI的文档](https://www.binaryai.cn/doc/) 获取指引。" 69 | 70 | #: ../../../README.md:15 71 | msgid "Dependency" 72 | msgstr "依赖版本" 73 | 74 | #: ../../../README.md:17 75 | msgid "Python >= 3.9" 76 | msgstr "" 77 | 78 | #: ../../../README.md:19 79 | msgid "Download and install" 80 | msgstr "下载和安装" 81 | 82 | #: ../../../README.md:25 83 | msgid "Quick start" 84 | msgstr "快速入门" 85 | 86 | #: ../../../README.md:27 87 | msgid "See the [SDK document](https://binaryai.readthedocs.io) for guide." 88 | msgstr "请查看[SDK文档](https://binaryai.readthedocs.io)。" 89 | 90 | #: ../../../README.md:29 91 | msgid "Internals" 92 | msgstr "内部细节" 93 | 94 | #: ../../../README.md:31 95 | msgid "Endpoints" 96 | msgstr "API地址" 97 | 98 | #: ../../../README.md:33 99 | msgid "The default endpoint is `https://api.binaryai.cn/v1/endpoint`." 100 | msgstr "对公众的默认API地址是`https://api.binaryai.cn/v1/endpoint`。" 101 | 102 | #: ../../../README.md:35 103 | msgid "API Credentials" 104 | msgstr "API凭据" 105 | 106 | #: ../../../README.md:37 107 | msgid "" 108 | "API Credentials are used for signing requests. We suggest you using our " 109 | "SDK or our library to sign it, but you can also have your own " 110 | "implementation. We are using the signing method `TC3-HMAC-SHA256`, same " 111 | "with the Tencent Cloud. You can read their " 112 | "[document](https://cloud.tencent.com/document/product/213/30654) about " 113 | "how to sign requests. 
BinaryAI would require following fields:" 114 | msgstr "" 115 | "API凭据用于签名请求。你可以使用SDK进行签名,但也可以自行编写签名方法。我们使用和腾讯云一致的 `TC3-HMAC-SHA256` " 116 | "方案,你可以阅读 [腾讯云文档](https://cloud.tencent.com/document/product/213/30654) " 117 | "获取技术细节。BinaryAI需要的签名信息是:" 118 | 119 | #: ../../../README.md:49 120 | msgid "Additional Reading" 121 | msgstr "更多资料" 122 | 123 | #: ../../../README.md:51 124 | #, fuzzy 125 | msgid "" 126 | "Read the [Changelog](https://www.binaryai.cn/doc/zh/releasenotes/) of our" 127 | " product, and hope you can also have fun reading papers related to our " 128 | "job:" 129 | msgstr "" 130 | "我们的 " 131 | "[发布记录](https://www.binaryai.cn/doc/zh/releasenotes/releasenotes.html)记录了历史版本,你也可以参考我们此前的论文:" 132 | 133 | #: ../../../README.md:53 134 | msgid "" 135 | "Yu, Zeping, et al. \"Codecmr: Cross-modal retrieval for function-level " 136 | "binary source code matching.\" Advances in Neural Information Processing " 137 | "Systems 33 (2020): 3872-3883." 138 | msgstr "" 139 | 140 | #: ../../../README.md:54 141 | msgid "" 142 | "Yu, Zeping, et al. \"Order matters: Semantic-aware neural networks for " 143 | "binary code similarity detection.\" Proceedings of the AAAI conference on" 144 | " artificial intelligence. Vol. 34. No. 01. 2020." 145 | msgstr "" 146 | 147 | #: ../../../README.md:55 148 | msgid "" 149 | "Li, Zongjie, et al. \"Unleashing the power of compiler intermediate " 150 | "representation to enhance neural program embeddings.\" Proceedings of the" 151 | " 44th International Conference on Software Engineering. 2022." 152 | msgstr "" 153 | 154 | #: ../../../README.md:56 155 | msgid "" 156 | "Wong, Wai Kin, et al. \"Deceiving Deep Neural Networks-Based Binary Code " 157 | "Matching with Adversarial Programs.\" 2022 IEEE International Conference " 158 | "on Software Maintenance and Evolution (ICSME). IEEE, 2022." 159 | msgstr "" 160 | 161 | #: ../../../README.md:57 162 | msgid "" 163 | "Wang, Huaijin, et al. \"Enhancing DNN-Based Binary Code Function Search " 164 | "With Low-Cost Equivalence Checking.\" IEEE Transactions on Software " 165 | "Engineering 49.1 (2022): 226-250." 166 | msgstr "" 167 | 168 | #: ../../../README.md:58 169 | msgid "" 170 | "Jia, Ang, et al. \"1-to-1 or 1-to-n? Investigating the Effect of Function" 171 | " Inlining on Binary Similarity Analysis.\" ACM Transactions on Software " 172 | "Engineering and Methodology 32.4 (2023): 1-26." 173 | msgstr "" 174 | 175 | #: ../../../README.md:59 176 | msgid "" 177 | "Wang, Huaijin, et al. \"sem2vec: Semantics-aware Assembly Tracelet " 178 | "Embedding.\" ACM Transactions on Software Engineering and Methodology " 179 | "32.4 (2023): 1-34." 180 | msgstr "" 181 | 182 | #: ../../../README.md:60 183 | msgid "" 184 | "Jiang, Ling, et al. \"Third-Party Library Dependency for Large-Scale SCA " 185 | "in the C/C++ Ecosystem: How Far Are We?.\" Proceedings of the 32nd ACM " 186 | "SIGSOFT International Symposium on Software Testing and Analysis. 2023." 
187 | msgstr "" 188 | 189 | #~ msgid "" 190 | #~ "![PUBLISH](https://github.com/binaryai/sdk/workflows/PUBLISH/badge.svg)" 191 | #~ " " 192 | #~ "[![Downloads](https://pepy.tech/badge/binaryai/month)](https://pepy.tech/project/binaryai/month)" 193 | #~ " " 194 | #~ "[![Gitter](https://badges.gitter.im/binaryai/community.svg)](https://gitter.im/binaryai/community?utm_source=badge&utm_medium=badge&utm_campaign" 195 | #~ "=pr-badge)" 196 | #~ msgstr "" 197 | 198 | -------------------------------------------------------------------------------- /src/binaryai/binaryai_file.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Iterator, List 2 | 3 | from binaryai.client import BinaryAI 4 | from binaryai.component import Component 5 | from binaryai.compressed_file import CompressedFile 6 | from binaryai.cve import CVE 7 | from binaryai.function import Function, MatchedFunction 8 | from binaryai.license import License 9 | 10 | 11 | class BinaryAIFile(object): 12 | """BinaryAIFile represents a file that has already been analyzed by BinaryAI. 13 | Use it to conveniently retrieve the detailed analysis results. 14 | Note: This class is not thread safe! 15 | """ 16 | 17 | def __init__(self, bai: BinaryAI, sha256: str = None, md5: str = None) -> None: 18 | if sha256 is None and md5 is None: 19 | raise ValueError("sha256 and md5 cannot both be empty") 20 | if sha256 is None: 21 | self.sha256 = bai.get_sha256(md5) 22 | else: 23 | self.sha256 = sha256 24 | self.md5 = md5 25 | self._bai = bai 26 | 27 | def get_filenames(self) -> List[str]: 28 | """Get all uploaded filenames. 29 | 30 | Returns: 31 | List[str]: A list of filenames. 32 | """ 33 | return self._bai.get_filenames(self.sha256) 34 | 35 | def get_mime_type(self) -> str: 36 | """Get MIME type 37 | 38 | Returns: 39 | str: MIME type string. 40 | """ 41 | return self._bai.get_mime_type(self.sha256) 42 | 43 | def get_size(self) -> int: 44 | """Get size in bytes. 45 | 46 | Returns: 47 | int: File size in bytes. 48 | """ 49 | return self._bai.get_size(self.sha256) 50 | 51 | def get_compressed_files(self) -> List[CompressedFile]: 52 | """Get the list of files identified inside a compressed file. 53 | 54 | Returns: 55 | List[CompressedFile]: A list of compressed files. 56 | """ 57 | return self._bai.get_compressed_files(self.sha256) 58 | 59 | def get_all_cves(self) -> List[CVE]: 60 | """Get all CVEs. 61 | 62 | Returns: 63 | List[CVE]: A list of CVE objects. 64 | """ 65 | return self._bai.get_all_cves(self.sha256) 66 | 67 | def get_all_cve_names(self) -> List[str]: 68 | """Get all CVE names. 69 | 70 | Returns: 71 | List[str]: A list of CVE names. 72 | """ 73 | return self._bai.get_all_cve_names(self.sha256) 74 | 75 | def get_all_licenses(self) -> List[License]: 76 | """Get all licenses. 77 | 78 | Returns: 79 | List[License]: A list of license objects. 80 | """ 81 | return self._bai.get_all_licenses(self.sha256) 82 | 83 | def get_all_license_short_names(self) -> List[str]: 84 | """Get all license short names. 85 | 86 | Returns: 87 | List[str]: A list of license short names. 88 | """ 89 | return self._bai.get_all_license_short_names(self.sha256) 90 | 91 | def get_all_ascii_strings(self) -> List[str]: 92 | """Get all ASCII strings. 93 | 94 | Returns: 95 | List[str]: A list of ASCII strings. 96 | """ 97 | return self._bai.get_all_ascii_strings(self.sha256) 98 | 99 | def get_sca_result(self) -> List[Component]: 100 | """Get SCA result. 101 | 102 | Returns: 103 | List[Component]: A list of software components.
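Each Component corresponds to one detected third-party component; as defined by the SCA query in query.graphql, it carries the component's name, version, description, source code URL, and summary.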
104 | """ 105 | return self._bai.get_sca_result(self.sha256) 106 | 107 | def get_overview(self) -> Dict[str, str]: 108 | """Fetch analysis overview. 109 | 110 | Returns: 111 | Dict[str, str]: A key-value mapping containing an overview of the file. 112 | """ 113 | return self._bai.get_overview(self.sha256) 114 | 115 | def list_func_offset(self) -> List[int]: 116 | """Fetch offsets of functions. 117 | 118 | Returns: 119 | List[int]: A list of function offsets. 120 | """ 121 | return self._bai.list_func_offset(self.sha256) 122 | 123 | def list_funcs(self) -> Iterator[Function]: 124 | """Iterate over all functions, yielding a Function instance 125 | for each one that contains the function's name, file offset, 126 | bytes, and pseudocode. 127 | 128 | Returns: 129 | Iterator[Function]: A Function iterator 130 | """ 131 | return self._bai.list_funcs(self.sha256) 132 | 133 | def get_func_info(self, offset: int, with_embedding: bool = False) -> Function: 134 | """Fetch detailed information about the given function 135 | identified by its offset address. 136 | 137 | Params: 138 | offset: Offset address of the desired function 139 | with_embedding: if True, also try to fetch the embedding representation of the function. 140 | 141 | Returns: 142 | Function: A Function instance containing the given function's 143 | name, file offset, bytes, and pseudocode 144 | """ 145 | return self._bai.get_func_info(self.sha256, offset, with_embedding) 146 | 147 | def get_funcs_info(self, offset: List[int], with_embedding: bool = False) -> Iterator[Function]: 148 | """Fetch detailed information about the given functions 149 | identified by their offset addresses. 150 | 151 | Params: 152 | offset: List of offset addresses of the desired functions 153 | with_embedding: if True, also try to fetch the embedding representation of each function. 154 | 155 | Returns: 156 | Iterator[Function]: An iterator of Function instances, each containing the given 157 | function's name, file offset, bytes, and pseudocode. 158 | """ 159 | return self._bai.get_funcs_info(self.sha256, offset, with_embedding) 160 | 161 | def get_func_match(self, offset: int) -> List[MatchedFunction]: 162 | """Find matching functions for the given function identified 163 | by its offset address. 164 | 165 | Params: 166 | offset: Offset address of the desired function 167 | 168 | Returns: 169 | List[MatchedFunction]: a list containing up to 10 match results, 170 | where every result contains a score and pseudocode. 171 | The list is sorted by score from high to low. 172 | """ 173 | return self._bai.get_func_match(self.sha256, offset) 174 | 175 | def get_khash_info(self) -> tuple[bytes, str]: 176 | """Return the KHash of this file. See the website for a detailed introduction to KHash. 177 | 178 | Returns: 179 | Tuple[bytes, str]: KHash's value and version. Only compare hashes whose versions are the same. 180 | """ 181 | return self._bai.get_khash_info(self.sha256) 182 | 183 | def get_malware_probability(self): 184 | """Return the malware probability of this file. 0 usually means a benign file, while 1 means the file is risky. 185 | 186 | This is an experimental feature and might change without notice. 187 | 188 | Returns: 189 | Optional[float]: Probability of the file. None means no result is available.
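For example, in the quick start notebook a libidn sample scores about 0.0004 while the tshd sample scores about 0.99.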
190 | """ 191 | return self._bai.get_malware_probability(self.sha256) 192 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/base_client.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | 3 | import json 4 | from typing import IO, Any, Dict, List, Optional, Tuple, TypeVar, cast 5 | 6 | import httpx 7 | from pydantic import BaseModel 8 | from pydantic_core import to_jsonable_python 9 | 10 | from .base_model import UNSET, Upload 11 | from .exceptions import ( 12 | GraphQLClientGraphQLMultiError, 13 | GraphQLClientHttpError, 14 | GraphQLClientInvalidResponseError, 15 | ) 16 | 17 | Self = TypeVar("Self", bound="BaseClient") 18 | 19 | 20 | class BaseClient: 21 | def __init__( 22 | self, 23 | url: str = "", 24 | headers: Optional[Dict[str, str]] = None, 25 | http_client: Optional[httpx.Client] = None, 26 | ) -> None: 27 | self.url = url 28 | self.headers = headers 29 | 30 | self.http_client = http_client if http_client else httpx.Client(headers=headers) 31 | 32 | def __enter__(self: Self) -> Self: 33 | return self 34 | 35 | def __exit__( 36 | self, 37 | exc_type: object, 38 | exc_val: object, 39 | exc_tb: object, 40 | ) -> None: 41 | self.http_client.close() 42 | 43 | def execute( 44 | self, 45 | query: str, 46 | operation_name: Optional[str] = None, 47 | variables: Optional[Dict[str, Any]] = None, 48 | **kwargs: Any, 49 | ) -> httpx.Response: 50 | processed_variables, files, files_map = self._process_variables(variables) 51 | 52 | if files and files_map: 53 | return self._execute_multipart( 54 | query=query, 55 | operation_name=operation_name, 56 | variables=processed_variables, 57 | files=files, 58 | files_map=files_map, 59 | **kwargs, 60 | ) 61 | 62 | return self._execute_json( 63 | query=query, 64 | operation_name=operation_name, 65 | variables=processed_variables, 66 | **kwargs, 67 | ) 68 | 69 | def get_data(self, response: httpx.Response) -> Dict[str, Any]: 70 | if not response.is_success: 71 | raise GraphQLClientHttpError( 72 | status_code=response.status_code, response=response 73 | ) 74 | 75 | try: 76 | response_json = response.json() 77 | except ValueError as exc: 78 | raise GraphQLClientInvalidResponseError(response=response) from exc 79 | 80 | if (not isinstance(response_json, dict)) or ( 81 | "data" not in response_json and "errors" not in response_json 82 | ): 83 | raise GraphQLClientInvalidResponseError(response=response) 84 | 85 | data = response_json.get("data") 86 | errors = response_json.get("errors") 87 | 88 | if errors: 89 | raise GraphQLClientGraphQLMultiError.from_errors_dicts( 90 | errors_dicts=errors, data=data 91 | ) 92 | 93 | return cast(Dict[str, Any], data) 94 | 95 | def _process_variables( 96 | self, variables: Optional[Dict[str, Any]] 97 | ) -> Tuple[ 98 | Dict[str, Any], Dict[str, Tuple[str, IO[bytes], str]], Dict[str, List[str]] 99 | ]: 100 | if not variables: 101 | return {}, {}, {} 102 | 103 | serializable_variables = self._convert_dict_to_json_serializable(variables) 104 | return self._get_files_from_variables(serializable_variables) 105 | 106 | def _convert_dict_to_json_serializable( 107 | self, dict_: Dict[str, Any] 108 | ) -> Dict[str, Any]: 109 | return { 110 | key: self._convert_value(value) 111 | for key, value in dict_.items() 112 | if value is not UNSET 113 | } 114 | 115 | def _convert_value(self, value: Any) -> Any: 116 | if isinstance(value, BaseModel): 117 | return value.model_dump(by_alias=True, exclude_unset=True) 118 
| if isinstance(value, list): 119 | return [self._convert_value(item) for item in value] 120 | return value 121 | 122 | def _get_files_from_variables( 123 | self, variables: Dict[str, Any] 124 | ) -> Tuple[ 125 | Dict[str, Any], Dict[str, Tuple[str, IO[bytes], str]], Dict[str, List[str]] 126 | ]: 127 | files_map: Dict[str, List[str]] = {} 128 | files_list: List[Upload] = [] 129 | 130 | def separate_files(path: str, obj: Any) -> Any: 131 | if isinstance(obj, list): 132 | nulled_list = [] 133 | for index, value in enumerate(obj): 134 | value = separate_files(f"{path}.{index}", value) 135 | nulled_list.append(value) 136 | return nulled_list 137 | 138 | if isinstance(obj, dict): 139 | nulled_dict = {} 140 | for key, value in obj.items(): 141 | value = separate_files(f"{path}.{key}", value) 142 | nulled_dict[key] = value 143 | return nulled_dict 144 | 145 | if isinstance(obj, Upload): 146 | if obj in files_list: 147 | file_index = files_list.index(obj) 148 | files_map[str(file_index)].append(path) 149 | else: 150 | file_index = len(files_list) 151 | files_list.append(obj) 152 | files_map[str(file_index)] = [path] 153 | return None 154 | 155 | return obj 156 | 157 | nulled_variables = separate_files("variables", variables) 158 | files: Dict[str, Tuple[str, IO[bytes], str]] = { 159 | str(i): (file_.filename, cast(IO[bytes], file_.content), file_.content_type) 160 | for i, file_ in enumerate(files_list) 161 | } 162 | return nulled_variables, files, files_map 163 | 164 | def _execute_multipart( 165 | self, 166 | query: str, 167 | operation_name: Optional[str], 168 | variables: Dict[str, Any], 169 | files: Dict[str, Tuple[str, IO[bytes], str]], 170 | files_map: Dict[str, List[str]], 171 | **kwargs: Any, 172 | ) -> httpx.Response: 173 | data = { 174 | "operations": json.dumps( 175 | { 176 | "query": query, 177 | "operationName": operation_name, 178 | "variables": variables, 179 | }, 180 | default=to_jsonable_python, 181 | ), 182 | "map": json.dumps(files_map, default=to_jsonable_python), 183 | } 184 | 185 | return self.http_client.post(url=self.url, data=data, files=files, **kwargs) 186 | 187 | def _execute_json( 188 | self, 189 | query: str, 190 | operation_name: Optional[str], 191 | variables: Dict[str, Any], 192 | **kwargs: Any, 193 | ) -> httpx.Response: 194 | headers: Dict[str, str] = {"Content-Type": "application/json"} 195 | headers.update(kwargs.get("headers", {})) 196 | 197 | merged_kwargs: Dict[str, Any] = kwargs.copy() 198 | merged_kwargs["headers"] = headers 199 | 200 | return self.http_client.post( 201 | url=self.url, 202 | content=json.dumps( 203 | { 204 | "query": query, 205 | "operationName": operation_name, 206 | "variables": variables, 207 | }, 208 | default=to_jsonable_python, 209 | ), 210 | **merged_kwargs, 211 | ) 212 | -------------------------------------------------------------------------------- /src/binaryai/query.graphql: -------------------------------------------------------------------------------- 1 | query Sha256($md5: String!) { 2 | file: fileByHash(input: { md5: $md5 }) { 3 | sha256 4 | } 5 | } 6 | 7 | query Filename($sha256: String!) { 8 | file: fileByHash(input: { sha256: $sha256 }) { 9 | name 10 | } 11 | } 12 | 13 | query MIMEType($sha256: String!) { 14 | file: fileByHash(input: { sha256: $sha256 }) { 15 | mimeType 16 | } 17 | } 18 | 19 | query FileSize($sha256: String!) { 20 | file: fileByHash(input: { sha256: $sha256 }) { 21 | size 22 | } 23 | } 24 | 25 | query CVEName($sha256: String!) 
{ 26 | file: fileByHash(input: { sha256: $sha256 }) { 27 | scainfo { 28 | cves { 29 | name 30 | } 31 | } 32 | } 33 | } 34 | 35 | query LicenseShortName($sha256: String!) { 36 | file: fileByHash(input: { sha256: $sha256 }) { 37 | scainfo { 38 | license 39 | } 40 | } 41 | } 42 | 43 | query License($sha256: String!) { 44 | file: fileByHash(input: { sha256: $sha256 }) { 45 | scainfo { 46 | licenselist { 47 | checkreason 48 | content 49 | extra 50 | fullName 51 | pass 52 | risk 53 | shortName 54 | source 55 | url 56 | tags { 57 | permission { 58 | tagName 59 | description 60 | } 61 | condition { 62 | tagName 63 | description 64 | } 65 | forbidden { 66 | tagName 67 | description 68 | } 69 | } 70 | } 71 | } 72 | } 73 | } 74 | 75 | query ASCIIString($sha256: String!) { 76 | file: fileByHash(input: { sha256: $sha256 }) { 77 | executable { 78 | ... on COFFInfo { 79 | asciiStrings 80 | } 81 | ... on ELFInfo { 82 | asciiStrings 83 | } 84 | ... on MachoInfo { 85 | asciiStrings 86 | } 87 | ... on PEInfo { 88 | asciiStrings 89 | } 90 | } 91 | } 92 | } 93 | 94 | query SCA($sha256: String!) { 95 | file: fileByHash(input: { sha256: $sha256 }) { 96 | scainfo { 97 | name 98 | version 99 | description 100 | sourceCodeURL 101 | summary 102 | } 103 | } 104 | } 105 | 106 | query Overview($sha256: String!) { 107 | file: fileByHash(input: { sha256: $sha256 }) { 108 | decompileResult { 109 | basicInfo { 110 | fileType 111 | machine 112 | platform 113 | endian 114 | loader 115 | entryPoint 116 | baseAddress 117 | } 118 | } 119 | } 120 | } 121 | 122 | query DownloadLink($sha256: String!) { 123 | file: fileByHash(input: { sha256: $sha256 }) { 124 | downloadLink 125 | } 126 | } 127 | 128 | query CheckState($sha256: String!) { 129 | file: fileByHash(input: { sha256: $sha256 }) { 130 | smartBinaryStatus: analyzeStatus(analyzer: SmartBinary) { 131 | status 132 | } 133 | smartBeatStatus: analyzeStatus(analyzer: SmartBeat) { 134 | status 135 | } 136 | text { 137 | content # trigger smartBinary 138 | } 139 | decompileResult { 140 | basicInfo { 141 | fileType # trigger smartBeat 142 | } 143 | } 144 | } 145 | } 146 | 147 | query FunctionList($sha256: String!) { 148 | file: fileByHash(input: { sha256: $sha256 }) { 149 | decompileResult { 150 | functions { 151 | offset 152 | } 153 | } 154 | } 155 | } 156 | 157 | query FunctionInfo( 158 | $sha256: String! 159 | $offset: BigInt! 160 | $withEmbedding: Boolean! 161 | ) { 162 | file: fileByHash(input: { sha256: $sha256 }) { 163 | decompileResult { 164 | function(offset: $offset) { 165 | offset 166 | name 167 | embedding @include(if: $withEmbedding) { 168 | vector 169 | version 170 | } 171 | pseudoCode { 172 | code 173 | } 174 | } 175 | } 176 | } 177 | } 178 | 179 | query FunctionsInfo( 180 | $sha256: String! 181 | $offset: [BigInt!] 182 | $withEmbedding: Boolean! 183 | ) { 184 | file: fileByHash(input: { sha256: $sha256 }) { 185 | decompileResult { 186 | functions(offset: $offset) { 187 | offset 188 | name 189 | embedding @include(if: $withEmbedding) { 190 | vector 191 | version 192 | } 193 | pseudoCode { 194 | code 195 | } 196 | } 197 | } 198 | } 199 | } 200 | 201 | query FunctionMatch($sha256: String!, $offset: BigInt!) { 202 | file: fileByHash(input: { sha256: $sha256 }) { 203 | decompileResult { 204 | function(offset: $offset) { 205 | match(topK: 10) { 206 | score 207 | function { 208 | code 209 | } 210 | } 211 | } 212 | } 213 | } 214 | } 215 | 216 | query FileKHash($sha256: String!) 
{ 217 | file: fileByHash(input: { sha256: $sha256 }) { 218 | decompileResult { 219 | kHashInfo { 220 | hash { 221 | hash 222 | version 223 | } 224 | } 225 | } 226 | } 227 | } 228 | 229 | query FileMalwareProbability($sha256: String!) { 230 | file: fileByHash(input: { sha256: $sha256 }) { 231 | decompileResult { 232 | malwareProbability 233 | } 234 | analyzeStatus(analyzer: SmartBeat) { 235 | status 236 | } 237 | } 238 | } 239 | 240 | query CompressedFile($sha256: String!) { 241 | file: fileByHash(input: { sha256: $sha256 }) { 242 | decompressed { 243 | ... on CompressedFile { 244 | path 245 | sha256 246 | } 247 | } 248 | } 249 | } 250 | 251 | mutation Reanalyze($input: ReanalyzeInput!) { 252 | reanalyze(input: $input) { 253 | noopReason 254 | file { 255 | analyzeStatus { 256 | status 257 | } 258 | } 259 | } 260 | } 261 | 262 | mutation CheckOrUpload($input: CreateUploadTicketInput!) { 263 | createUploadTicket(input: $input) { 264 | __typename 265 | ... on File { 266 | sha256 267 | } 268 | ... on UploadTicket { 269 | ticketID 270 | url 271 | requestHeaders { 272 | key 273 | value 274 | } 275 | } 276 | ... on OwnershipTicket { 277 | ticketID 278 | secretPrepend 279 | secretAppend 280 | } 281 | } 282 | } 283 | 284 | mutation CreateFile($input: CreateFileInput!) { 285 | createFile(input: $input) { 286 | sha256 287 | md5 288 | name 289 | size 290 | mimeType 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/binaryai_file/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023. 6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2024-08-23 07:36+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.14.0\n" 22 | 23 | #: ../../autoapi/binaryai/binaryai_file/index.rst:2 24 | msgid ":py:mod:`binaryai.binaryai_file`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/binaryai_file/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/binaryai_file/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/binaryai_file/index.rst:19::1 36 | msgid ":py:obj:`BinaryAIFile `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/binaryai_file/index.rst:19::1 40 | msgid "BinaryAIFile represent the file already analyzed by BinaryAI." 41 | msgstr "" 42 | 43 | #: ../../autoapi/binaryai/binaryai_file/index.rst:23 44 | msgid "Bases: :py:obj:`object`" 45 | msgstr "" 46 | 47 | #: ../../autoapi/binaryai/binaryai_file/index.rst:25 48 | msgid "" 49 | "BinaryAIFile represent the file already analyzed by BinaryAI. Users can " 50 | "receive the detailed results by using this conveniently. Note: This is " 51 | "not thread safe!!!" 52 | msgstr "" 53 | 54 | #: ../../autoapi/binaryai/binaryai_file/index.rst:31 55 | msgid "Get all uploaded filenames." 
56 | msgstr "" 57 | 58 | #: ../../autoapi/binaryai/binaryai_file/index.rst 59 | msgid "Returns" 60 | msgstr "" 61 | 62 | #: ../../autoapi/binaryai/binaryai_file/index.rst:33 63 | msgid "A list of filenames." 64 | msgstr "" 65 | 66 | #: ../../autoapi/binaryai/binaryai_file/index.rst 67 | msgid "Return type" 68 | msgstr "" 69 | 70 | #: ../../autoapi/binaryai/binaryai_file/index.rst:39 71 | msgid "Get MIME type" 72 | msgstr "" 73 | 74 | #: ../../autoapi/binaryai/binaryai_file/index.rst:41 75 | msgid "MIME type string." 76 | msgstr "" 77 | 78 | #: ../../autoapi/binaryai/binaryai_file/index.rst:47 79 | msgid "Get size in bytes." 80 | msgstr "" 81 | 82 | #: ../../autoapi/binaryai/binaryai_file/index.rst:49 83 | msgid "File size in bytes." 84 | msgstr "" 85 | 86 | #: ../../autoapi/binaryai/binaryai_file/index.rst:55 87 | msgid "Get a list of files inside a compressed file identified." 88 | msgstr "" 89 | 90 | #: ../../autoapi/binaryai/binaryai_file/index.rst:57 91 | msgid "A list of compressed files." 92 | msgstr "" 93 | 94 | #: ../../autoapi/binaryai/binaryai_file/index.rst:63 95 | msgid "Get all CVEs." 96 | msgstr "" 97 | 98 | #: ../../autoapi/binaryai/binaryai_file/index.rst:65 99 | msgid "A list of CVE objects." 100 | msgstr "" 101 | 102 | #: ../../autoapi/binaryai/binaryai_file/index.rst:71 103 | msgid "Get all CVE names." 104 | msgstr "" 105 | 106 | #: ../../autoapi/binaryai/binaryai_file/index.rst:73 107 | msgid "A list of CVE names." 108 | msgstr "" 109 | 110 | #: ../../autoapi/binaryai/binaryai_file/index.rst:79 111 | msgid "Get all licenses." 112 | msgstr "" 113 | 114 | #: ../../autoapi/binaryai/binaryai_file/index.rst:81 115 | msgid "A list of license objects." 116 | msgstr "" 117 | 118 | #: ../../autoapi/binaryai/binaryai_file/index.rst:87 119 | msgid "Get all license short names." 120 | msgstr "" 121 | 122 | #: ../../autoapi/binaryai/binaryai_file/index.rst:89 123 | msgid "A list of license short names." 124 | msgstr "" 125 | 126 | #: ../../autoapi/binaryai/binaryai_file/index.rst:95 127 | msgid "Get all ASCII strings." 128 | msgstr "" 129 | 130 | #: ../../autoapi/binaryai/binaryai_file/index.rst:97 131 | msgid "A list of ASCII strings." 132 | msgstr "" 133 | 134 | #: ../../autoapi/binaryai/binaryai_file/index.rst:103 135 | msgid "Get SCA result." 136 | msgstr "" 137 | 138 | #: ../../autoapi/binaryai/binaryai_file/index.rst:105 139 | msgid "A list of sortware components." 140 | msgstr "" 141 | 142 | #: ../../autoapi/binaryai/binaryai_file/index.rst:111 143 | msgid "Fetch analysis overview." 144 | msgstr "" 145 | 146 | #: ../../autoapi/binaryai/binaryai_file/index.rst:113 147 | msgid "A key-value pair containing overview of the file" 148 | msgstr "" 149 | 150 | #: ../../autoapi/binaryai/binaryai_file/index.rst:119 151 | msgid "Fetch offsets of functions." 152 | msgstr "" 153 | 154 | #: ../../autoapi/binaryai/binaryai_file/index.rst:121 155 | msgid "A list of function offsets" 156 | msgstr "" 157 | 158 | #: ../../autoapi/binaryai/binaryai_file/index.rst:127 159 | msgid "" 160 | "Parses the list of functions and returns a Function instance containing " 161 | "the given function's name, fileoffset, bytes, pseudocode and returns the " 162 | "list with a generator." 163 | msgstr "" 164 | 165 | #: ../../autoapi/binaryai/binaryai_file/index.rst:131 166 | msgid "A Function iterator" 167 | msgstr "" 168 | 169 | #: ../../autoapi/binaryai/binaryai_file/index.rst:137 170 | msgid "" 171 | "Fetch detailed information about the given function identified by its " 172 | "offset address." 
173 | msgstr "" 174 | 175 | #: ../../autoapi/binaryai/binaryai_file/index.rst:142 176 | #: ../../autoapi/binaryai/binaryai_file/index.rst:156 177 | #: ../../autoapi/binaryai/binaryai_file/index.rst:169 178 | msgid "Params:" 179 | msgstr "" 180 | 181 | #: ../../autoapi/binaryai/binaryai_file/index.rst:141 182 | msgid "" 183 | "offset: Offset address of desired function with_embedding: if True, try " 184 | "get the embedding representation of each function." 185 | msgstr "" 186 | 187 | #: ../../autoapi/binaryai/binaryai_file/index.rst:144 188 | msgid "" 189 | "A Function instance containing the given function's name, fileoffset, " 190 | "bytes, pseudocode" 191 | msgstr "" 192 | 193 | #: ../../autoapi/binaryai/binaryai_file/index.rst:151 194 | msgid "" 195 | "Fetch detailed information about the given functions identified by its " 196 | "offset address." 197 | msgstr "" 198 | 199 | #: ../../autoapi/binaryai/binaryai_file/index.rst:155 200 | msgid "" 201 | "offset: List of offset address of desired function with_embedding: if " 202 | "True, try get the embedding representation of each function." 203 | msgstr "" 204 | 205 | #: ../../autoapi/binaryai/binaryai_file/index.rst:158 206 | msgid "" 207 | "A iterator Functions instance containing the given function's name, " 208 | "fileoffset, bytes, pseudocode." 209 | msgstr "" 210 | 211 | #: ../../autoapi/binaryai/binaryai_file/index.rst:165 212 | msgid "Match functions about the given function identified by its offset address." 213 | msgstr "" 214 | 215 | #: ../../autoapi/binaryai/binaryai_file/index.rst:169 216 | msgid "offset: Offset address of desired function" 217 | msgstr "" 218 | 219 | #: ../../autoapi/binaryai/binaryai_file/index.rst:171 220 | msgid "" 221 | "a List containing 10 match results, every result is a Dict the contains " 222 | "score and pseudocode. The List is sorted by score from high to low." 223 | msgstr "" 224 | 225 | #: ../../autoapi/binaryai/binaryai_file/index.rst:179 226 | msgid "" 227 | "Return the KHash of this file. See website for detailed introduction on " 228 | "KHash." 229 | msgstr "" 230 | 231 | #: ../../autoapi/binaryai/binaryai_file/index.rst:181 232 | msgid "KHash's value and version. Only compare if version is same." 233 | msgstr "" 234 | 235 | #: ../../autoapi/binaryai/binaryai_file/index.rst:187 236 | msgid "" 237 | "Return the malware probability of this file. 0 usually mean a white file," 238 | " while 1 mean the file is risky." 239 | msgstr "" 240 | 241 | #: ../../autoapi/binaryai/binaryai_file/index.rst:189 242 | msgid "This is a experimental feature. This might be changed without noticed." 243 | msgstr "" 244 | 245 | #: ../../autoapi/binaryai/binaryai_file/index.rst:191 246 | msgid "Probability of the file. None means no result is available." 247 | msgstr "" 248 | 249 | -------------------------------------------------------------------------------- /docs/quick_start.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Quick start" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook gives an example on how to use this SDK to upload, start analysis and get the analysis result of a file." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Initialize\n", 22 | "\n", 23 | "To initialize the SDK, please prepare your Secret ID and Secret Key. 
Please [apply from us](https://www.binaryai.cn/doc/) if\n", 24 | "you don't have one.\n", 25 | "\n", 26 | "The Secret ID & Key is the *only* credential to access API, so please keep it safely. We recommend you read your keys to\n", 27 | "environment variable, instead of saving in your code:\n", 28 | "\n", 29 | "```bash\n", 30 | "$ read BINARYAI_SECRET_ID\n", 31 | "#(enter your secret id)\n", 32 | "$ read BINARYAI_SECRET_KEY\n", 33 | "#(enter your secret key)\n", 34 | "$ export BINARYAI_SECRET_ID\n", 35 | "$ export BINARYAI_SECRET_KEY\n", 36 | "```\n", 37 | "\n", 38 | "Once those environment variables are set, our SDK can read them directly.\n", 39 | "\n", 40 | "To initialize the SDK:" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 1, 46 | "metadata": { 47 | "metadata": {} 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# Uncomment to get more logs\n", 52 | "# import logging\n", 53 | "# logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", 54 | "# logger = logging.getLogger(\"binaryai_sdk\")\n", 55 | "\n", 56 | "from binaryai import BinaryAI\n", 57 | "\n", 58 | "bai = BinaryAI() # Initialize the client" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Great! If no exceptions raised, the client is initialized." 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Upload and analyze file\n", 73 | "\n", 74 | "Note: file upload might be rejected if file is too big or upload is too quick.\n", 75 | "\n", 76 | "Now you can upload by the file path:" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 2, 82 | "metadata": { 83 | "metadata": {} 84 | }, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "analysis succeed\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | " # if upload succeed, file hash is returned\n", 96 | "sha256 = bai.upload(\"/bin/echo\")\n", 97 | "\n", 98 | "# wait until done. 
timeout=-1 means wait forever\n", 99 | "bai.wait_until_analysis_done(sha256, timeout=-1)\n", 100 | "\n", 101 | "print(\"analysis succeed\")" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## Get analysis result\n", 109 | "\n", 110 | "You can get analysis result by giving hash of a file for each method:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "metadata": { 117 | "metadata": {} 118 | }, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "{'fileType': 'ELF64',\n", 124 | " 'machine': 'AMD64',\n", 125 | " 'platform': 'LINUX',\n", 126 | " 'endian': 'LITTLE_ENDIAN',\n", 127 | " 'loader': 'x86:LE:64:default',\n", 128 | " 'entryPoint': 1059200,\n", 129 | " 'baseAddress': 1048576}" 130 | ] 131 | }, 132 | "execution_count": 3, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "bai.get_overview(sha256)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 4, 144 | "metadata": { 145 | "metadata": {} 146 | }, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "[1: _DT_INIT]\n", 153 | "[2: FUN_00102020]\n", 154 | "[3: ::getenv]\n", 155 | "[4: ::free]\n", 156 | "[5: ::abort]\n", 157 | "[6: ::__errno_location]\n", 158 | "[7: ::strncmp]\n", 159 | "[8: ::_exit]\n", 160 | "[9: ::__fpending]\n", 161 | "[10: ::textdomain]\n", 162 | "[11: ::fclose]\n", 163 | "[12: ::bindtextdomain]\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "funcs = bai.list_funcs(sha256)\n", 169 | "for i, f in enumerate(funcs):\n", 170 | " print(\"[{}: {}]\".format(i+1, f.name))\n", 171 | " if i > 10:\n", 172 | " break" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Or initialize a file object and call it:" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 5, 185 | "metadata": { 186 | "metadata": {} 187 | }, 188 | "outputs": [ 189 | { 190 | "name": "stdout", 191 | "output_type": "stream", 192 | "text": [ 193 | "reptile\n", 194 | "----\n", 195 | "tsh\n", 196 | "----\n" 197 | ] 198 | } 199 | ], 200 | "source": [ 201 | "from binaryai import BinaryAIFile\n", 202 | "# This pair of hash is the same file\n", 203 | "sha256 = \"289616b59a145e2033baddb8a8a9b5a8fb01bdbba1b8cf9acadcdd92e6cc0562\"\n", 204 | "md5 = \"c3366c6b688a5b5fa4451fec09930e06\"\n", 205 | "bai_file = BinaryAIFile(bai, md5=md5)\n", 206 | "for component in bai_file.get_sca_result():\n", 207 | " print(component.name)\n", 208 | " print(\"----\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "You can also get a file's KHash, which can be used to compare similarities:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": { 222 | "metadata": {} 223 | }, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "A<->B: 0.9716796875\n", 230 | "A<->C: 0.583984375\n", 231 | "B<->C: 0.5888671875\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "from binaryai import BinaryAIFile\n", 237 | "\n", 238 | "fileA = BinaryAIFile(bai, md5=\"346136457e1eb6eca44a06bb55f93284\").get_khash_info()\n", 239 | "fileB = BinaryAIFile(bai, sha256=\"841de34799fc46bf4b926559e4e7a70e0cc386050963978d5081595e9a280ae1\").get_khash_info()\n", 240 | "fileC = BinaryAIFile(bai, 
sha256=\"9b53a3936c8c4202e418c37cbadeaef7cc7471f6a6522f6ead1a19b31831f4a1\").get_khash_info()\n", 241 | "assert fileA[1] == fileB[1]\n", 242 | "assert fileB[1] == fileC[1]\n", 243 | "\n", 244 | "# calculate hamming distance\n", 245 | "def khash_similarity(khash_a: str, khash_b: str) -> float:\n", 246 | " def khash_str_to_list(khash: str) -> list:\n", 247 | " return list(bin(int(khash, 16))[2:].zfill(1024))\n", 248 | " from scipy.spatial import distance\n", 249 | " khash_a, khash_b = khash_str_to_list(khash_a), khash_str_to_list(khash_b)\n", 250 | " return 1 - distance.hamming(khash_a, khash_b)\n", 251 | "print(f\"A<->B: {khash_similarity(fileA[0].hex(), fileB[0].hex())}\")\n", 252 | "print(f\"A<->C: {khash_similarity(fileA[0].hex(), fileC[0].hex())}\")\n", 253 | "print(f\"B<->C: {khash_similarity(fileB[0].hex(), fileC[0].hex())}\")\n" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "In August 2024, we introduced a new feature to calculate a file's risky probability. A value ranged at `[0, 1]` might returned." 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 7, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "libidn: 0.0003542900085449219\n", 273 | "tshd: 0.9892578125\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "print(f\"libidn: {BinaryAIFile(bai, sha256='fed32e9a49717eacd2b2ff73ce22a6140a3b814805a089ca6c4dd09befae0d36').get_malware_probability()}\")\n", 279 | "print(f\"tshd: {BinaryAIFile(bai, sha256='289616b59a145e2033baddb8a8a9b5a8fb01bdbba1b8cf9acadcdd92e6cc0562').get_malware_probability()}\")" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "As shown above, you can always give a file hash (md5 or sha256) to get its analysis result.\n", 287 | "\n", 288 | "Read `examples/` in SDK repository or read the SDK API document for more info." 
289 | ] 290 | } 291 | ], 292 | "metadata": { 293 | "kernelspec": { 294 | "display_name": "binaryai-YJgBNhjL-py3.9", 295 | "language": "python", 296 | "name": "python3" 297 | }, 298 | "language_info": { 299 | "codemirror_mode": { 300 | "name": "ipython", 301 | "version": 3 302 | }, 303 | "file_extension": ".py", 304 | "mimetype": "text/x-python", 305 | "name": "python", 306 | "nbconvert_exporter": "python", 307 | "pygments_lexer": "ipython3", 308 | "version": "3.9.19" 309 | }, 310 | "orig_nbformat": 4 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/__init__.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | 3 | from .ascii_string import ( 4 | ASCIIString, 5 | ASCIIStringFile, 6 | ASCIIStringFileExecutableCOFFInfo, 7 | ASCIIStringFileExecutableELFInfo, 8 | ASCIIStringFileExecutableMachoInfo, 9 | ASCIIStringFileExecutablePEInfo, 10 | ) 11 | from .base_client import BaseClient 12 | from .base_model import BaseModel, Upload 13 | from .check_or_upload import ( 14 | CheckOrUpload, 15 | CheckOrUploadCreateUploadTicketFile, 16 | CheckOrUploadCreateUploadTicketOwnershipTicket, 17 | CheckOrUploadCreateUploadTicketUploadTicket, 18 | CheckOrUploadCreateUploadTicketUploadTicketRequestHeaders, 19 | ) 20 | from .check_state import ( 21 | CheckState, 22 | CheckStateFile, 23 | CheckStateFileDecompileResult, 24 | CheckStateFileDecompileResultBasicInfo, 25 | CheckStateFileSmartBeatStatus, 26 | CheckStateFileSmartBinaryStatus, 27 | CheckStateFileText, 28 | ) 29 | from .client import Client 30 | from .compressed_file import ( 31 | CompressedFile, 32 | CompressedFileFile, 33 | CompressedFileFileDecompressedCompressedDirectory, 34 | CompressedFileFileDecompressedCompressedFile, 35 | ) 36 | from .create_file import CreateFile, CreateFileCreateFile 37 | from .cve_name import CVEName, CVENameFile, CVENameFileScainfo, CVENameFileScainfoCves 38 | from .download_link import DownloadLink, DownloadLinkFile 39 | from .enums import ( 40 | AnalyzeProgressStep, 41 | AntivirusSafeLevel, 42 | ExecuteType, 43 | FileType, 44 | MalwareFamilyTag, 45 | MatchAlgorithm, 46 | NoopReason, 47 | PseudoCodeAnnotationType, 48 | RelroLevel, 49 | SCAAlgo, 50 | SearchBinaryStatisticKey, 51 | Status, 52 | SymbolType, 53 | VulnerabilitySourceStatus, 54 | XRefType, 55 | ) 56 | from .exceptions import ( 57 | GraphQLClientError, 58 | GraphQLClientGraphQLError, 59 | GraphQLClientGraphQLMultiError, 60 | GraphQLClientHttpError, 61 | GraphQLClientInvalidResponseError, 62 | ) 63 | from .file_k_hash import ( 64 | FileKHash, 65 | FileKHashFile, 66 | FileKHashFileDecompileResult, 67 | FileKHashFileDecompileResultKHashInfo, 68 | FileKHashFileDecompileResultKHashInfoHash, 69 | ) 70 | from .file_malware_probability import ( 71 | FileMalwareProbability, 72 | FileMalwareProbabilityFile, 73 | FileMalwareProbabilityFileAnalyzeStatus, 74 | FileMalwareProbabilityFileDecompileResult, 75 | ) 76 | from .file_size import FileSize, FileSizeFile 77 | from .filename import Filename, FilenameFile 78 | from .function_info import ( 79 | FunctionInfo, 80 | FunctionInfoFile, 81 | FunctionInfoFileDecompileResult, 82 | FunctionInfoFileDecompileResultFunction, 83 | FunctionInfoFileDecompileResultFunctionEmbedding, 84 | FunctionInfoFileDecompileResultFunctionPseudoCode, 85 | ) 86 | from .function_list import ( 87 | FunctionList, 88 | FunctionListFile, 89 | 
FunctionListFileDecompileResult, 90 | FunctionListFileDecompileResultFunctions, 91 | ) 92 | from .function_match import ( 93 | FunctionMatch, 94 | FunctionMatchFile, 95 | FunctionMatchFileDecompileResult, 96 | FunctionMatchFileDecompileResultFunction, 97 | FunctionMatchFileDecompileResultFunctionMatch, 98 | FunctionMatchFileDecompileResultFunctionMatchFunction, 99 | ) 100 | from .functions_info import ( 101 | FunctionsInfo, 102 | FunctionsInfoFile, 103 | FunctionsInfoFileDecompileResult, 104 | FunctionsInfoFileDecompileResultFunctions, 105 | FunctionsInfoFileDecompileResultFunctionsEmbedding, 106 | FunctionsInfoFileDecompileResultFunctionsPseudoCode, 107 | ) 108 | from .input_types import ( 109 | BindiffMatchInput, 110 | CreateFileInput, 111 | CreateMatchInput, 112 | CreateUploadTicketInput, 113 | KHashInput, 114 | MatchTargetInput, 115 | OSSMatchInput, 116 | ReanalyzeInput, 117 | SearchBinaryInput, 118 | SearchBinaryStatisticInput, 119 | SearchCVESec, 120 | SearchFileInput, 121 | SearchThirdLib, 122 | SessionLoginInput, 123 | SymbolTableFilter, 124 | UpdateAccessKeyInput, 125 | WeixinSessionLoginInput, 126 | ) 127 | from .license import ( 128 | License, 129 | LicenseFile, 130 | LicenseFileScainfo, 131 | LicenseFileScainfoLicenselist, 132 | LicenseFileScainfoLicenselistTags, 133 | LicenseFileScainfoLicenselistTagsCondition, 134 | LicenseFileScainfoLicenselistTagsForbidden, 135 | LicenseFileScainfoLicenselistTagsPermission, 136 | ) 137 | from .license_short_name import ( 138 | LicenseShortName, 139 | LicenseShortNameFile, 140 | LicenseShortNameFileScainfo, 141 | ) 142 | from .mime_type import MIMEType, MIMETypeFile 143 | from .overview import ( 144 | Overview, 145 | OverviewFile, 146 | OverviewFileDecompileResult, 147 | OverviewFileDecompileResultBasicInfo, 148 | ) 149 | from .reanalyze import ( 150 | Reanalyze, 151 | ReanalyzeReanalyze, 152 | ReanalyzeReanalyzeFile, 153 | ReanalyzeReanalyzeFileAnalyzeStatus, 154 | ) 155 | from .sca import SCA, SCAFile, SCAFileScainfo 156 | from .sha_256 import Sha256, Sha256File 157 | 158 | __all__ = [ 159 | "ASCIIString", 160 | "ASCIIStringFile", 161 | "ASCIIStringFileExecutableCOFFInfo", 162 | "ASCIIStringFileExecutableELFInfo", 163 | "ASCIIStringFileExecutableMachoInfo", 164 | "ASCIIStringFileExecutablePEInfo", 165 | "AnalyzeProgressStep", 166 | "AntivirusSafeLevel", 167 | "BaseClient", 168 | "BaseModel", 169 | "BindiffMatchInput", 170 | "CVEName", 171 | "CVENameFile", 172 | "CVENameFileScainfo", 173 | "CVENameFileScainfoCves", 174 | "CheckOrUpload", 175 | "CheckOrUploadCreateUploadTicketFile", 176 | "CheckOrUploadCreateUploadTicketOwnershipTicket", 177 | "CheckOrUploadCreateUploadTicketUploadTicket", 178 | "CheckOrUploadCreateUploadTicketUploadTicketRequestHeaders", 179 | "CheckState", 180 | "CheckStateFile", 181 | "CheckStateFileDecompileResult", 182 | "CheckStateFileDecompileResultBasicInfo", 183 | "CheckStateFileSmartBeatStatus", 184 | "CheckStateFileSmartBinaryStatus", 185 | "CheckStateFileText", 186 | "Client", 187 | "CompressedFile", 188 | "CompressedFileFile", 189 | "CompressedFileFileDecompressedCompressedDirectory", 190 | "CompressedFileFileDecompressedCompressedFile", 191 | "CreateFile", 192 | "CreateFileCreateFile", 193 | "CreateFileInput", 194 | "CreateMatchInput", 195 | "CreateUploadTicketInput", 196 | "DownloadLink", 197 | "DownloadLinkFile", 198 | "ExecuteType", 199 | "FileKHash", 200 | "FileKHashFile", 201 | "FileKHashFileDecompileResult", 202 | "FileKHashFileDecompileResultKHashInfo", 203 | "FileKHashFileDecompileResultKHashInfoHash", 
204 | "FileMalwareProbability", 205 | "FileMalwareProbabilityFile", 206 | "FileMalwareProbabilityFileAnalyzeStatus", 207 | "FileMalwareProbabilityFileDecompileResult", 208 | "FileSize", 209 | "FileSizeFile", 210 | "FileType", 211 | "Filename", 212 | "FilenameFile", 213 | "FunctionInfo", 214 | "FunctionInfoFile", 215 | "FunctionInfoFileDecompileResult", 216 | "FunctionInfoFileDecompileResultFunction", 217 | "FunctionInfoFileDecompileResultFunctionEmbedding", 218 | "FunctionInfoFileDecompileResultFunctionPseudoCode", 219 | "FunctionList", 220 | "FunctionListFile", 221 | "FunctionListFileDecompileResult", 222 | "FunctionListFileDecompileResultFunctions", 223 | "FunctionMatch", 224 | "FunctionMatchFile", 225 | "FunctionMatchFileDecompileResult", 226 | "FunctionMatchFileDecompileResultFunction", 227 | "FunctionMatchFileDecompileResultFunctionMatch", 228 | "FunctionMatchFileDecompileResultFunctionMatchFunction", 229 | "FunctionsInfo", 230 | "FunctionsInfoFile", 231 | "FunctionsInfoFileDecompileResult", 232 | "FunctionsInfoFileDecompileResultFunctions", 233 | "FunctionsInfoFileDecompileResultFunctionsEmbedding", 234 | "FunctionsInfoFileDecompileResultFunctionsPseudoCode", 235 | "GraphQLClientError", 236 | "GraphQLClientGraphQLError", 237 | "GraphQLClientGraphQLMultiError", 238 | "GraphQLClientHttpError", 239 | "GraphQLClientInvalidResponseError", 240 | "KHashInput", 241 | "License", 242 | "LicenseFile", 243 | "LicenseFileScainfo", 244 | "LicenseFileScainfoLicenselist", 245 | "LicenseFileScainfoLicenselistTags", 246 | "LicenseFileScainfoLicenselistTagsCondition", 247 | "LicenseFileScainfoLicenselistTagsForbidden", 248 | "LicenseFileScainfoLicenselistTagsPermission", 249 | "LicenseShortName", 250 | "LicenseShortNameFile", 251 | "LicenseShortNameFileScainfo", 252 | "MIMEType", 253 | "MIMETypeFile", 254 | "MalwareFamilyTag", 255 | "MatchAlgorithm", 256 | "MatchTargetInput", 257 | "NoopReason", 258 | "OSSMatchInput", 259 | "Overview", 260 | "OverviewFile", 261 | "OverviewFileDecompileResult", 262 | "OverviewFileDecompileResultBasicInfo", 263 | "PseudoCodeAnnotationType", 264 | "Reanalyze", 265 | "ReanalyzeInput", 266 | "ReanalyzeReanalyze", 267 | "ReanalyzeReanalyzeFile", 268 | "ReanalyzeReanalyzeFileAnalyzeStatus", 269 | "RelroLevel", 270 | "SCA", 271 | "SCAAlgo", 272 | "SCAFile", 273 | "SCAFileScainfo", 274 | "SearchBinaryInput", 275 | "SearchBinaryStatisticInput", 276 | "SearchBinaryStatisticKey", 277 | "SearchCVESec", 278 | "SearchFileInput", 279 | "SearchThirdLib", 280 | "SessionLoginInput", 281 | "Sha256", 282 | "Sha256File", 283 | "Status", 284 | "SymbolTableFilter", 285 | "SymbolType", 286 | "UpdateAccessKeyInput", 287 | "Upload", 288 | "VulnerabilitySourceStatus", 289 | "WeixinSessionLoginInput", 290 | "XRefType", 291 | ] 292 | -------------------------------------------------------------------------------- /src/binaryai/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import datetime 5 | import hashlib 6 | import hmac 7 | import typing 8 | from hashlib import sha256 9 | 10 | import httpx 11 | 12 | 13 | def sha256sum(path: str) -> str: 14 | """ 15 | Computes sha256 hash sum of a file. 
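The file is read in 4 KiB chunks, so large files can be hashed without loading them entirely into memory.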
16 | 17 | Args: 18 | path: path of file 19 | 20 | Returns: 21 | hex digest of sha256 22 | """ 23 | hash = sha256() 24 | with open(path, "rb") as f: 25 | for buf in iter(lambda: f.read(4096), b""): 26 | hash.update(buf) 27 | return str(hash.hexdigest()) 28 | 29 | 30 | def sign(key, msg): 31 | """ 32 | Modified from https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html 33 | """ 34 | return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() 35 | 36 | 37 | def getSignatureKey(key, dateStamp, serviceName): 38 | """ 39 | Modified from https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html 40 | """ 41 | kDate = sign(("TC3" + key).encode("utf-8"), dateStamp) 42 | kService = sign(kDate, serviceName) 43 | kSigning = sign(kService, "tc3_request") 44 | return kSigning 45 | 46 | 47 | class QCloudHttpxAuth(httpx.Auth): 48 | """ 49 | Auth class that allows us to connect to QCloud services 50 | """ 51 | 52 | requires_request_body = True 53 | 54 | def __init__( 55 | self, 56 | qcloud_secret_id, 57 | qcloud_secret_key, 58 | qcloud_host, 59 | qcloud_region, 60 | qcloud_service, 61 | qcloud_action, 62 | qcloud_apiversion, 63 | ): 64 | """ 65 | Example usage for talking to a QCloud CVM API: 66 | 67 | QCloudHttpxAuth(qcloud_secret_id='YOURID', 68 | qcloud_secret_key='YOURSECRET', 69 | qcloud_host='cvm.tencentcloudapi.com', 70 | qcloud_region='ap-shanghai', qcloud_service='cvm', 71 | qcloud_action='YOURACTION', qcloud_apiversion='YOURAPIVERSION') 72 | 73 | """ 74 | self.qcloud_secret_id = qcloud_secret_id 75 | self.qcloud_secret_key = qcloud_secret_key 76 | self.qcloud_host = qcloud_host 77 | self.qcloud_region = qcloud_region 78 | self.qcloud_service = qcloud_service 79 | self.qcloud_action = qcloud_action 80 | self.qcloud_apiversion = qcloud_apiversion 81 | 82 | def auth_flow(self, r: httpx.Request) -> typing.Generator[httpx.Request, httpx.Response, None]: 83 | """ 84 | Adds the authorization headers required by QCloud Signature v3. 85 | """ 86 | qcloud_headers = self.get_qcloud_request_headers_handler(r) 87 | r.headers.update(qcloud_headers) 88 | yield r 89 | 90 | def get_qcloud_request_headers_handler(self, r): 91 | """ 92 | Override get_qcloud_request_headers_handler() if you have a 93 | subclass that needs to call get_qcloud_request_headers() with 94 | an arbitrary set of QCloud credentials. The default implementation 95 | calls get_qcloud_request_headers() with self.qcloud_secret_id 96 | and self.qcloud_secret_key 97 | """ 98 | return self.get_qcloud_request_headers( 99 | r=r, qcloud_secret_id=self.qcloud_secret_id, qcloud_secret_key=self.qcloud_secret_key 100 | ) 101 | 102 | def get_qcloud_request_headers(self, r: httpx.Request, qcloud_secret_id, qcloud_secret_key): 103 | """ 104 | Returns a dictionary containing the necessary headers for the QCloud 105 | TC3-HMAC-SHA256 signing process (adapted from AWS Signature Version 4).
An example return value might 106 | look like 107 | 108 | { 109 | 'Authorization': '...', 110 | '...', 111 | } 112 | """ 113 | # Create a date for headers and the credential string 114 | t = datetime.datetime.now() 115 | amzdate = str(int(t.timestamp())) 116 | datestamp = t.utcfromtimestamp(t.timestamp()).strftime("%Y-%m-%d") # Date w/o time for credential_scope 117 | 118 | canonical_uri = QCloudHttpxAuth.get_canonical_path(r) 119 | 120 | canonical_querystring = QCloudHttpxAuth.get_canonical_querystring(r) 121 | 122 | if r.headers.get("content-type") is None: 123 | if not r.method == "GET": 124 | raise ValueError("content-type must be set for non GET methods") 125 | r.headers["content-type"] = "application/x-www-form-urlencoded" 126 | 127 | # Create the canonical headers and signed headers. Header names 128 | # and value must be trimmed and lowercase, and sorted in ASCII order. 129 | # Note that there is a trailing \n. 130 | canonical_headers = ( 131 | "content-type:" + r.headers.get("content-type", "") + "\n" + "host:" + self.qcloud_host + "\n" 132 | ) 133 | 134 | # Create the list of signed headers. This lists the headers 135 | # in the canonical_headers list, delimited with ";" and in alpha order. 136 | # Note: The request can include any headers; canonical_headers and 137 | # signed_headers lists those that you want to be included in the 138 | # hash of the request. "Host" and "x-amz-date" are always required. 139 | signed_headers = "content-type;host" 140 | 141 | # Create payload hash (hash of the request body content). For GET 142 | # requests, the payload is an empty string (''). 143 | 144 | payload_hash = hashlib.sha256(r.read()).hexdigest() 145 | 146 | # Combine elements to create create canonical request 147 | canonical_request = ( 148 | r.method 149 | + "\n" 150 | + canonical_uri 151 | + "\n" 152 | + canonical_querystring 153 | + "\n" 154 | + canonical_headers 155 | + "\n" 156 | + signed_headers 157 | + "\n" 158 | + payload_hash 159 | ) 160 | 161 | # Match the algorithm to the hashing algorithm you use, either SHA-1 or 162 | # SHA-256 (recommended) 163 | algorithm = "TC3-HMAC-SHA256" 164 | credential_scope = datestamp + "/" + self.qcloud_service + "/" + "tc3_request" 165 | string_to_sign = ( 166 | algorithm 167 | + "\n" 168 | + amzdate 169 | + "\n" 170 | + credential_scope 171 | + "\n" 172 | + hashlib.sha256(canonical_request.encode("utf-8")).hexdigest() 173 | ) 174 | 175 | # Create the signing key using the function defined above. 176 | signing_key = getSignatureKey(qcloud_secret_key, datestamp, self.qcloud_service) 177 | 178 | # Sign the string_to_sign using the signing_key 179 | string_to_sign_utf8 = string_to_sign.encode("utf-8") 180 | 181 | signature = hmac.new(signing_key, string_to_sign_utf8, hashlib.sha256).hexdigest() 182 | 183 | # The signing information can be either in a query string value or in 184 | # a header named Authorization. This code shows how to use a header. 
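# The assembled header has the following shape (values illustrative):
#   TC3-HMAC-SHA256 Credential=<SecretId>/<date>/<service>/tc3_request, SignedHeaders=content-type;host, Signature=<hex digest>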
185 | # Create the authorization header 186 | authorization_header = ( 187 | algorithm 188 | + " " 189 | + "Credential=" 190 | + qcloud_secret_id 191 | + "/" 192 | + credential_scope 193 | + ", " 194 | + "SignedHeaders=" 195 | + signed_headers 196 | + ", " 197 | + "Signature=" 198 | + signature 199 | ) 200 | 201 | headers = { 202 | "Authorization": authorization_header, 203 | "x-tc-timestamp": amzdate, 204 | "x-tc-action": self.qcloud_action, 205 | "x-tc-region": self.qcloud_region, 206 | "x-tc-version": self.qcloud_apiversion, 207 | } 208 | return headers 209 | 210 | @classmethod 211 | def get_canonical_path(cls, r: httpx.Request): 212 | """ 213 | Create canonical path. According to QCloud, this should always be "/" 214 | """ 215 | return "/" 216 | 217 | @classmethod 218 | def get_canonical_querystring(cls, r: httpx.Request): 219 | """ 220 | Create the canonical query string. According to QCloud, by the 221 | end of this function our query string values must 222 | be URL-encoded (space=%20) and the parameters must be sorted 223 | by name. 224 | 225 | This method assumes that the query params in `r` are *already* 226 | URL-encoded. If they are not URL-encoded by the time they make 227 | it to this function, QCloud may complain that the signature for your 228 | request is incorrect. 229 | 230 | It appears elasticsearch-py URL-encodes query parameters on its own: 231 | https://github.com/elastic/elasticsearch-py/blob/5dfd6985e5d32ea353d2b37d01c2521b2089ac2b/elasticsearch/connection/http_requests.py#L64 232 | 233 | If you are using a client other than elasticsearch-py, it 234 | will be your responsibility to URL-encode your query params before 235 | this method is called. 236 | """ 237 | canonical_querystring = b"" 238 | 239 | querystring_sorted = b"&".join(sorted(r.url.query.split(b"&"))) 240 | 241 | for query_param in querystring_sorted.split(b"&"): 242 | key_val_split = query_param.split(b"=", 1) 243 | 244 | key = key_val_split[0] 245 | if len(key_val_split) > 1: 246 | val = key_val_split[1] 247 | else: 248 | val = b"" 249 | 250 | if key: 251 | if canonical_querystring: 252 | canonical_querystring += b"&" 253 | canonical_querystring += b"=".join([key, val]) 254 | 255 | # FIXME: keep this as bytes instead of decoding; httpx's request handles bytes fine. 256 | return canonical_querystring.decode() 257 | -------------------------------------------------------------------------------- /docs/locale/zh_CN/LC_MESSAGES/autoapi/binaryai/client/index.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) 2020-2023, binaryai 3 | # This file is distributed under the same license as the BinaryAI SDK 4 | # package. 5 | # FIRST AUTHOR , 2023.
6 | # 7 | #, fuzzy 8 | msgid "" 9 | msgstr "" 10 | "Project-Id-Version: BinaryAI SDK \n" 11 | "Report-Msgid-Bugs-To: \n" 12 | "POT-Creation-Date: 2024-08-23 07:36+0000\n" 13 | "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" 14 | "Last-Translator: FULL NAME \n" 15 | "Language: zh_CN\n" 16 | "Language-Team: zh_CN \n" 17 | "Plural-Forms: nplurals=1; plural=0;\n" 18 | "MIME-Version: 1.0\n" 19 | "Content-Type: text/plain; charset=utf-8\n" 20 | "Content-Transfer-Encoding: 8bit\n" 21 | "Generated-By: Babel 2.14.0\n" 22 | 23 | #: ../../autoapi/binaryai/client/index.rst:2 24 | msgid ":py:mod:`binaryai.client`" 25 | msgstr "" 26 | 27 | #: ../../autoapi/binaryai/client/index.rst:8 28 | msgid "Module Contents" 29 | msgstr "" 30 | 31 | #: ../../autoapi/binaryai/client/index.rst:11 32 | msgid "Classes" 33 | msgstr "" 34 | 35 | #: ../../autoapi/binaryai/client/index.rst:19::1 36 | msgid ":py:obj:`BinaryAI `\\" 37 | msgstr "" 38 | 39 | #: ../../autoapi/binaryai/client/index.rst:19::1 40 | msgid "BinaryAI client used to interact with servers." 41 | msgstr "" 42 | 43 | #: ../../autoapi/binaryai/client/index.rst:21 44 | msgid "Attributes" 45 | msgstr "" 46 | 47 | #: ../../autoapi/binaryai/client/index.rst:33::1 48 | msgid ":py:obj:`SDK_VERSION `\\" 49 | msgstr "" 50 | 51 | #: ../../autoapi/binaryai/client/index.rst:33::1 52 | msgid ":py:obj:`DEFAULT_SDK_NAME `\\" 53 | msgstr "" 54 | 55 | #: ../../autoapi/binaryai/client/index.rst:33::1 56 | msgid ":py:obj:`DEFAULT_POLL_INTERVAL `\\" 57 | msgstr "" 58 | 59 | #: ../../autoapi/binaryai/client/index.rst:33::1 60 | msgid ":py:obj:`DEFAULT_POLL_TIMEOUT `\\" 61 | msgstr "" 62 | 63 | #: ../../autoapi/binaryai/client/index.rst:33::1 64 | msgid ":py:obj:`HEADER_REQUEST_SOURCE `\\" 65 | msgstr "" 66 | 67 | #: ../../autoapi/binaryai/client/index.rst:33::1 68 | msgid ":py:obj:`DEFAULT_ENDPOINT `\\" 69 | msgstr "" 70 | 71 | #: ../../autoapi/binaryai/client/index.rst:33::1 72 | msgid "" 73 | ":py:obj:`DEFAULT_LICENSE_SEPARATOR " 74 | "`\\" 75 | msgstr "" 76 | 77 | #: ../../autoapi/binaryai/client/index.rst:70 78 | msgid "Bases: :py:obj:`object`" 79 | msgstr "" 80 | 81 | #: ../../autoapi/binaryai/client/index.rst:72 82 | msgid "" 83 | "BinaryAI client used to interact with servers. Users can receive upload, " 84 | "do analysis, and receive the detailed results by using this client. .. " 85 | "note::" 86 | msgstr "" 87 | 88 | #: ../../autoapi/binaryai/client/index.rst:82 89 | msgid "Uploads a file to server." 90 | msgstr "" 91 | 92 | #: ../../autoapi/binaryai/client/index.rst:84 93 | msgid "" 94 | "At least one of following input should be not None: * File upload: fill " 95 | "`filepath` for the file to be upload on the disk * Memory upload: `mem` " 96 | "for the file to be upload in the memory" 97 | msgstr "" 98 | 99 | #: ../../autoapi/binaryai/client/index.rst:88 100 | msgid "" 101 | "If you only have the hash, you can try to fill `sha256` and `md5`, but " 102 | "the error FileRequiredError might be raised. Hash is ignored if file is " 103 | "already provided through `filepath` or `mem. When multiple hashes " 104 | "provided, only use sha256." 105 | msgstr "" 106 | 107 | #: ../../autoapi/binaryai/client/index.rst:92 108 | msgid "" 109 | "Memory upload, hash upload and `hooks` are experimental features. They " 110 | "might be changed without noticed." 111 | msgstr "" 112 | 113 | #: ../../autoapi/binaryai/client/index.rst 114 | msgid "Parameters" 115 | msgstr "" 116 | 117 | #: ../../autoapi/binaryai/client/index.rst:94 118 | msgid "A pathname to a given file for file upload." 
119 | msgstr "" 120 | 121 | #: ../../autoapi/binaryai/client/index.rst:96 122 | msgid "A byte buffer for a file in memory to be upload." 123 | msgstr "" 124 | 125 | #: ../../autoapi/binaryai/client/index.rst:98 126 | msgid "A dict to modify arguments before certain operations." 127 | msgstr "" 128 | 129 | #: ../../autoapi/binaryai/client/index.rst:100 130 | #: ../../autoapi/binaryai/client/index.rst:102 131 | msgid "A string for hash upload." 132 | msgstr "" 133 | 134 | #: ../../autoapi/binaryai/client/index.rst 135 | msgid "Returns" 136 | msgstr "" 137 | 138 | #: ../../autoapi/binaryai/client/index.rst:105 139 | msgid "A actual sha256 that server calculates and returns." 140 | msgstr "" 141 | 142 | #: ../../autoapi/binaryai/client/index.rst:110 143 | msgid "Reanalyze target file." 144 | msgstr "" 145 | 146 | #: ../../autoapi/binaryai/client/index.rst:112 147 | #: ../../autoapi/binaryai/client/index.rst:139 148 | #: ../../autoapi/binaryai/client/index.rst:148 149 | #: ../../autoapi/binaryai/client/index.rst:156 150 | #: ../../autoapi/binaryai/client/index.rst:166 151 | #: ../../autoapi/binaryai/client/index.rst:176 152 | #: ../../autoapi/binaryai/client/index.rst:186 153 | #: ../../autoapi/binaryai/client/index.rst:196 154 | #: ../../autoapi/binaryai/client/index.rst:203 155 | #: ../../autoapi/binaryai/client/index.rst:213 156 | #: ../../autoapi/binaryai/client/index.rst:223 157 | #: ../../autoapi/binaryai/client/index.rst:233 158 | #: ../../autoapi/binaryai/client/index.rst:266 159 | msgid "File sha256sum." 160 | msgstr "" 161 | 162 | #: ../../autoapi/binaryai/client/index.rst:117 163 | msgid "" 164 | "Wait until having a latest stable result, by waiting for if all analysis " 165 | "on this file done. You can set the wait timeout in seconds. If no stable " 166 | "results available after timeout, a TimeoutError is raised." 167 | msgstr "" 168 | 169 | #: ../../autoapi/binaryai/client/index.rst:121 170 | msgid "" 171 | "If parts being waitied are failed instead of succeed, this function will " 172 | "*not* raise any exception. To get detailed info about status, call " 173 | "`get_analyze_status`." 174 | msgstr "" 175 | 176 | #: ../../autoapi/binaryai/client/index.rst:124 177 | msgid "" 178 | "For analyze in parallel, consider call this function in a seperate " 179 | "thread, since this function is wait by calling `threaing.Event`. This " 180 | "function's implementation is a good reference of judging if a file is " 181 | "finished analyzing." 182 | msgstr "" 183 | 184 | #: ../../autoapi/binaryai/client/index.rst:128 185 | msgid "File sha256 sum." 186 | msgstr "" 187 | 188 | #: ../../autoapi/binaryai/client/index.rst:129 189 | msgid "maxium wait time in seconds. If negative, wait forever." 190 | msgstr "" 191 | 192 | #: ../../autoapi/binaryai/client/index.rst:131 193 | msgid "pool interval in seconds. Raise error if not positive." 194 | msgstr "" 195 | 196 | #: ../../autoapi/binaryai/client/index.rst:137 197 | msgid "" 198 | "Return current state of each analyzers. Read API document about " 199 | "relationship between analyzer and results." 200 | msgstr "" 201 | 202 | #: ../../autoapi/binaryai/client/index.rst:144 203 | msgid "Get file sha256 by its md5." 204 | msgstr "" 205 | 206 | #: ../../autoapi/binaryai/client/index.rst:146 207 | msgid "File md5 hash." 208 | msgstr "" 209 | 210 | #: ../../autoapi/binaryai/client/index.rst 211 | msgid "Return type" 212 | msgstr "" 213 | 214 | #: ../../autoapi/binaryai/client/index.rst:154 215 | msgid "Get all uploaded filenames for a given file." 
216 | msgstr "" 217 | 218 | #: ../../autoapi/binaryai/client/index.rst:158 219 | msgid "A list of filenames." 220 | msgstr "" 221 | 222 | #: ../../autoapi/binaryai/client/index.rst:164 223 | msgid "Get MIME type for a given file." 224 | msgstr "" 225 | 226 | #: ../../autoapi/binaryai/client/index.rst:168 227 | msgid "MIME type string." 228 | msgstr "" 229 | 230 | #: ../../autoapi/binaryai/client/index.rst:174 231 | msgid "Get size in bytes of a given file." 232 | msgstr "" 233 | 234 | #: ../../autoapi/binaryai/client/index.rst:178 235 | msgid "File size in bytes." 236 | msgstr "" 237 | 238 | #: ../../autoapi/binaryai/client/index.rst:184 239 | msgid "Get a list of files inside a compressed file identified by a sha256." 240 | msgstr "" 241 | 242 | #: ../../autoapi/binaryai/client/index.rst:186 243 | msgid "File sha256sum. Returns: int: File size in bytes." 244 | msgstr "" 245 | 246 | #: ../../autoapi/binaryai/client/index.rst:188 247 | msgid "Returns:" 248 | msgstr "" 249 | 250 | #: ../../autoapi/binaryai/client/index.rst:189 251 | msgid "int: File size in bytes." 252 | msgstr "" 253 | 254 | #: ../../autoapi/binaryai/client/index.rst:194 255 | msgid "Get all CVE names for a given file." 256 | msgstr "" 257 | 258 | #: ../../autoapi/binaryai/client/index.rst:201 259 | msgid "Get all licenses for a given file." 260 | msgstr "" 261 | 262 | #: ../../autoapi/binaryai/client/index.rst:205 263 | msgid "A list of license string." 264 | msgstr "" 265 | 266 | #: ../../autoapi/binaryai/client/index.rst:211 267 | msgid "Get all license short names for a given file." 268 | msgstr "" 269 | 270 | #: ../../autoapi/binaryai/client/index.rst:215 271 | msgid "A list of license short names." 272 | msgstr "" 273 | 274 | #: ../../autoapi/binaryai/client/index.rst:221 275 | msgid "Get all ASCII strings for a given file." 276 | msgstr "" 277 | 278 | #: ../../autoapi/binaryai/client/index.rst:225 279 | msgid "A list of ASCII strings." 280 | msgstr "" 281 | 282 | #: ../../autoapi/binaryai/client/index.rst:231 283 | msgid "Get SCA result for a given file." 284 | msgstr "" 285 | 286 | #: ../../autoapi/binaryai/client/index.rst:235 287 | msgid "A list of software components." 288 | msgstr "" 289 | 290 | #: ../../autoapi/binaryai/client/index.rst:241 291 | msgid "Fetch analysis overview from BinaryAI Beat server by file's sha256." 292 | msgstr "" 293 | 294 | #: ../../autoapi/binaryai/client/index.rst:243 295 | msgid "A key-value pair containing overview of the binary file" 296 | msgstr "" 297 | 298 | #: ../../autoapi/binaryai/client/index.rst:248 299 | msgid "Fetch file download link by file's sha256." 300 | msgstr "" 301 | 302 | #: ../../autoapi/binaryai/client/index.rst:250 303 | msgid "A link can be used to download link later. The link might expire." 304 | msgstr "" 305 | 306 | #: ../../autoapi/binaryai/client/index.rst:255 307 | msgid "Fetch offsets of functions from analysis." 308 | msgstr "" 309 | 310 | #: ../../autoapi/binaryai/client/index.rst:257 311 | msgid "list of function offset" 312 | msgstr "" 313 | 314 | #: ../../autoapi/binaryai/client/index.rst:262 315 | msgid "" 316 | "Parses the list of functions and returns a Function instance containing " 317 | "the given function's name, fileoffset, bytes, pseudocode and returns the " 318 | "list with a generator." 
319 | msgstr "" 320 | 321 | #: ../../autoapi/binaryai/client/index.rst:267 322 | msgid "Batch size to get functions' info" 323 | msgstr "" 324 | 325 | #: ../../autoapi/binaryai/client/index.rst:269 326 | msgid "Function Iterator" 327 | msgstr "" 328 | 329 | #: ../../autoapi/binaryai/client/index.rst:274 330 | msgid "" 331 | "Fetch detailed information about the given function identified by its " 332 | "offset address." 333 | msgstr "" 334 | 335 | #: ../../autoapi/binaryai/client/index.rst:278 336 | #: ../../autoapi/binaryai/client/index.rst:291 337 | #: ../../autoapi/binaryai/client/index.rst:309 338 | msgid "Params:" 339 | msgstr "" 340 | 341 | #: ../../autoapi/binaryai/client/index.rst:278 342 | #: ../../autoapi/binaryai/client/index.rst:309 343 | msgid "offset: offset address of desired function" 344 | msgstr "" 345 | 346 | #: ../../autoapi/binaryai/client/index.rst:280 347 | msgid "" 348 | "Function instance containing the given function's name, fileoffset, " 349 | "bytes, pseudocode" 350 | msgstr "" 351 | 352 | #: ../../autoapi/binaryai/client/index.rst:286 353 | msgid "" 354 | "Fetch detailed information about the given functions identified by its " 355 | "offset address." 356 | msgstr "" 357 | 358 | #: ../../autoapi/binaryai/client/index.rst:290 359 | msgid "" 360 | "offsets: A list of offset addresses of desired functions batch_size: " 361 | "Batch size to get functions' info." 362 | msgstr "" 363 | 364 | #: ../../autoapi/binaryai/client/index.rst:293 365 | msgid "Function iterator" 366 | msgstr "" 367 | 368 | #: ../../autoapi/binaryai/client/index.rst 369 | msgid "Raises" 370 | msgstr "" 371 | 372 | #: ../../autoapi/binaryai/client/index.rst:295 373 | msgid "invalid batch size" 374 | msgstr "" 375 | 376 | #: ../../autoapi/binaryai/client/index.rst:300 377 | msgid "Get functions' info in batches" 378 | msgstr "" 379 | 380 | #: ../../autoapi/binaryai/client/index.rst:305 381 | msgid "Match functions about the given function identified by its offset address." 382 | msgstr "" 383 | 384 | #: ../../autoapi/binaryai/client/index.rst:311 385 | msgid "" 386 | "a List containing 10 match results, every result is a Dict the contains " 387 | "score and pseudocode. The List is sorted by score from high to low" 388 | msgstr "" 389 | 390 | #: ../../autoapi/binaryai/client/index.rst:318 391 | msgid "" 392 | "Return the KHash of this file. See website for detailed introduction on " 393 | "KHash." 394 | msgstr "" 395 | 396 | #: ../../autoapi/binaryai/client/index.rst:320 397 | msgid "" 398 | "KHash's value and version. Only compare if version is same." 399 | " You are not expected to parse version." 400 | msgstr "" 401 | 402 | #: ../../autoapi/binaryai/client/index.rst:322 403 | msgid "KHash's value and version. Only compare if version is same." 404 | msgstr "" 405 | 406 | #: ../../autoapi/binaryai/client/index.rst:323 407 | msgid "You are not expected to parse version." 408 | msgstr "" 409 | 410 | #: ../../autoapi/binaryai/client/index.rst:329 411 | msgid "" 412 | "Return the malware probability of this file. 0 usually mean a white file," 413 | " while 1 mean the file is risky." 414 | msgstr "" 415 | 416 | #: ../../autoapi/binaryai/client/index.rst:331 417 | msgid "This is a experimental feature. This might be changed without noticed." 418 | msgstr "" 419 | 420 | #: ../../autoapi/binaryai/client/index.rst:333 421 | msgid "Probability of the file. None means no result is available." 
422 | msgstr "" 423 | 424 | -------------------------------------------------------------------------------- /src/binaryai/client_stub/client.py: -------------------------------------------------------------------------------- 1 | # Generated by ariadne-codegen 2 | # Source: ./src/binaryai/query.graphql 3 | 4 | from typing import Any, Dict, List, Optional, Union 5 | 6 | from .ascii_string import ASCIIString 7 | from .base_client import BaseClient 8 | from .base_model import UNSET, UnsetType 9 | from .check_or_upload import CheckOrUpload 10 | from .check_state import CheckState 11 | from .compressed_file import CompressedFile 12 | from .create_file import CreateFile 13 | from .cve_name import CVEName 14 | from .download_link import DownloadLink 15 | from .file_k_hash import FileKHash 16 | from .file_malware_probability import FileMalwareProbability 17 | from .file_size import FileSize 18 | from .filename import Filename 19 | from .function_info import FunctionInfo 20 | from .function_list import FunctionList 21 | from .function_match import FunctionMatch 22 | from .functions_info import FunctionsInfo 23 | from .input_types import CreateFileInput, CreateUploadTicketInput, ReanalyzeInput 24 | from .license import License 25 | from .license_short_name import LicenseShortName 26 | from .mime_type import MIMEType 27 | from .overview import Overview 28 | from .reanalyze import Reanalyze 29 | from .sca import SCA 30 | from .sha_256 import Sha256 31 | 32 | 33 | def gql(q: str) -> str: 34 | return q 35 | 36 | 37 | class Client(BaseClient): 38 | def sha_256(self, md_5: str, **kwargs: Any) -> Sha256: 39 | query = gql( 40 | """ 41 | query Sha256($md5: String!) { 42 | file: fileByHash(input: {md5: $md5}) { 43 | sha256 44 | } 45 | } 46 | """ 47 | ) 48 | variables: Dict[str, object] = {"md5": md_5} 49 | response = self.execute( 50 | query=query, operation_name="Sha256", variables=variables, **kwargs 51 | ) 52 | data = self.get_data(response) 53 | return Sha256.model_validate(data) 54 | 55 | def filename(self, sha_256: str, **kwargs: Any) -> Filename: 56 | query = gql( 57 | """ 58 | query Filename($sha256: String!) { 59 | file: fileByHash(input: {sha256: $sha256}) { 60 | name 61 | } 62 | } 63 | """ 64 | ) 65 | variables: Dict[str, object] = {"sha256": sha_256} 66 | response = self.execute( 67 | query=query, operation_name="Filename", variables=variables, **kwargs 68 | ) 69 | data = self.get_data(response) 70 | return Filename.model_validate(data) 71 | 72 | def mime_type(self, sha_256: str, **kwargs: Any) -> MIMEType: 73 | query = gql( 74 | """ 75 | query MIMEType($sha256: String!) { 76 | file: fileByHash(input: {sha256: $sha256}) { 77 | mimeType 78 | } 79 | } 80 | """ 81 | ) 82 | variables: Dict[str, object] = {"sha256": sha_256} 83 | response = self.execute( 84 | query=query, operation_name="MIMEType", variables=variables, **kwargs 85 | ) 86 | data = self.get_data(response) 87 | return MIMEType.model_validate(data) 88 | 89 | def file_size(self, sha_256: str, **kwargs: Any) -> FileSize: 90 | query = gql( 91 | """ 92 | query FileSize($sha256: String!) 
{ 93 | file: fileByHash(input: {sha256: $sha256}) { 94 | size 95 | } 96 | } 97 | """ 98 | ) 99 | variables: Dict[str, object] = {"sha256": sha_256} 100 | response = self.execute( 101 | query=query, operation_name="FileSize", variables=variables, **kwargs 102 | ) 103 | data = self.get_data(response) 104 | return FileSize.model_validate(data) 105 | 106 | def cve_name(self, sha_256: str, **kwargs: Any) -> CVEName: 107 | query = gql( 108 | """ 109 | query CVEName($sha256: String!) { 110 | file: fileByHash(input: {sha256: $sha256}) { 111 | scainfo { 112 | cves { 113 | name 114 | } 115 | } 116 | } 117 | } 118 | """ 119 | ) 120 | variables: Dict[str, object] = {"sha256": sha_256} 121 | response = self.execute( 122 | query=query, operation_name="CVEName", variables=variables, **kwargs 123 | ) 124 | data = self.get_data(response) 125 | return CVEName.model_validate(data) 126 | 127 | def license_short_name(self, sha_256: str, **kwargs: Any) -> LicenseShortName: 128 | query = gql( 129 | """ 130 | query LicenseShortName($sha256: String!) { 131 | file: fileByHash(input: {sha256: $sha256}) { 132 | scainfo { 133 | license 134 | } 135 | } 136 | } 137 | """ 138 | ) 139 | variables: Dict[str, object] = {"sha256": sha_256} 140 | response = self.execute( 141 | query=query, 142 | operation_name="LicenseShortName", 143 | variables=variables, 144 | **kwargs 145 | ) 146 | data = self.get_data(response) 147 | return LicenseShortName.model_validate(data) 148 | 149 | def license(self, sha_256: str, **kwargs: Any) -> License: 150 | query = gql( 151 | """ 152 | query License($sha256: String!) { 153 | file: fileByHash(input: {sha256: $sha256}) { 154 | scainfo { 155 | licenselist { 156 | checkreason 157 | content 158 | extra 159 | fullName 160 | pass 161 | risk 162 | shortName 163 | source 164 | url 165 | tags { 166 | permission { 167 | tagName 168 | description 169 | } 170 | condition { 171 | tagName 172 | description 173 | } 174 | forbidden { 175 | tagName 176 | description 177 | } 178 | } 179 | } 180 | } 181 | } 182 | } 183 | """ 184 | ) 185 | variables: Dict[str, object] = {"sha256": sha_256} 186 | response = self.execute( 187 | query=query, operation_name="License", variables=variables, **kwargs 188 | ) 189 | data = self.get_data(response) 190 | return License.model_validate(data) 191 | 192 | def ascii_string(self, sha_256: str, **kwargs: Any) -> ASCIIString: 193 | query = gql( 194 | """ 195 | query ASCIIString($sha256: String!) { 196 | file: fileByHash(input: {sha256: $sha256}) { 197 | executable { 198 | __typename 199 | ... on COFFInfo { 200 | asciiStrings 201 | } 202 | ... on ELFInfo { 203 | asciiStrings 204 | } 205 | ... on MachoInfo { 206 | asciiStrings 207 | } 208 | ... on PEInfo { 209 | asciiStrings 210 | } 211 | } 212 | } 213 | } 214 | """ 215 | ) 216 | variables: Dict[str, object] = {"sha256": sha_256} 217 | response = self.execute( 218 | query=query, operation_name="ASCIIString", variables=variables, **kwargs 219 | ) 220 | data = self.get_data(response) 221 | return ASCIIString.model_validate(data) 222 | 223 | def sca(self, sha_256: str, **kwargs: Any) -> SCA: 224 | query = gql( 225 | """ 226 | query SCA($sha256: String!) 
{ 227 | file: fileByHash(input: {sha256: $sha256}) { 228 | scainfo { 229 | name 230 | version 231 | description 232 | sourceCodeURL 233 | summary 234 | } 235 | } 236 | } 237 | """ 238 | ) 239 | variables: Dict[str, object] = {"sha256": sha_256} 240 | response = self.execute( 241 | query=query, operation_name="SCA", variables=variables, **kwargs 242 | ) 243 | data = self.get_data(response) 244 | return SCA.model_validate(data) 245 | 246 | def overview(self, sha_256: str, **kwargs: Any) -> Overview: 247 | query = gql( 248 | """ 249 | query Overview($sha256: String!) { 250 | file: fileByHash(input: {sha256: $sha256}) { 251 | decompileResult { 252 | basicInfo { 253 | fileType 254 | machine 255 | platform 256 | endian 257 | loader 258 | entryPoint 259 | baseAddress 260 | } 261 | } 262 | } 263 | } 264 | """ 265 | ) 266 | variables: Dict[str, object] = {"sha256": sha_256} 267 | response = self.execute( 268 | query=query, operation_name="Overview", variables=variables, **kwargs 269 | ) 270 | data = self.get_data(response) 271 | return Overview.model_validate(data) 272 | 273 | def download_link(self, sha_256: str, **kwargs: Any) -> DownloadLink: 274 | query = gql( 275 | """ 276 | query DownloadLink($sha256: String!) { 277 | file: fileByHash(input: {sha256: $sha256}) { 278 | downloadLink 279 | } 280 | } 281 | """ 282 | ) 283 | variables: Dict[str, object] = {"sha256": sha_256} 284 | response = self.execute( 285 | query=query, operation_name="DownloadLink", variables=variables, **kwargs 286 | ) 287 | data = self.get_data(response) 288 | return DownloadLink.model_validate(data) 289 | 290 | def check_state(self, sha_256: str, **kwargs: Any) -> CheckState: 291 | query = gql( 292 | """ 293 | query CheckState($sha256: String!) { 294 | file: fileByHash(input: {sha256: $sha256}) { 295 | smartBinaryStatus: analyzeStatus(analyzer: SmartBinary) { 296 | status 297 | } 298 | smartBeatStatus: analyzeStatus(analyzer: SmartBeat) { 299 | status 300 | } 301 | text { 302 | content 303 | } 304 | decompileResult { 305 | basicInfo { 306 | fileType 307 | } 308 | } 309 | } 310 | } 311 | """ 312 | ) 313 | variables: Dict[str, object] = {"sha256": sha_256} 314 | response = self.execute( 315 | query=query, operation_name="CheckState", variables=variables, **kwargs 316 | ) 317 | data = self.get_data(response) 318 | return CheckState.model_validate(data) 319 | 320 | def function_list(self, sha_256: str, **kwargs: Any) -> FunctionList: 321 | query = gql( 322 | """ 323 | query FunctionList($sha256: String!) { 324 | file: fileByHash(input: {sha256: $sha256}) { 325 | decompileResult { 326 | functions { 327 | offset 328 | } 329 | } 330 | } 331 | } 332 | """ 333 | ) 334 | variables: Dict[str, object] = {"sha256": sha_256} 335 | response = self.execute( 336 | query=query, operation_name="FunctionList", variables=variables, **kwargs 337 | ) 338 | data = self.get_data(response) 339 | return FunctionList.model_validate(data) 340 | 341 | def function_info( 342 | self, sha_256: str, offset: Any, with_embedding: bool, **kwargs: Any 343 | ) -> FunctionInfo: 344 | query = gql( 345 | """ 346 | query FunctionInfo($sha256: String!, $offset: BigInt!, $withEmbedding: Boolean!) 
{ 347 | file: fileByHash(input: {sha256: $sha256}) { 348 | decompileResult { 349 | function(offset: $offset) { 350 | offset 351 | name 352 | embedding @include(if: $withEmbedding) { 353 | vector 354 | version 355 | } 356 | pseudoCode { 357 | code 358 | } 359 | } 360 | } 361 | } 362 | } 363 | """ 364 | ) 365 | variables: Dict[str, object] = { 366 | "sha256": sha_256, 367 | "offset": offset, 368 | "withEmbedding": with_embedding, 369 | } 370 | response = self.execute( 371 | query=query, operation_name="FunctionInfo", variables=variables, **kwargs 372 | ) 373 | data = self.get_data(response) 374 | return FunctionInfo.model_validate(data) 375 | 376 | def functions_info( 377 | self, 378 | sha_256: str, 379 | with_embedding: bool, 380 | offset: Union[Optional[List[Any]], UnsetType] = UNSET, 381 | **kwargs: Any 382 | ) -> FunctionsInfo: 383 | query = gql( 384 | """ 385 | query FunctionsInfo($sha256: String!, $offset: [BigInt!], $withEmbedding: Boolean!) { 386 | file: fileByHash(input: {sha256: $sha256}) { 387 | decompileResult { 388 | functions(offset: $offset) { 389 | offset 390 | name 391 | embedding @include(if: $withEmbedding) { 392 | vector 393 | version 394 | } 395 | pseudoCode { 396 | code 397 | } 398 | } 399 | } 400 | } 401 | } 402 | """ 403 | ) 404 | variables: Dict[str, object] = { 405 | "sha256": sha_256, 406 | "offset": offset, 407 | "withEmbedding": with_embedding, 408 | } 409 | response = self.execute( 410 | query=query, operation_name="FunctionsInfo", variables=variables, **kwargs 411 | ) 412 | data = self.get_data(response) 413 | return FunctionsInfo.model_validate(data) 414 | 415 | def function_match(self, sha_256: str, offset: Any, **kwargs: Any) -> FunctionMatch: 416 | query = gql( 417 | """ 418 | query FunctionMatch($sha256: String!, $offset: BigInt!) { 419 | file: fileByHash(input: {sha256: $sha256}) { 420 | decompileResult { 421 | function(offset: $offset) { 422 | match(topK: 10) { 423 | score 424 | function { 425 | code 426 | } 427 | } 428 | } 429 | } 430 | } 431 | } 432 | """ 433 | ) 434 | variables: Dict[str, object] = {"sha256": sha_256, "offset": offset} 435 | response = self.execute( 436 | query=query, operation_name="FunctionMatch", variables=variables, **kwargs 437 | ) 438 | data = self.get_data(response) 439 | return FunctionMatch.model_validate(data) 440 | 441 | def file_k_hash(self, sha_256: str, **kwargs: Any) -> FileKHash: 442 | query = gql( 443 | """ 444 | query FileKHash($sha256: String!) { 445 | file: fileByHash(input: {sha256: $sha256}) { 446 | decompileResult { 447 | kHashInfo { 448 | hash { 449 | hash 450 | version 451 | } 452 | } 453 | } 454 | } 455 | } 456 | """ 457 | ) 458 | variables: Dict[str, object] = {"sha256": sha_256} 459 | response = self.execute( 460 | query=query, operation_name="FileKHash", variables=variables, **kwargs 461 | ) 462 | data = self.get_data(response) 463 | return FileKHash.model_validate(data) 464 | 465 | def file_malware_probability( 466 | self, sha_256: str, **kwargs: Any 467 | ) -> FileMalwareProbability: 468 | query = gql( 469 | """ 470 | query FileMalwareProbability($sha256: String!) 
{ 471 | file: fileByHash(input: {sha256: $sha256}) { 472 | decompileResult { 473 | malwareProbability 474 | } 475 | analyzeStatus(analyzer: SmartBeat) { 476 | status 477 | } 478 | } 479 | } 480 | """ 481 | ) 482 | variables: Dict[str, object] = {"sha256": sha_256} 483 | response = self.execute( 484 | query=query, 485 | operation_name="FileMalwareProbability", 486 | variables=variables, 487 | **kwargs 488 | ) 489 | data = self.get_data(response) 490 | return FileMalwareProbability.model_validate(data) 491 | 492 | def compressed_file(self, sha_256: str, **kwargs: Any) -> CompressedFile: 493 | query = gql( 494 | """ 495 | query CompressedFile($sha256: String!) { 496 | file: fileByHash(input: {sha256: $sha256}) { 497 | decompressed { 498 | __typename 499 | ... on CompressedFile { 500 | path 501 | sha256 502 | } 503 | } 504 | } 505 | } 506 | """ 507 | ) 508 | variables: Dict[str, object] = {"sha256": sha_256} 509 | response = self.execute( 510 | query=query, operation_name="CompressedFile", variables=variables, **kwargs 511 | ) 512 | data = self.get_data(response) 513 | return CompressedFile.model_validate(data) 514 | 515 | def reanalyze(self, input: ReanalyzeInput, **kwargs: Any) -> Reanalyze: 516 | query = gql( 517 | """ 518 | mutation Reanalyze($input: ReanalyzeInput!) { 519 | reanalyze(input: $input) { 520 | noopReason 521 | file { 522 | analyzeStatus { 523 | status 524 | } 525 | } 526 | } 527 | } 528 | """ 529 | ) 530 | variables: Dict[str, object] = {"input": input} 531 | response = self.execute( 532 | query=query, operation_name="Reanalyze", variables=variables, **kwargs 533 | ) 534 | data = self.get_data(response) 535 | return Reanalyze.model_validate(data) 536 | 537 | def check_or_upload( 538 | self, input: CreateUploadTicketInput, **kwargs: Any 539 | ) -> CheckOrUpload: 540 | query = gql( 541 | """ 542 | mutation CheckOrUpload($input: CreateUploadTicketInput!) { 543 | createUploadTicket(input: $input) { 544 | __typename 545 | ... on File { 546 | sha256 547 | } 548 | ... on UploadTicket { 549 | ticketID 550 | url 551 | requestHeaders { 552 | key 553 | value 554 | } 555 | } 556 | ... on OwnershipTicket { 557 | ticketID 558 | secretPrepend 559 | secretAppend 560 | } 561 | } 562 | } 563 | """ 564 | ) 565 | variables: Dict[str, object] = {"input": input} 566 | response = self.execute( 567 | query=query, operation_name="CheckOrUpload", variables=variables, **kwargs 568 | ) 569 | data = self.get_data(response) 570 | return CheckOrUpload.model_validate(data) 571 | 572 | def create_file(self, input: CreateFileInput, **kwargs: Any) -> CreateFile: 573 | query = gql( 574 | """ 575 | mutation CreateFile($input: CreateFileInput!) { 576 | createFile(input: $input) { 577 | sha256 578 | md5 579 | name 580 | size 581 | mimeType 582 | } 583 | } 584 | """ 585 | ) 586 | variables: Dict[str, object] = {"input": input} 587 | response = self.execute( 588 | query=query, operation_name="CreateFile", variables=variables, **kwargs 589 | ) 590 | data = self.get_data(response) 591 | return CreateFile.model_validate(data) 592 | --------------------------------------------------------------------------------