├── .codecov.yml ├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ └── add_to_project.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierrc ├── .taskcluster.yml ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── VERSION ├── bugbug ├── __init__.py ├── bug_features.py ├── bug_snapshot.py ├── bugzilla.py ├── code_search │ ├── __init__.py │ ├── function_search.py │ ├── mozilla.py │ ├── parser.py │ ├── searchfox_api.py │ ├── searchfox_data.py │ └── searchfox_download.py ├── commit_features.py ├── db.py ├── feature_cleanup.py ├── generative_model_tool.py ├── github.py ├── issue_features.py ├── issue_snapshot.py ├── labels.py ├── labels │ ├── annotateignore.csv │ ├── bug_nobug.csv │ ├── defect_enhancement_task.csv │ ├── defect_enhancement_task_e.csv │ ├── defect_enhancement_task_h.csv │ ├── defect_enhancement_task_p.csv │ ├── defect_enhancement_task_s.csv │ ├── regression_bug_nobug.csv │ ├── regressionrange.csv │ ├── str.csv │ └── tracking.csv ├── model.py ├── model_calibration.py ├── models │ ├── __init__.py │ ├── accessibility.py │ ├── annotate_ignore.py │ ├── assignee.py │ ├── backout.py │ ├── browsername.py │ ├── bugtype.py │ ├── component.py │ ├── defect.py │ ├── defect_enhancement_task.py │ ├── devdocneeded.py │ ├── fenixcomponent.py │ ├── fixtime.py │ ├── invalid_compatibility_report.py │ ├── needsdiagnosis.py │ ├── performancebug.py │ ├── qaneeded.py │ ├── rcatype.py │ ├── regression.py │ ├── regressionrange.py │ ├── regressor.py │ ├── spambug.py │ ├── stepstoreproduce.py │ ├── testfailure.py │ ├── testselect.py │ ├── tracking.py │ ├── uplift.py │ └── worksforme.py ├── nlp.py ├── phabricator.py ├── repository.py ├── rust_code_analysis_server.py ├── swarm.py ├── test_scheduling.py ├── test_scheduling_features.py ├── tools │ ├── __init__.py │ ├── code_review.py │ └── comment_resolver.py ├── utils.py └── vectordb.py ├── docker-compose.yml ├── docs ├── README.md ├── data.md └── models │ 
└── regressor.md ├── experiments └── review_helper_modify_filtering_step.ipy ├── extra-nlp-requirements.txt ├── extra-nn-requirements.txt ├── functions ├── diff2html │ ├── index.js │ ├── package-lock.json │ └── package.json └── sync-review-comments-db │ ├── database.py │ ├── main.py │ ├── models.py │ └── requirements.txt ├── http_service ├── .dockerignore ├── Dockerfile ├── Dockerfile.bg_worker ├── MANIFEST.in ├── README.md ├── bugbug_http │ ├── __init__.py │ ├── app.py │ ├── boot.py │ ├── download_models.py │ ├── listener.py │ ├── models.py │ ├── readthrough_cache.py │ ├── sentry.py │ ├── templates │ │ └── doc.html │ └── worker.py ├── docker-compose.yml ├── ensure_models.sh ├── requirements.txt ├── setup.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── pytest.ini │ ├── test_bug_classification.py │ ├── test_get_config_specific_groups.py │ ├── test_integration.py │ ├── test_push_schedules.py │ ├── test_readthrough_cache.py │ └── test_schedule_tests.py ├── infra ├── check-pipeline.yml ├── data-pipeline.yml ├── dockerfile.base ├── dockerfile.base-nlp ├── dockerfile.commit_retrieval ├── dockerfile.spawn_pipeline ├── hgrc ├── landings-pipeline.yml ├── mozci_config.toml ├── set_hook_version.py ├── spawn_pipeline.py ├── spawn_pipeline_requirements.txt ├── taskcluster-hook-check-models-start.json ├── taskcluster-hook-classify-patch.json ├── taskcluster-hook-data-pipeline.json ├── taskcluster-hook-landings-risk-report.json ├── taskcluster-hook-test-select.json └── version_check.py ├── pyproject.toml ├── requirements.txt ├── scripts ├── __init__.py ├── analyze_training_metrics.py ├── backout_related_test_regressions.py ├── bug_classifier.py ├── bug_retriever.py ├── check.py ├── check_all_metrics.py ├── code_review_tool_evaluator.py ├── code_review_tool_evaluator_report.py ├── code_review_tool_runner.py ├── comment_level_labeler.py ├── comment_resolver_evaluator.py ├── comment_resolver_runner.py ├── commit_classifier.py ├── commit_retriever.py ├── 
compatibility_report_classifier.py ├── generate_landings_risk_report.py ├── generate_sheet.py ├── get_type_labels.py ├── get_untriaged.py ├── github_issue_classifier.py ├── github_issue_retriever.py ├── inline_comments_data_collection.py ├── integration_test.sh ├── maintenance_effectiveness_indicator.py ├── microannotate_generator.py ├── past_bugs_by_unit.py ├── redundant_failures.py ├── regressor_finder.py ├── retrieve_training_metrics.py ├── review_comments_retriever.py ├── revision_retriever.py ├── shadow_scheduler_stats.py ├── test_scheduling_history_retriever.py ├── testing_policy_stats.py ├── trainer.py └── trainer_extract_args.py ├── setup.py ├── test-requirements.txt ├── tests ├── conftest.py ├── fixtures │ ├── bug_features │ │ ├── blocked_bugs_number.json │ │ ├── bug_reporter.json │ │ ├── bug_types.json │ │ ├── comment_count.json │ │ ├── comment_length.json │ │ ├── component.json │ │ ├── has_crash_signature.json │ │ ├── has_cve_in_alias.json │ │ ├── has_github_url.json │ │ ├── has_regression_range.json │ │ ├── has_str.json │ │ ├── has_url.json │ │ ├── has_w3c_url.json │ │ ├── is_coverity_issue.json │ │ ├── is_mozillian.json │ │ ├── keywords.json │ │ ├── landings.json │ │ ├── nightly_uplift.json │ │ ├── patches.json │ │ ├── product.json │ │ ├── severity.json │ │ └── whiteboard.json │ ├── bugs.json │ ├── commits.json │ └── github_webcompat_web-bugs_issues.json ├── test_assignee.py ├── test_backout.py ├── test_bug.py ├── test_bug_features.py ├── test_bug_snapshot.py ├── test_bugtype.py ├── test_bugzilla.py ├── test_code_review.py ├── test_commit_features.py ├── test_db.py ├── test_defect.py ├── test_defect_enhancement_task.py ├── test_devdocneeded.py ├── test_feature_cleanup.py ├── test_github.py ├── test_github_issue_retriever.py ├── test_hooks.py ├── test_invalid_compatibility_report.py ├── test_labels.py ├── test_models.py ├── test_needsdiagnosis.py ├── test_performancebug.py ├── test_phabricator.py ├── test_pipelines.py ├── test_qaneeded.py ├── 
test_rcatype.py ├── test_regression.py ├── test_repository.py ├── test_stepstoreproduce.py ├── test_test_scheduling.py ├── test_test_scheduling_features.py ├── test_testselect.py ├── test_tracking.py ├── test_trainer.py ├── test_uplift.py └── test_utils.py └── ui └── changes ├── .eslintrc.yml ├── package-lock.json ├── package.json ├── snowpack.config.js └── src ├── bug.html ├── bug.js ├── common.js ├── css ├── common.css └── page.css ├── feature.html ├── feature.js ├── index.html ├── index.js ├── release.html ├── release.js ├── team.html ├── team.js ├── testing.html └── testing.js /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | only_pulls: true 7 | patch: 8 | default: 9 | only_pulls: true 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # CI 6 | .codeclimate.yml 7 | .travis.yml 8 | 9 | # Docker 10 | docker-compose.yml 11 | .docker 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | */__pycache__/ 16 | */*/__pycache__/ 17 | */*/*/__pycache__/ 18 | *.py[cod] 19 | */*.py[cod] 20 | */*/*.py[cod] 21 | */*/*/*.py[cod] 22 | 23 | # C extensions 24 | *.so 25 | 26 | # Distribution / packaging 27 | .Python 28 | env/ 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Virtual environment 75 | .env/ 76 | .venv/ 77 | venv/ 78 | 79 | # PyCharm 80 | .idea 81 | 82 | # Python mode for VIM 83 | .ropeproject 84 | */.ropeproject 85 | */*/.ropeproject 86 | */*/*/.ropeproject 87 | 88 | # Vim swap files 89 | *.swp 90 | */*.swp 91 | */*/*.swp 92 | */*/*/*.swp 93 | 94 | # Pytest files 95 | **/.pytest_cache/ 96 | 97 | # Project-specific stuff 98 | cache/ 99 | data/ 100 | http_service/ 101 | .taskcluster.yml 102 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: docker 4 | directory: "/http_service" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 99 8 | - package-ecosystem: docker 9 | directory: "/infra" 10 | schedule: 11 | interval: weekly 12 | open-pull-requests-limit: 99 13 | - package-ecosystem: pip 14 | directory: "/" 15 | schedule: 16 | interval: weekly 17 | open-pull-requests-limit: 99 18 | allow: 19 | - dependency-type: direct 20 | - dependency-type: indirect 21 | - package-ecosystem: npm 22 | directory: "/ui/changes" 23 | schedule: 24 | interval: weekly 25 | open-pull-requests-limit: 99 26 | -------------------------------------------------------------------------------- /.github/workflows/add_to_project.yaml: -------------------------------------------------------------------------------- 1 | name: Add new issues to the team project 2 | 3 | on: 4 | issues: 5 | types: 6 | - opened 7 | 8 | jobs: 9 | add-to-project: 10 | 
name: Add issue to project 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/add-to-project@v0.5.0 14 | with: 15 | project-url: https://github.com/orgs/mozilla/projects/214 16 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *model 2 | *model.zst 3 | *model.zst.etag 4 | *data_X 5 | *data_y 6 | *data_X.zst 7 | *data_X.zst.etag 8 | *data_y.zst 9 | *data_y.zst.etag 10 | feature_importance.png 11 | importance.html 12 | importances.json 13 | metrics.json 14 | probs.json 15 | http_service/models/*model* 16 | 17 | data/ 18 | sheets/ 19 | 20 | .mypy_cache/ 21 | .pytest_cache/ 22 | *.pyc 23 | .coverage 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | cache/ 46 | node_modules/ 47 | 48 | # Logs 49 | *.log 50 | # Desktop Service Store 51 | *.DS_Store 52 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-prettier 3 | rev: v4.0.0-alpha.8 4 | hooks: 5 | - id: prettier 6 | exclude: ^tests/fixtures/ 7 | - repo: https://github.com/astral-sh/ruff-pre-commit 8 | rev: v0.11.5 9 | hooks: 10 | - id: ruff 11 | args: [--fix] 12 | - id: ruff-format 13 | - repo: https://github.com/pycqa/pydocstyle 14 | rev: 6.3.0 15 | hooks: 16 | - id: pydocstyle 17 | exclude: ^http_service/ 18 | args: 19 | - --convention=google 20 | # Ignoring warnings about missing docstrings. 
21 | - --add-ignore=D100,D101,D102,D103,D104,D105,D107 22 | - repo: https://github.com/pre-commit/pre-commit-hooks 23 | rev: v5.0.0 24 | hooks: 25 | - id: check-ast 26 | - id: check-docstring-first 27 | - id: check-executables-have-shebangs 28 | - id: check-merge-conflict 29 | - id: check-symlinks 30 | - id: debug-statements 31 | - id: trailing-whitespace 32 | exclude: ^tests/test_repository.py 33 | - id: check-yaml 34 | - id: mixed-line-ending 35 | - id: name-tests-test 36 | args: ["--django"] 37 | - id: check-json 38 | exclude: ^tests/fixtures/ 39 | - id: requirements-txt-fixer 40 | - id: check-vcs-permalinks 41 | - repo: https://github.com/codespell-project/codespell 42 | rev: v2.4.1 43 | hooks: 44 | - id: codespell 45 | exclude_types: [json] 46 | - repo: https://github.com/marco-c/taskcluster_yml_validator 47 | rev: v0.0.12 48 | hooks: 49 | - id: taskcluster_yml 50 | - repo: https://github.com/asottile/yesqa 51 | rev: v1.5.0 52 | hooks: 53 | - id: yesqa 54 | - repo: https://github.com/pre-commit/mirrors-mypy 55 | rev: v1.15.0 56 | hooks: 57 | - id: mypy 58 | name: mypy-bugbug 59 | files: ^bugbug/|^scripts/|^tests/ 60 | entry: mypy bugbug/ scripts/ tests/ 61 | pass_filenames: false 62 | additional_dependencies: 63 | - types-pkg_resources==0.1.2 64 | - types-python-dateutil==0.1.3 65 | - types-PyYAML==5.4.0 66 | - types-orjson==0.1.0 67 | - types-tabulate==0.9.0.20240106 68 | - types-requests==0.1.11 69 | - id: mypy 70 | name: mypy-bugbug-http 71 | files: ^http_service/ 72 | entry: mypy http_service/ 73 | pass_filenames: false 74 | additional_dependencies: 75 | - types-pkg_resources==0.1.2 76 | - types-requests==0.1.11 77 | - types-Flask==1.1.0 78 | - types-redis==3.5.1 79 | - types-python-dateutil==0.1.3 80 | - types-orjson==0.1.0 81 | - types-tabulate==0.9.0.20240106 82 | - repo: meta 83 | hooks: 84 | - id: check-useless-excludes 85 | default_language_version: 86 | python: python3.12 87 | 
-------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "endOfLine": "lf", 3 | "printWidth": 80, 4 | "tabWidth": 2, 5 | "trailingComma": "es5" 6 | } 7 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | type: software 4 | authors: 5 | - family-names: Castelluccio 6 | given-names: Marco 7 | orcid: https://orcid.org/0000-0002-3285-5121 8 | affiliation: Mozilla 9 | title: bugbug 10 | doi: 10.5281/zenodo.4911345 11 | identifiers: 12 | - type: doi 13 | value: 10.5281/zenodo.4911345 14 | repository-code: https://github.com/mozilla/bugbug 15 | license: MPL-2.0 16 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Community Participation Guidelines 2 | 3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines. 4 | For more details, please read the 5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). 6 | 7 | ## How to Report 8 | 9 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page. 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | Chat with us in the [bugbug](https://chat.mozilla.org/#/room/#bugbug:mozilla.org) Matrix room. 4 | 5 | 1. 
[Issues marked as `good-first-bug`](https://github.com/mozilla/bugbug/labels/good-first-bug) are self-contained enough that a contributor should be able to work on them. 6 | 2. Issues are considered not assigned, until there is a PR linked to them. Feel free to work on any unassigned issue, you don't need to ask first. 7 | 3. If you have any problem, it could be already answered in [Discussions](https://github.com/mozilla/bugbug/discussions), if not, feel free to start a new discussion in the [Q&A](https://github.com/mozilla/bugbug/discussions/categories/q-a) category. 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include VERSION 2 | include requirements.txt 3 | include extra-nlp-requirements.txt 4 | include extra-nn-requirements.txt 5 | recursive-include bugbug/labels * 6 | 7 | recursive-exclude * __pycache__ 8 | recursive-exclude * *.py[co] 9 | recursive-exclude tests * 10 | recursive-exclude data * 11 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.0.578 2 | -------------------------------------------------------------------------------- /bugbug/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import importlib.metadata 4 | import logging 5 | 6 | logging.basicConfig( 7 | level=logging.INFO, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s" 8 | ) 9 | 10 | 11 | def get_bugbug_version(): 12 | return importlib.metadata.version("bugbug") 13 | -------------------------------------------------------------------------------- /bugbug/code_search/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of 
the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | -------------------------------------------------------------------------------- /bugbug/code_search/function_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from abc import ABC, abstractmethod 7 | from dataclasses import dataclass 8 | 9 | 10 | @dataclass 11 | class Function: 12 | name: str 13 | start: int 14 | file: str 15 | source: str 16 | 17 | 18 | class FunctionSearch(ABC): 19 | @abstractmethod 20 | def get_function_by_line( 21 | self, commit_hash: str, path: str, line: int 22 | ) -> list[Function]: 23 | raise NotImplementedError 24 | 25 | @abstractmethod 26 | def get_function_by_name( 27 | self, commit_hash: str, path: str, function_name: str 28 | ) -> list[Function]: 29 | raise NotImplementedError 30 | 31 | 32 | function_search_classes = {} 33 | 34 | 35 | def register_function_search(name, cls): 36 | function_search_classes[name] = cls 37 | -------------------------------------------------------------------------------- /bugbug/issue_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import sys 7 | 8 | import pandas as pd 9 | from sklearn.base import BaseEstimator, TransformerMixin 10 | 11 | from bugbug import issue_snapshot 12 | 13 | 14 | class CommentCount(object): 15 | name = "# of comments" 16 | 17 | def __call__(self, issue, **kwargs): 18 | return issue["comments"] 19 | 20 | 21 | class IssueExtractor(BaseEstimator, TransformerMixin): 22 | def __init__( 23 | self, 24 | feature_extractors, 25 | cleanup_functions, 26 | rollback=False, 27 | rollback_when=None, 28 | ): 29 | assert len(set(type(fe) for fe in feature_extractors)) == len( 30 | feature_extractors 31 | ), "Duplicate Feature Extractors" 32 | self.feature_extractors = feature_extractors 33 | 34 | assert len(set(type(cf) for cf in cleanup_functions)) == len( 35 | cleanup_functions 36 | ), "Duplicate Cleanup Functions" 37 | self.cleanup_functions = cleanup_functions 38 | self.rollback = rollback 39 | self.rollback_when = rollback_when 40 | 41 | def fit(self, x, y=None): 42 | for feature in self.feature_extractors: 43 | if hasattr(feature, "fit"): 44 | feature.fit(x()) 45 | 46 | return self 47 | 48 | def transform(self, issues): 49 | results = [] 50 | 51 | for issue in issues(): 52 | if self.rollback: 53 | issue = issue_snapshot.rollback(issue, self.rollback_when) 54 | 55 | data = {} 56 | 57 | for feature_extractor in self.feature_extractors: 58 | res = feature_extractor(issue) 59 | 60 | if hasattr(feature_extractor, "name"): 61 | feature_extractor_name = feature_extractor.name 62 | else: 63 | feature_extractor_name = feature_extractor.__class__.__name__ 64 | 65 | if res is None: 66 | continue 67 | 68 | if isinstance(res, (list, set)): 69 | for item in res: 70 | data[sys.intern(f"{item} in {feature_extractor_name}")] = True 71 | continue 72 | 73 | data[feature_extractor_name] = res 74 | 75 | title = issue["title"] 76 | body = issue["body"] 77 | for cleanup_function in self.cleanup_functions: 78 | title = cleanup_function(title) 79 | body = cleanup_function(body) 80 | 81 | 
results.append( 82 | { 83 | "data": data, 84 | "title": title, 85 | "first_comment": body, 86 | } 87 | ) 88 | 89 | return pd.DataFrame(results) 90 | -------------------------------------------------------------------------------- /bugbug/issue_snapshot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | 7 | def rollback(issue, when=None): 8 | assert when is None, "Rollback to a specific point in history is not supported yet." 9 | 10 | if issue["events"]: 11 | for event in issue["events"]: 12 | # Extract original title that issue got at the moment of creation 13 | if ( 14 | event["event"] == "renamed" 15 | and event["rename"]["from"] != "In the moderation queue." 16 | and event["rename"]["from"] != "Issue closed." 17 | ): 18 | issue["title"] = event["rename"]["from"] 19 | 20 | return issue 21 | -------------------------------------------------------------------------------- /bugbug/labels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import csv 7 | import os 8 | import sys 9 | 10 | 11 | def get_labels_dir(): 12 | return os.path.join(os.path.dirname(sys.modules[__package__].__file__), "labels") 13 | 14 | 15 | def get_labels(file_name): 16 | path = os.path.join(get_labels_dir(), f"{file_name}.csv") 17 | 18 | with open(path, "r") as f: 19 | reader = csv.reader(f) 20 | next(reader) 21 | yield from reader 22 | 23 | 24 | def get_all_bug_ids(): 25 | bug_ids = set() 26 | 27 | labels_dir = get_labels_dir() 28 | for csv_file in os.listdir(labels_dir): 29 | with open(os.path.join(labels_dir, csv_file)) as f: 30 | reader = csv.DictReader(f) 31 | if "bug_id" not in reader.fieldnames: 32 | continue 33 | 34 | bug_ids.update(int(row["bug_id"]) for row in reader) 35 | 36 | return list(bug_ids) 37 | -------------------------------------------------------------------------------- /bugbug/labels/tracking.csv: -------------------------------------------------------------------------------- 1 | bug_id,tracking 2 | 1521010,False 3 | 1521022,False 4 | 1521034,False 5 | 1521037,False 6 | 1521039,False 7 | 1521071,False 8 | 1521080,False 9 | 1521082,False 10 | 1521085,False 11 | 1521088,False 12 | 1521095,False 13 | 1521156,False 14 | 1521158,False 15 | 1521169,False 16 | 1521205,False 17 | 1521221,False 18 | 1521249,False 19 | 1521308,False 20 | 1521336,False 21 | 1521372,False 22 | 1521473,False 23 | 1521498,False 24 | 1521568,True 25 | 1521577,False 26 | 1521579,True 27 | 1521583,False 28 | 1521591,False 29 | 1521597,False 30 | 1521630,True 31 | 1521989,False 32 | 1521991,False 33 | 1521992,False 34 | 1521993,False 35 | 1521994,False 36 | 1521995,False 37 | 1521999,False 38 | 1522000,False 39 | 1522002,False 40 | 1522007,False 41 | 1522008,False 42 | 1522010,False 43 | 1522012,False 44 | 1522017,False 45 | 1522018,False 46 | 1522019,True 47 | 1522023,False 48 | 1522029,False 49 | 1522061,False 50 | 1522077,False 51 | 1522083,False 52 | 1522109,False 53 | 1522118,False 54 | 1522122,False 55 | 1522125,False 56 
| 1522127,False 57 | 1522129,False 58 | 1522130,False 59 | 1522134,False 60 | 1522136,False 61 | 1522138,False 62 | 1522139,False 63 | 1522173,False 64 | 1522181,False 65 | 1522182,False 66 | 1522186,False 67 | 1522187,False 68 | 1522188,False 69 | 1522189,False 70 | 1522190,False 71 | 1522191,False 72 | 1522194,False 73 | 1522195,False 74 | 1522201,False 75 | 1522202,False 76 | 1522203,False 77 | 1522204,False 78 | 1522205,False 79 | 1522207,False 80 | 1522208,False 81 | 1522210,False 82 | 1522237,False 83 | 1522242,False 84 | 1522249,False 85 | 1522254,False 86 | 1522259,False 87 | 1522268,False 88 | 1522276,False 89 | 1522277,False 90 | 1522279,False 91 | 1522280,False 92 | 1522294,False 93 | 1522298,False 94 | 1522300,False 95 | 1522302,False 96 | 1522314,False 97 | 1522315,True 98 | -------------------------------------------------------------------------------- /bugbug/model_calibration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from sklearn.base import BaseEstimator, ClassifierMixin 7 | from sklearn.calibration import CalibratedClassifierCV 8 | from sklearn.model_selection import train_test_split 9 | 10 | 11 | class IsotonicRegressionCalibrator(BaseEstimator, ClassifierMixin): 12 | def __init__(self, base_clf): 13 | self.base_clf = base_clf 14 | self.calibrated_clf = CalibratedClassifierCV( 15 | base_clf, cv="prefit", method="isotonic" 16 | ) 17 | 18 | def fit(self, X_train, y_train): 19 | X_train, X_val, y_train, y_val = train_test_split( 20 | X_train, y_train, test_size=0.2, random_state=42 21 | ) 22 | self.base_clf.fit(X_train, y_train) 23 | self.calibrated_clf.fit(X_val, y_val) 24 | 25 | def predict(self, X): 26 | return self.calibrated_clf.predict(X) 27 | 28 | def predict_proba(self, X): 29 | return self.calibrated_clf.predict_proba(X) 30 | 31 | @property 32 | def n_features_in_(self): 33 | return self.base_clf.n_features_in_ 34 | -------------------------------------------------------------------------------- /bugbug/models/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import importlib 3 | import logging 4 | from typing import Type 5 | 6 | from bugbug.model import Model 7 | 8 | LOGGER = logging.getLogger() 9 | 10 | 11 | MODELS = { 12 | "accessibility": "bugbug.models.accessibility.AccessibilityModel", 13 | "annotateignore": "bugbug.models.annotate_ignore.AnnotateIgnoreModel", 14 | "assignee": "bugbug.models.assignee.AssigneeModel", 15 | "backout": "bugbug.models.backout.BackoutModel", 16 | "browsername": "bugbug.models.browsername.BrowserNameModel", 17 | "bugtype": "bugbug.models.bugtype.BugTypeModel", 18 | "component": "bugbug.models.component.ComponentModel", 19 | "defect": "bugbug.models.defect.DefectModel", 20 | "defectenhancementtask": "bugbug.models.defect_enhancement_task.DefectEnhancementTaskModel", 21 | "devdocneeded": "bugbug.models.devdocneeded.DevDocNeededModel", 22 | "fixtime": 
"bugbug.models.fixtime.FixTimeModel", 23 | "invalidcompatibilityreport": "bugbug.models.invalid_compatibility_report.InvalidCompatibilityReportModel", 24 | "needsdiagnosis": "bugbug.models.needsdiagnosis.NeedsDiagnosisModel", 25 | "performancebug": "bugbug.models.performancebug.PerformanceBugModel", 26 | "qaneeded": "bugbug.models.qaneeded.QANeededModel", 27 | "rcatype": "bugbug.models.rcatype.RCATypeModel", 28 | "regression": "bugbug.models.regression.RegressionModel", 29 | "regressionrange": "bugbug.models.regressionrange.RegressionRangeModel", 30 | "regressor": "bugbug.models.regressor.RegressorModel", 31 | "spambug": "bugbug.models.spambug.SpamBugModel", 32 | "stepstoreproduce": "bugbug.models.stepstoreproduce.StepsToReproduceModel", 33 | "testlabelselect": "bugbug.models.testselect.TestLabelSelectModel", 34 | "testgroupselect": "bugbug.models.testselect.TestGroupSelectModel", 35 | "testconfiggroupselect": "bugbug.models.testselect.TestConfigGroupSelectModel", 36 | "testfailure": "bugbug.models.testfailure.TestFailureModel", 37 | "tracking": "bugbug.models.tracking.TrackingModel", 38 | "uplift": "bugbug.models.uplift.UpliftModel", 39 | "worksforme": "bugbug.models.worksforme.WorksForMeModel", 40 | "fenixcomponent": "bugbug.models.fenixcomponent.FenixComponentModel", 41 | } 42 | 43 | 44 | def get_model_class(model_name: str) -> Type[Model]: 45 | if model_name not in MODELS: 46 | err_msg = f"Invalid name {model_name}, not in {list(MODELS.keys())}" 47 | raise ValueError(err_msg) 48 | 49 | full_qualified_class_name = MODELS[model_name] 50 | module_name, class_name = full_qualified_class_name.rsplit(".", 1) 51 | 52 | module = importlib.import_module(module_name) 53 | 54 | return getattr(module, class_name) 55 | -------------------------------------------------------------------------------- /bugbug/models/browsername.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject 
class BrowserNameModel(IssueModel):
    """Classifies GitHub issues by whether they belong to Firefox.

    The positive class (1) is assigned to issues carrying the
    "browser-firefox" label; everything else is the negative class (0).
    """

    def __init__(self, lemmatization=False):
        IssueModel.__init__(self, lemmatization)

        # Structured (non-text) features extracted from each issue.
        feature_extractors = [
            issue_features.CommentCount(),
        ]

        # Text normalization applied before vectorization.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        self.extraction_pipeline = Pipeline(
            [
                (
                    "issue_extractor",
                    issue_features.IssueExtractor(
                        feature_extractors, cleanup_functions
                    ),
                ),
            ]
        )

        # Classifier: dict features plus TF-IDF-style text features for the
        # title and first comment, fed into an XGBoost classifier.
        self.clf = Pipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("data", DictVectorizer(), "data"),
                            ("title", self.text_vectorizer(min_df=0.0001), "title"),
                            (
                                "first_comment",
                                self.text_vectorizer(min_df=0.0001),
                                "first_comment",
                            ),
                        ]
                    ),
                ),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Build training labels from GitHub issues.

        Returns:
            A tuple of (issue number -> 0/1 label, list of class values).
            1 means the issue has the "browser-firefox" label.
        """
        classes = {}

        for issue in github.get_issues():
            for label in issue["labels"]:
                if label["name"] == "browser-firefox":
                    classes[issue["number"]] = 1

            # Issues without the label default to the negative class.
            if issue["number"] not in classes:
                classes[issue["number"]] = 0

        logger.info(
            "%d issues belong to Firefox",
            sum(label == 1 for label in classes.values()),
        )
        logger.info(
            "%d issues do not belong to Firefox",
            sum(label == 0 for label in classes.values()),
        )

        return classes, [0, 1]

    def get_feature_names(self):
        # Feature names come from the fitted ColumnTransformer.
        return self.clf.named_steps["union"].get_feature_names_out()
class NeedsDiagnosisModel(IssueModel):
    """Predicts whether a webcompat issue needs diagnosis.

    Trained on issues from the webcompat/web-bugs repository. Label 0 means
    the issue was milestoned "needsdiagnosis" or "moved"; label 1 means it
    was not.
    """

    def __init__(self, lemmatization=False):
        IssueModel.__init__(
            self, owner="webcompat", repo="web-bugs", lemmatization=lemmatization
        )

        self.calculate_importance = False

        # No structured feature extractors: this model is text-only.
        feature_extractors = []

        # Text normalization applied before vectorization.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        self.extraction_pipeline = Pipeline(
            [
                (
                    "issue_extractor",
                    issue_features.IssueExtractor(
                        feature_extractors, cleanup_functions, rollback=True
                    ),
                ),
            ]
        )

        # Classifier: text features for the title and first comment, fed
        # into an XGBoost classifier.
        self.clf = Pipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("title", self.text_vectorizer(min_df=0.0001), "title"),
                            (
                                "first_comment",
                                self.text_vectorizer(min_df=0.0001),
                                "first_comment",
                            ),
                        ]
                    ),
                ),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Build training labels from webcompat issue milestone events.

        Returns:
            A tuple of (issue number -> 0/1 label, list of class values).
            0 = moved to needsdiagnosis (or "moved"); 1 = not moved.
        """
        classes = {}

        for issue in self.github.get_issues():
            # Skip issues with empty title or body
            if issue["title"] is None or issue["body"] is None:
                continue

            # Skip issues that are not moderated yet as they don't have a meaningful title or body
            if issue["title"] == "In the moderation queue.":
                continue

            for event in issue["events"]:
                if event["event"] == "milestoned" and (
                    event["milestone"]["title"] == "needsdiagnosis"
                    or event["milestone"]["title"] == "moved"
                ):
                    classes[issue["number"]] = 0

            # Issues never milestoned as above default to the positive class.
            if issue["number"] not in classes:
                classes[issue["number"]] = 1

        logger.info(
            "%d issues have not been moved to needsdiagnosis",
            sum(label == 1 for label in classes.values()),
        )
        logger.info(
            "%d issues have been moved to needsdiagnosis",
            sum(label == 0 for label in classes.values()),
        )

        return classes, [0, 1]

    def get_feature_names(self):
        # Feature names come from the fitted ColumnTransformer.
        return self.clf.named_steps["union"].get_feature_names_out()
class RegressionModel(DefectModel):
    """Classifies bugs as regressions (1) vs. non-regressions (0)."""

    def __init__(self, lemmatization=False, historical=False):
        DefectModel.__init__(self, lemmatization, historical)
        self.calculate_importance = False

    def get_labels(self) -> tuple[dict[int, Any], list[int]]:
        """Build training labels from the "regression" label set.

        Returns:
            A tuple of (bug id -> 0/1 label, list of class values).
        """
        classes = self.get_bugbug_labels("regression")

        logger.info("%d regression bugs", sum(label == 1 for label in classes.values()))
        logger.info(
            "%d non-regression bugs", sum(label == 0 for label in classes.values())
        )

        return classes, [0, 1]

    def overwrite_classes(self, bugs, classes, probabilities):
        """Override predictions using keyword history, then the parent rules.

        A bug whose "regression" keyword was removed (and never re-added
        afterwards) is forced to the non-regression class.

        Args:
            bugs: iterable of bug dicts, aligned with ``classes`` by index.
            classes: predicted classes, or probability lists when
                ``probabilities`` is true.
            probabilities: whether ``classes`` holds probability vectors.

        Returns:
            The updated ``classes``.
        """
        for i, bug in enumerate(bugs):
            # Track the *final* keyword state: a later re-addition of the
            # "regression" keyword cancels an earlier removal.
            regression_keyword_removed = False
            for history in bug["history"]:
                for change in history["changes"]:
                    if change["field_name"] == "keywords":
                        if "regression" in [
                            k.strip() for k in change["removed"].split(",")
                        ]:
                            regression_keyword_removed = True
                        elif "regression" in [
                            k.strip() for k in change["added"].split(",")
                        ]:
                            regression_keyword_removed = False

            if regression_keyword_removed:
                classes[i] = 0 if not probabilities else [1.0, 0.0]

        # Fix: capture the parent's return value instead of discarding it.
        # Relying on in-place mutation alone would silently drop any
        # adjustment DefectModel.overwrite_classes makes via its return value.
        classes = super().overwrite_classes(bugs, classes, probabilities)

        return classes
class RegressionRangeModel(BugModel):
    """Classifies regression bugs by regression-range status.

    Only bugs carrying the "regression" keyword are labeled; label 1 is
    assigned when the bug has a ``regressed_by`` value or the
    "regressionwindow-wanted" keyword, label 0 otherwise.
    """

    def __init__(self, lemmatization=False):
        BugModel.__init__(self, lemmatization)

        # Structured (non-text) features extracted from each bug.
        feature_extractors = [
            bug_features.HasSTR(),
            bug_features.Severity(),
            bug_features.Keywords({"regression", "regressionwindow-wanted"}),
            bug_features.IsCoverityIssue(),
            bug_features.HasCrashSignature(),
            bug_features.HasURL(),
            bug_features.HasW3CURL(),
            bug_features.HasGithubURL(),
            bug_features.Whiteboard(),
            bug_features.Patches(),
            bug_features.Landings(),
        ]

        # Text normalization applied before vectorization.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        self.extraction_pipeline = Pipeline(
            [
                (
                    "bug_extractor",
                    bug_features.BugExtractor(feature_extractors, cleanup_functions),
                ),
            ]
        )

        # Classifier: dict + text features, class balancing via random
        # under-sampling, then an XGBoost classifier.
        self.clf = ImblearnPipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("data", DictVectorizer(), "data"),
                            ("title", self.text_vectorizer(), "title"),
                            ("comments", self.text_vectorizer(), "comments"),
                        ]
                    ),
                ),
                ("sampler", RandomUnderSampler(random_state=0)),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Build training labels from regression-keyword bugs.

        Returns:
            A tuple of (bug id -> 0/1 label, list of class values).
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            # Only regression bugs are relevant to this model.
            if "regression" not in bug_data["keywords"]:
                continue

            bug_id = int(bug_data["id"])
            if (
                bug_data.get("regressed_by")
                or "regressionwindow-wanted" in bug_data["keywords"]
            ):
                classes[bug_id] = 1
            else:
                classes[bug_id] = 0

        logger.info(
            "%d bugs have regression range",
            sum(label == 1 for label in classes.values()),
        )
        logger.info(
            "%d bugs don't have a regression range",
            sum(label == 0 for label in classes.values()),
        )

        return classes, [0, 1]

    def get_feature_names(self):
        # Feature names come from the fitted ColumnTransformer.
        return self.clf.named_steps["union"].get_feature_names_out()
class UpliftModel(BugModel):
    """Predicts whether an uplift (approval-mozilla-*) request is granted.

    Label 1 = an approval-mozilla-* attachment flag was granted ("+"),
    label 0 = denied ("-").
    """

    def __init__(self, lemmatization=False):
        BugModel.__init__(self, lemmatization)

        # Structured (non-text) features extracted from each bug.
        feature_extractors = [
            bug_features.HasSTR(),
            bug_features.HasRegressionRange(),
            bug_features.Severity(),
            bug_features.Keywords(),
            bug_features.IsCoverityIssue(),
            bug_features.HasCrashSignature(),
            bug_features.HasURL(),
            bug_features.HasW3CURL(),
            bug_features.HasGithubURL(),
            bug_features.Whiteboard(),
            bug_features.Patches(),
            bug_features.Landings(),
        ]

        # Text normalization applied before vectorization.
        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Bugs are rolled back to their state at the time of the uplift
        # decision (see self.rollback) before feature extraction.
        self.extraction_pipeline = Pipeline(
            [
                (
                    "bug_extractor",
                    bug_features.BugExtractor(
                        feature_extractors,
                        cleanup_functions,
                        rollback=True,
                        rollback_when=self.rollback,
                    ),
                ),
            ]
        )

        # Classifier: dict + text features, class balancing via random
        # under-sampling, then an XGBoost classifier.
        self.clf = ImblearnPipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("data", DictVectorizer(), "data"),
                            ("title", self.text_vectorizer(), "title"),
                            ("comments", self.text_vectorizer(), "comments"),
                        ]
                    ),
                ),
                ("sampler", RandomUnderSampler(random_state=0)),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def rollback(self, change):
        """Return True for the history change that marks the uplift decision.

        Matches a flagtypes.name change adding an approval-mozilla-* flag
        that was resolved with "+" or "-".
        """
        return (
            change["field_name"] == "flagtypes.name"
            and change["added"].startswith("approval-mozilla-")
            and (change["added"].endswith("+") or change["added"].endswith("-"))
        )

    def get_labels(self):
        """Build training labels from attachment approval flags.

        Returns:
            A tuple of (bug id -> 0/1 label, list of class values).
            Note: when a bug has several resolved approval flags, the last
            one encountered wins (each iteration overwrites the entry).
        """
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            for attachment in bug_data["attachments"]:
                for flag in attachment["flags"]:
                    # Only resolved approval-mozilla-* flags are labels.
                    if not flag["name"].startswith("approval-mozilla-") or flag[
                        "status"
                    ] not in ["+", "-"]:
                        continue

                    if flag["status"] == "+":
                        classes[bug_id] = 1
                    elif flag["status"] == "-":
                        classes[bug_id] = 0

        return classes, [0, 1]

    def get_feature_names(self):
        # Feature names come from the fitted ColumnTransformer.
        return self.clf.named_steps["union"].get_feature_names_out()
5 | 6 | import logging 7 | import subprocess 8 | import time 9 | 10 | import requests 11 | 12 | from bugbug import utils 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | START_RETRIES = 14 18 | HEADERS = {"Content-type": "application/octet-stream"} 19 | 20 | 21 | class RustCodeAnalysisServer: 22 | def __init__(self, thread_num: int | None = None): 23 | for _ in range(START_RETRIES): 24 | self.start_process(thread_num) 25 | 26 | for _ in range(START_RETRIES): 27 | if self.ping(): 28 | logger.info("Rust code analysis server is ready to accept queries") 29 | return 30 | else: 31 | if self.proc.poll() is not None: 32 | break 33 | 34 | time.sleep(0.35) 35 | 36 | self.terminate() 37 | raise RuntimeError("Unable to run rust-code-analysis server") 38 | 39 | @property 40 | def base_url(self): 41 | return f"http://127.0.0.1:{self.port}" 42 | 43 | def start_process(self, thread_num: int | None = None): 44 | self.port = utils.get_free_tcp_port() 45 | 46 | try: 47 | cmd = ["rust-code-analysis-web", "--port", str(self.port)] 48 | if thread_num is not None: 49 | cmd += ["-j", str(thread_num)] 50 | self.proc = subprocess.Popen(cmd) 51 | except FileNotFoundError: 52 | raise RuntimeError("rust-code-analysis is required for code analysis") 53 | 54 | def terminate(self): 55 | if self.proc is not None: 56 | self.proc.terminate() 57 | 58 | def __str__(self): 59 | return f"Server running at {self.base_url}" 60 | 61 | def ping(self): 62 | try: 63 | r = requests.get(f"{self.base_url}/ping") 64 | return r.ok 65 | except requests.exceptions.ConnectionError: 66 | return False 67 | 68 | def metrics(self, filename, code, unit=True): 69 | """Get code metrics for a file. 70 | 71 | Args: 72 | filename: the path for the file that we want to analyze 73 | code: the content of the file 74 | unit: when unit is True, then only metrics for top-level is 75 | returned, when False, then we get detailed metrics for all 76 | classes, functions, nested functions, ... 
def call(auth, g):
    """Fetch the URL ``g`` with HTTP basic auth using curl.

    Args:
        auth: dict with "user" and "password" keys.
        g: the URL to request.

    Returns:
        The raw response body as bytes (stderr is discarded and the exit
        status is not checked, so failures yield empty output).
    """
    # Fix: the previous version passed a single shell-formatted string to
    # subprocess.run() without shell=True, which raises FileNotFoundError on
    # POSIX (the whole string is treated as the executable name). Passing an
    # argument list is both correct and immune to shell injection through the
    # URL or credentials.
    process = subprocess.run(
        ["curl", "-u", f"{auth['user']}:{auth['password']}", g],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    return process.stdout
= data_rev["reviews"][0]["versions"][version_num[1] - 1]["change"] 54 | 55 | diffs = {} 56 | for file in data_rev["files"]: 57 | filename1 = file["fromFile"] if "fromFile" in file else file["depotFile"] 58 | filename2 = file["depotFile"] 59 | commit_id1 = file["diffFrom"] if "diffFrom" in file else f"#{file['rev']}" 60 | commit_id2 = file["diffTo"] if "diffTo" in file else f"@={commit_id}" 61 | 62 | diffs[filename2] = "\n".join( 63 | p4.run( 64 | "diff2", 65 | "-u", 66 | "-du5", 67 | f"{filename1}{commit_id1}", 68 | f"{filename2}{commit_id2}", 69 | ) 70 | ) 71 | 72 | data_rev["diffs"] = diffs 73 | 74 | return data_rev 75 | 76 | 77 | def get( 78 | AUTH, 79 | rev_ids: Collection[int] | None = None, 80 | modified_start: datetime | None = None, 81 | version_l=[0, 1], 82 | ): 83 | data = [] 84 | instance = AUTH["instance"] 85 | if rev_ids is not None: 86 | for r in rev_ids: 87 | loc = get_review(instance, r, version_l, AUTH) 88 | 89 | full_diff = "".join([loc["diffs"][e] for e in loc["diffs"]]) 90 | 91 | data += [ 92 | { 93 | "fields": { 94 | "diffID": int(r), 95 | "version": version_l, 96 | "file_diff": loc["diffs"], 97 | "diff": full_diff, 98 | } 99 | } 100 | ] 101 | 102 | return data 103 | -------------------------------------------------------------------------------- /bugbug/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.2" 2 | services: 3 | bugbug-base: 4 | build: 5 | context: . 
6 | dockerfile: infra/dockerfile.base 7 | image: mozilla/bugbug-base 8 | 9 | bugbug-nlp: 10 | build: 11 | context: . 12 | dockerfile: infra/dockerfile.base-nlp 13 | image: mozilla/bugbug-base-nlp 14 | 15 | bugbug-commit-retrieval: 16 | build: 17 | context: . 18 | dockerfile: infra/dockerfile.commit_retrieval 19 | image: mozilla/bugbug-commit-retrieval 20 | volumes: 21 | - type: bind 22 | source: ./cache/ 23 | target: /cache/ 24 | volume: 25 | nocopy: true 26 | 27 | bugbug-http-service: 28 | build: 29 | context: http_service 30 | image: mozilla/bugbug-http-service 31 | environment: 32 | - BUGBUG_BUGZILLA_TOKEN 33 | - BUGBUG_GITHUB_TOKEN 34 | - PORT=8000 35 | ports: 36 | - target: 8000 37 | published: 8000 38 | protocol: tcp 39 | mode: host 40 | 41 | bugbug-http-service-bg-worker: 42 | build: 43 | context: http_service 44 | dockerfile: Dockerfile.bg_worker 45 | image: mozilla/bugbug-http-service-bg-worker 46 | environment: 47 | - BUGBUG_BUGZILLA_TOKEN 48 | - BUGBUG_GITHUB_TOKEN 49 | 50 | bugbug-spawn-pipeline: 51 | build: 52 | context: infra/ 53 | dockerfile: dockerfile.spawn_pipeline 54 | image: mozilla/bugbug-spawn-pipeline 55 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Detailed documentation per model 2 | 3 | - [Regressor model for predicting risky commits](models/regressor.md) 4 | -------------------------------------------------------------------------------- /docs/data.md: -------------------------------------------------------------------------------- 1 | # Downloading Data Using BugBug 2 | 3 | BugBug relies on various types of data, such as bugs, commits, issues, and crash reports, to build its models. Although all this data is publicly available through different APIs, retrieving it every time we train a model is not an efficient solution. 
Hence, a copy of the data is saved as downloadable compressed files through a simple API. 4 | 5 | > **Note:** 6 | > You can use the data outside this project by using BugBug as a dependency (`pip install bugbug`). 7 | 8 | ## Bugzilla Bugs 9 | 10 | ```py 11 | from bugbug import bugzilla, db 12 | 13 | # Downland the latest version if the data set if it is not already downloaded 14 | db.download(bugzilla.BUGS_DB) 15 | 16 | # Iterate over all bugs in the dataset 17 | for bug in bugzilla.get_bugs(): 18 | # This is the same as if you retrieved the bug through Bugzilla REST API: 19 | # https://bmo.readthedocs.io/en/latest/api/core/v1/bug.html 20 | print(bug["id"]) 21 | ``` 22 | 23 | ## Phabricator Revisions 24 | 25 | ```py 26 | from bugbug import phabricator, db 27 | 28 | db.download(phabricator.REVISIONS_DB) 29 | 30 | for revision in phabricator.get_revisions(): 31 | # The revision here combines the results retrieved from two API endpoints: 32 | # https://phabricator.services.mozilla.com/conduit/method/differential.revision.search/ 33 | # https://phabricator.services.mozilla.com/conduit/method/transaction.search/ 34 | print(revision["id"]) 35 | ``` 36 | 37 | ## Repository Commits 38 | 39 | ```py 40 | from bugbug import repository, db 41 | 42 | db.download(bugzilla.COMMITS_DB) 43 | 44 | for commit in repository.get_commits(): 45 | print(commit["node"]) 46 | ``` 47 | 48 | ## Github Issues 49 | 50 | > _TODO_ 51 | 52 | ## Mozilla Crash Reports 53 | 54 | > _TODO_ 55 | -------------------------------------------------------------------------------- /docs/models/regressor.md: -------------------------------------------------------------------------------- 1 | ## Supported languages 2 | 3 | The regressor model supports all languages supported by rust-code-analysis: https://github.com/mozilla/rust-code-analysis#supported-languages. 
4 | 5 | ## Training the model for another project 6 | 7 | There are quite a few steps to reproduce the results on another project, and they kind of depend on the processes followed by the specific project. Here is the current pipeline, which depends on Mozilla's processes. Some steps might me not necessary for other projects (and some projects might require additional steps). 8 | 9 | 1. Gather bugs from the project's Bugzilla; 10 | 1. Mine commits from the repository; 11 | 1. Create a list of commits to ignore (formatting changes and so on, which surely can't have introduced regressions); 12 | 1. Classify bugs between actual bugs and feature requests (we recently introduced a new "type" field in Bugzilla that developers fill, so we have a high precision in this step; for old bugs where the type field is absent, we use the "defect" model to classify the bug); 13 | 1. Use SZZ to find the commits which introduced the bugs from the list from step 4 (making git blame ignore and skip over commits from step 3); 14 | 1. Now we have a dataset of commits which introduced bugs and commits which did not introduce bugs, so we can actually train the regressor model. 15 | 16 | - Step 1 is in scripts/bug_retriever.py and bugbug/bugzilla.py; 17 | - Step 2 is scripts/commit_retriever.py and bugbug/repository.py; 18 | - Step 3 and 4 and 5 are in scripts/regressor_finder.py; 19 | - Step 6 is the actual "regressor" model, in bugbug/models/regressor.py. 
20 | -------------------------------------------------------------------------------- /extra-nlp-requirements.txt: -------------------------------------------------------------------------------- 1 | gensim==4.3.2 2 | spacy==3.8.7 3 | -------------------------------------------------------------------------------- /extra-nn-requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/extra-nn-requirements.txt -------------------------------------------------------------------------------- /functions/diff2html/index.js: -------------------------------------------------------------------------------- 1 | const https = require("https"); 2 | const functions = require("@google-cloud/functions-framework"); 3 | const Diff2html = require("diff2html"); 4 | 5 | const agent = new https.Agent({ keepAlive: true }); 6 | const headers = new Headers({ 7 | "User-Agent": "bugbug-diff2html", 8 | }); 9 | const configuration = { 10 | // Diff2Html Configuration 11 | outputFormat: "line-by-line", 12 | matching: "lines", 13 | renderNothingWhenEmpty: false, 14 | diffStyle: "word", 15 | // Diff2HtmlUI Configuration 16 | synchronisedScroll: true, 17 | highlight: true, 18 | fileListToggle: true, 19 | fileListStartVisible: false, 20 | fileContentToggle: true, 21 | stickyFileHeaders: true, 22 | }; 23 | 24 | /** 25 | * Responds to any HTTP request. 26 | * 27 | * @param {!express:Request} req HTTP request context. 28 | * @param {!express:Response} res HTTP response context. 
/**
 * HTTP entry point: renders a unified diff as HTML.
 *
 * Accepts either `changeset` (an hg.mozilla.org raw revision) or the pair
 * `revision_id` + `diff_id` (a Phabricator diff). When `format=html` the
 * response is static HTML; otherwise the Diff2HtmlUI JS bundle is included.
 *
 * @param {!express:Request} req HTTP request context.
 * @param {!express:Response} res HTTP response context.
 */
functions.http("diff2html", (req, res) => {
  // The rendered diff is embeddable from any origin.
  res.set("Access-Control-Allow-Origin", "*");

  // Fix: use const (none of these are reassigned) and destructure instead
  // of repeated req.query lookups with `let`.
  const { revision_id: revisionId, diff_id: diffId, changeset } = req.query;
  const enableJS = req.query.format !== "html";

  // `== null` intentionally matches both null and undefined (same behavior
  // as the previous `== undefined` loose comparisons).
  if (changeset == null && (revisionId == null || diffId == null)) {
    res.status(400).send("Missing required parameters");
    return;
  }

  const url =
    changeset != null
      ? `https://hg.mozilla.org/mozilla-central/raw-rev/${changeset}`
      : `https://phabricator.services.mozilla.com/D${revisionId}?id=${diffId}&download=true`;

  fetch(url, { agent, headers })
    .then((response) => {
      // Renamed from `res` to avoid shadowing the Express response object.
      if (!response.ok) throw Error(response.statusText);
      return response.text();
    })
    .then((text) => strDiff2Html(text, enableJS))
    .then((output) => res.status(200).send(output))
    .catch((err) => res.status(500).send(`Error: ${err.message}`));
});
${diffHtml}
def init_connection_pool_engine() -> sqlalchemy.engine.base.Engine:
    """Initializes a connection pool for a Cloud SQL instance of Postgres.

    Uses the Cloud SQL Python Connector package.

    Connection parameters are read from the ``DATABASE_CREDENTIALS``
    environment variable, which must hold a JSON object with the keys
    ``instance_connection_name``, ``db_user``, ``db_password``, ``db_name``
    and ``private_ip``.

    Returns:
        A SQLAlchemy engine whose raw connections are created through the
        Cloud SQL connector.
    """
    connector = Connector()
    # Credentials are injected by the deployment environment as a JSON blob.
    credentials = json.loads(os.environ["DATABASE_CREDENTIALS"])
    # Connect over the instance's private IP when requested, public otherwise.
    ip_type = IPTypes.PRIVATE if credentials["private_ip"] else IPTypes.PUBLIC

    def getconn() -> pg8000.dbapi.Connection:
        # Factory used by SQLAlchemy to open each new DBAPI connection.
        conn: pg8000.dbapi.Connection = connector.connect(
            credentials["instance_connection_name"],
            "pg8000",
            user=credentials["db_user"],
            password=credentials["db_password"],
            db=credentials["db_name"],
            ip_type=ip_type,
        )
        return conn

    engine = sqlalchemy.create_engine(
        "postgresql+pg8000://",
        creator=getconn,
        pool_size=5,
        max_overflow=2,
        pool_timeout=30,  # 30 seconds
        pool_recycle=1800,  # 30 minutes
    )
    return engine
event_handler(cloud_event): 40 | vector_db = QdrantVectorDB("suggestions_feedback") 41 | vector_db.setup() 42 | 43 | largest_evaluation_id = vector_db.get_largest_id() 44 | logging.info( 45 | "Retrieving evaluations from the PostgreSQL database starting from evaluation ID %d", 46 | largest_evaluation_id, 47 | ) 48 | 49 | feedback_db = code_review.SuggestionsFeedbackDB(vector_db) 50 | feedback_db.add_suggestions_feedback( 51 | code_review.SuggestionFeedback( 52 | id=evaluation.id, 53 | action=evaluation.action.name, 54 | comment=evaluation.suggestion.content, 55 | file_path=evaluation.suggestion.file_path, 56 | user=evaluation.user, 57 | ) 58 | for evaluation in get_recent_evaluations(largest_evaluation_id) 59 | ) 60 | -------------------------------------------------------------------------------- /functions/sync-review-comments-db/requirements.txt: -------------------------------------------------------------------------------- 1 | bugbug 2 | cloud-sql-python-connector[pg8000]==1.13.0 3 | functions-framework==3.5.0 4 | SQLAlchemy==2.0.25 5 | -------------------------------------------------------------------------------- /http_service/.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | 5 | # CI 6 | .codeclimate.yml 7 | .travis.yml 8 | 9 | # Docker 10 | docker-compose.yml 11 | .docker 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | */__pycache__/ 16 | */*/__pycache__/ 17 | */*/*/__pycache__/ 18 | *.py[cod] 19 | */*.py[cod] 20 | */*/*.py[cod] 21 | */*/*/*.py[cod] 22 | 23 | # C extensions 24 | *.so 25 | 26 | # Distribution / packaging 27 | .Python 28 | env/ 29 | build/ 30 | develop-eggs/ 31 | dist/ 32 | downloads/ 33 | eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the 
exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Virtual environment 75 | .env/ 76 | .venv/ 77 | venv/ 78 | 79 | # PyCharm 80 | .idea 81 | 82 | # Python mode for VIM 83 | .ropeproject 84 | */.ropeproject 85 | */*/.ropeproject 86 | */*/*/.ropeproject 87 | 88 | # Vim swap files 89 | *.swp 90 | */*.swp 91 | */*/*.swp 92 | */*/*/*.swp 93 | 94 | # Pytest files 95 | **/.pytest_cache/ 96 | 97 | # Project-specific stuff 98 | data/ 99 | */data 100 | 101 | # Integrations tests cache 102 | cache/ 103 | */cache/ 104 | */*/cache/ -------------------------------------------------------------------------------- /http_service/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BUGBUG_VERSION=latest 2 | 3 | FROM mozilla/bugbug-base:$BUGBUG_VERSION 4 | 5 | # Install dependencies first 6 | COPY requirements.txt /requirements-http.txt 7 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir -r /requirements-http.txt 8 | 9 | # Setup http service as package 10 | COPY . 
/code/http_service 11 | # Use same version as bugbug 12 | RUN python -c "import importlib.metadata; print(importlib.metadata.version('bugbug'))" > /code/http_service/VERSION 13 | RUN pip install --disable-pip-version-check --no-cache-dir /code/http_service 14 | 15 | # Run the Pulse listener in the background 16 | CMD (bugbug-http-pulse-listener &) && gunicorn -b 0.0.0.0:$PORT bugbug_http.app --preload --timeout 30 -w 3 17 | -------------------------------------------------------------------------------- /http_service/Dockerfile.bg_worker: -------------------------------------------------------------------------------- 1 | ARG BUGBUG_VERSION=latest 2 | 3 | FROM mozilla/bugbug-commit-retrieval:$BUGBUG_VERSION 4 | 5 | # Install dependencies first 6 | COPY requirements.txt /requirements-http.txt 7 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir -r /requirements-http.txt 8 | 9 | # Setup http service as package 10 | COPY . /code/http_service 11 | # Use same version as bugbug 12 | RUN python -c "import importlib.metadata; print(importlib.metadata.version('bugbug'))" > /code/http_service/VERSION 13 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir /code/http_service 14 | 15 | # Load the models 16 | WORKDIR /code/ 17 | 18 | ARG CHECK_MODELS 19 | ENV CHECK_MODELS="${CHECK_MODELS}" 20 | 21 | ARG TAG 22 | ENV TAG="${TAG}" 23 | 24 | RUN bash /code/http_service/ensure_models.sh 25 | 26 | CMD bugbug-http-worker high default low 27 | -------------------------------------------------------------------------------- /http_service/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include bugbug_http/templates/*.html 2 | -------------------------------------------------------------------------------- /http_service/README.md: -------------------------------------------------------------------------------- 1 | ### Local development 2 | 3 | **For starting the service locally run the following 
commands.** 4 | 5 | Start Redis: 6 | 7 | docker-compose up redis 8 | 9 | Build the http service image: 10 | 11 | docker build -t mozilla/bugbug-http-service -f Dockerfile . 12 | 13 | Start the http service: 14 | 15 | docker-compose up bugbug-http-service 16 | 17 | Build the background worker image: 18 | 19 | docker build -t mozilla/bugbug-http-service-bg-worker --build-arg TAG=latest -f Dockerfile.bg_worker . 20 | 21 | Run the background worker: 22 | 23 | docker-compose up bugbug-http-service-bg-worker 24 | -------------------------------------------------------------------------------- /http_service/bugbug_http/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import tempfile 4 | 5 | ALLOW_MISSING_MODELS = bool(int(os.environ.get("BUGBUG_ALLOW_MISSING_MODELS", "0"))) 6 | 7 | REPO_DIR = os.environ.get( 8 | "BUGBUG_REPO_DIR", os.path.join(tempfile.gettempdir(), "bugbug-hg") 9 | ) 10 | -------------------------------------------------------------------------------- /http_service/bugbug_http/download_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import logging 7 | 8 | from bugbug import utils 9 | from bugbug_http import ALLOW_MISSING_MODELS 10 | from bugbug_http.models import MODEL_CACHE, MODELS_NAMES 11 | 12 | LOGGER = logging.getLogger() 13 | 14 | 15 | def download_models(): 16 | for model_name in MODELS_NAMES: 17 | utils.download_model(model_name) 18 | # Try loading the model 19 | try: 20 | m = MODEL_CACHE.get(model_name) 21 | m.download_eval_dbs(extract=False, ensure_exist=not ALLOW_MISSING_MODELS) 22 | except FileNotFoundError: 23 | if ALLOW_MISSING_MODELS: 24 | LOGGER.info( 25 | "Missing %r model, skipping because ALLOW_MISSING_MODELS is set" 26 | % model_name 27 | ) 28 | return None 29 | else: 30 | raise 31 | 32 | 33 | if __name__ == "__main__": 34 | download_models() 35 | -------------------------------------------------------------------------------- /http_service/bugbug_http/readthrough_cache.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | 7 | import datetime 8 | import logging 9 | import threading 10 | import time 11 | from datetime import timedelta 12 | from typing import Callable, Generic, TypeVar 13 | 14 | LOGGER = logging.getLogger() 15 | 16 | # A simple TTL cache to use with models. Because we expect the number of models 17 | # in the service to not be very large, simplicity of implementation is 18 | # preferred to algorithmic efficiency of operations. 
19 | # 20 | # Called an 'Idle' TTL cache because TTL of items is reset after every get 21 | Key = TypeVar("Key") 22 | Value = TypeVar("Value") 23 | 24 | 25 | class ReadthroughTTLCache(Generic[Key, Value]): 26 | def __init__(self, ttl: timedelta, load_item_function: Callable[[Key], Value]): 27 | self.ttl = ttl 28 | self.load_item_function = load_item_function 29 | self.items_last_accessed: dict[Key, datetime.datetime] = {} 30 | self.items_storage: dict[Key, Value] = {} 31 | 32 | def __contains__(self, key): 33 | return key in self.items_storage 34 | 35 | def get(self, key, force_store=False): 36 | store_item = force_store 37 | if key in self.items_storage: 38 | item = self.items_storage[key] 39 | else: 40 | item = self.load_item_function(key) 41 | # Cache the item only if it was last accessed within the past TTL seconds 42 | # Note that all entries in items_last_accessed are purged if item was not 43 | # accessed in the last TTL seconds. 44 | if key in self.items_last_accessed: 45 | store_item = True 46 | 47 | self.items_last_accessed[key] = datetime.datetime.now() 48 | if store_item: 49 | LOGGER.info( 50 | f"Storing item with the following key in readthroughcache: {key}" 51 | ) 52 | self.items_storage[key] = item 53 | 54 | return item 55 | 56 | def purge_expired_entries(self): 57 | purge_entries_before = datetime.datetime.now() - self.ttl 58 | for key, time_last_touched in list(self.items_last_accessed.items()): 59 | if time_last_touched < purge_entries_before: 60 | LOGGER.info( 61 | f"Evicting item with the following key from readthroughcache: {key}" 62 | ) 63 | del self.items_last_accessed[key] 64 | del self.items_storage[key] 65 | 66 | def start_ttl_thread(self): 67 | def purge_expired_entries_with_wait(): 68 | while True: 69 | time.sleep(self.ttl.total_seconds()) 70 | self.purge_expired_entries() 71 | 72 | thread = threading.Thread(target=purge_expired_entries_with_wait) 73 | thread.setDaemon(True) 74 | thread.start() 75 | 
-------------------------------------------------------------------------------- /http_service/bugbug_http/sentry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 6 | 7 | import logging 8 | 9 | import sentry_sdk 10 | from sentry_sdk.integrations.logging import LoggingIntegration 11 | 12 | from bugbug import get_bugbug_version 13 | 14 | 15 | def setup_sentry(dsn, integrations=[]): 16 | logging_integration = LoggingIntegration( 17 | # Default behaviour: INFO messages will be included as breadcrumbs 18 | level=logging.INFO, 19 | # Change default behaviour (ERROR messages events) 20 | event_level=logging.WARNING, 21 | ) 22 | sentry_sdk.init( 23 | dsn=dsn, 24 | integrations=[logging_integration] + integrations, 25 | release=get_bugbug_version(), 26 | ) 27 | -------------------------------------------------------------------------------- /http_service/bugbug_http/templates/doc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | BugBug documentation 5 | 6 | 7 | 11 | 12 | 15 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /http_service/bugbug_http/worker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # This Source Code Form is subject to the terms of the Mozilla Public 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
6 | 7 | import os 8 | import sys 9 | from urllib.parse import urlparse 10 | 11 | from redis import Redis 12 | from rq import Worker 13 | from sentry_sdk.integrations.rq import RqIntegration 14 | 15 | import bugbug_http.boot 16 | from bugbug_http.sentry import setup_sentry 17 | 18 | if os.environ.get("SENTRY_DSN"): 19 | setup_sentry(dsn=os.environ.get("SENTRY_DSN"), integrations=[RqIntegration()]) 20 | 21 | 22 | def main(): 23 | # Bootstrap the worker assets 24 | bugbug_http.boot.boot_worker() 25 | 26 | # Provide queue names to listen to as arguments to this script, 27 | # similar to rq worker 28 | url = urlparse(os.environ.get("REDIS_URL", "redis://localhost/0")) 29 | assert url.hostname is not None 30 | redis_conn = Redis( 31 | host=url.hostname, 32 | port=url.port if url.port is not None else 6379, 33 | password=url.password, 34 | ssl=True if url.scheme == "rediss" else False, 35 | ssl_cert_reqs=None, 36 | ) 37 | qs = sys.argv[1:] or ["default"] 38 | w = Worker(qs, connection=redis_conn) 39 | w.work() 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /http_service/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.2" 2 | services: 3 | bugbug-http-service: 4 | build: 5 | context: . 6 | image: mozilla/bugbug-http-service 7 | environment: 8 | - BUGBUG_BUGZILLA_TOKEN 9 | - BUGBUG_GITHUB_TOKEN 10 | - REDIS_URL=redis://redis:6379/0 11 | - PORT=8000 12 | - PULSE_USER 13 | - PULSE_PASSWORD 14 | - SENTRY_DSN 15 | ports: 16 | - target: 8000 17 | published: 8000 18 | protocol: tcp 19 | mode: host 20 | depends_on: 21 | - redis 22 | 23 | bugbug-http-service-bg-worker: 24 | build: 25 | context: . 
26 | dockerfile: Dockerfile.bg_worker 27 | image: mozilla/bugbug-http-service-bg-worker 28 | environment: 29 | - BUGBUG_BUGZILLA_TOKEN 30 | - BUGBUG_GITHUB_TOKEN 31 | - REDIS_URL=redis://redis:6379/0 32 | - BUGBUG_ALLOW_MISSING_MODELS 33 | - BUGBUG_REPO_DIR 34 | - SENTRY_DSN 35 | depends_on: 36 | - redis 37 | 38 | bugbug-http-service-rq-dasboard: 39 | build: 40 | context: . 41 | dockerfile: Dockerfile 42 | image: mozilla/bugbug-http-service-bg-worker 43 | command: 44 | - rq-dashboard 45 | - "-u" 46 | - "redis://redis:6379/0" 47 | ports: 48 | - target: 9181 49 | published: 9181 50 | protocol: tcp 51 | mode: host 52 | depends_on: 53 | - redis 54 | 55 | redis: 56 | image: redis:4 57 | ports: 58 | - target: 6379 59 | published: 6379 60 | protocol: tcp 61 | mode: host 62 | -------------------------------------------------------------------------------- /http_service/ensure_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | # 6 | # Download models and check that models can be correctly be loaded. Can be 7 | # disabled by passing CHECK_MODELS=0 as an environment variable 8 | 9 | set -eox pipefail 10 | 11 | if [ "$CHECK_MODELS" == "0" ]; then 12 | echo "Skipping downloading and checking models!" 
13 | exit 0; 14 | fi 15 | 16 | python -m bugbug_http.download_models 17 | -------------------------------------------------------------------------------- /http_service/requirements.txt: -------------------------------------------------------------------------------- 1 | apispec-webframeworks==1.2.0 2 | apispec[yaml]==6.8.2 3 | cerberus==1.3.7 4 | Flask==3.1.1 5 | flask-apispec==0.11.4 6 | flask-cors==6.0.0 7 | gunicorn==23.0.0 8 | kombu==5.5.4 9 | marshmallow==3.26.1 10 | requests==2.32.3 11 | rq==2.3.3 12 | rq-dashboard==0.8.2.2 13 | sentry-sdk[flask]==2.29.1 14 | -------------------------------------------------------------------------------- /http_service/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import os 7 | 8 | from setuptools import find_packages, setup 9 | 10 | here = os.path.dirname(__file__) 11 | 12 | 13 | def read_requirements(file_): 14 | with open(os.path.join(here, file_)) as f: 15 | return sorted(list(set(line.split("#")[0].strip() for line in f))) 16 | 17 | 18 | install_requires = read_requirements("requirements.txt") 19 | 20 | with open(os.path.join(here, "VERSION")) as f: 21 | version = f.read().strip() 22 | 23 | setup( 24 | name="bugbug-http-service", 25 | version=version, 26 | description="ML tools for Mozilla projects", 27 | author="Marco Castelluccio", 28 | author_email="mcastelluccio@mozilla.com", 29 | install_requires=install_requires, 30 | packages=find_packages(), 31 | include_package_data=True, 32 | license="MPL2", 33 | entry_points={ 34 | "console_scripts": [ 35 | "bugbug-http-worker = bugbug_http.worker:main", 36 | "bugbug-http-pulse-listener = bugbug_http.listener:main", 37 | ] 38 | }, 39 | classifiers=[ 40 | "Programming Language :: Python :: 
3.7", 41 | "Programming Language :: Python :: 3 :: Only", 42 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 43 | ], 44 | ) 45 | -------------------------------------------------------------------------------- /http_service/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/http_service/tests/__init__.py -------------------------------------------------------------------------------- /http_service/tests/pytest.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/http_service/tests/pytest.ini -------------------------------------------------------------------------------- /http_service/tests/test_get_config_specific_groups.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from typing import Callable 7 | 8 | import orjson 9 | import zstandard 10 | 11 | from bugbug_http import models 12 | 13 | 14 | def test_get_config_specific_groups( 15 | mock_get_config_specific_groups: Callable[ 16 | [dict[str, float], dict[str, float]], None 17 | ], 18 | ) -> None: 19 | assert models.get_config_specific_groups("test-linux1804-64/opt-*") == "OK" 20 | 21 | # Assert the test selection result is stored in Redis. 
22 | value = models.redis.get( 23 | "bugbug:job_result:get_config_specific_groups:test-linux1804-64/opt-*" 24 | ) 25 | assert value is not None 26 | result = orjson.loads(zstandard.ZstdDecompressor().decompress(value)) 27 | assert result == [{"name": "test-group1"}] 28 | -------------------------------------------------------------------------------- /http_service/tests/test_integration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import os 7 | import time 8 | from logging import INFO, basicConfig, getLogger 9 | 10 | import requests 11 | 12 | basicConfig(level=INFO) 13 | logger = getLogger(__name__) 14 | 15 | BUGBUG_HTTP_SERVER = os.environ.get("BUGBUG_HTTP_SERVER", "http://localhost:8000/") 16 | 17 | 18 | # Test classifying a single bug. 19 | def integration_test_single(): 20 | timeout = 1200 21 | for _ in range(timeout): 22 | response = requests.get( 23 | f"{BUGBUG_HTTP_SERVER}/defectenhancementtask/predict/1376406", 24 | headers={"X-Api-Key": "integration_test_single"}, 25 | ) 26 | 27 | if response.status_code == 200: 28 | break 29 | 30 | time.sleep(1) 31 | 32 | response_json = response.json() 33 | 34 | if not response.ok: 35 | raise requests.HTTPError( 36 | f"Couldn't get an answer in {timeout} seconds: {response_json}", 37 | response=response, 38 | ) 39 | 40 | logger.info("Response for bug 1376406 %s", response_json) 41 | assert response_json["class"] is not None 42 | 43 | 44 | # Test classifying a batch of bugs. 
45 | def integration_test_batch(): 46 | timeout = 100 47 | for _ in range(timeout): 48 | response = requests.post( 49 | f"{BUGBUG_HTTP_SERVER}/defectenhancementtask/predict/batch", 50 | headers={"X-Api-Key": "integration_test_batch"}, 51 | json={"bugs": [1376544, 1376412]}, 52 | ) 53 | 54 | if response.status_code == 200: 55 | break 56 | 57 | time.sleep(1) 58 | 59 | response_json = response.json() 60 | 61 | if not response.ok: 62 | raise requests.HTTPError( 63 | f"Couldn't get an answer in {timeout} seconds: {response_json}", 64 | response=response, 65 | ) 66 | 67 | response_1376544 = response_json["bugs"]["1376544"] 68 | logger.info("Response for bug 1376544 %s", response_1376544) 69 | assert response_1376544["class"] is not None 70 | response_1376412 = response_json["bugs"]["1376412"] 71 | logger.info("Response for bug 1376412 %s", response_1376412) 72 | assert response_1376412["class"] is not None 73 | 74 | 75 | if __name__ == "__main__": 76 | integration_test_single() 77 | integration_test_batch() 78 | -------------------------------------------------------------------------------- /http_service/tests/test_push_schedules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import gzip 7 | 8 | import orjson 9 | 10 | from bugbug_http.app import API_TOKEN 11 | 12 | 13 | def retrieve_compressed_reponse(response): 14 | # Response is of type "" - Flask Client's Response 15 | # Not applicable for " " 16 | if response.headers["Content-Encoding"] == "gzip": 17 | return orjson.loads(gzip.decompress(response.data)) 18 | return response.json 19 | 20 | 21 | def test_queue_job_valid(client, add_result, jobs): 22 | # schedule job 23 | rv = client.get( 24 | "/push/autoland/abcdef/schedules", 25 | headers={API_TOKEN: "test"}, 26 | ) 27 | 28 | assert rv.status_code == 202 29 | assert rv.json == {"ready": False} 30 | 31 | # still not ready 32 | rv = client.get( 33 | "/push/autoland/abcdef/schedules", 34 | headers={API_TOKEN: "test"}, 35 | ) 36 | 37 | assert rv.status_code == 202 38 | assert rv.json == {"ready": False} 39 | 40 | # job done 41 | result = { 42 | "groups": ["foo/mochitest.ini", "bar/xpcshell.ini"], 43 | "tasks": ["test-linux/opt-mochitest-1"], 44 | } 45 | keys = next(iter(jobs.values())) 46 | add_result(keys[0], result) 47 | 48 | rv = client.get( 49 | "/push/autoland/abcdef/schedules", 50 | headers={API_TOKEN: "test"}, 51 | ) 52 | assert rv.status_code == 200 53 | assert retrieve_compressed_reponse(rv) == result 54 | 55 | 56 | def test_no_api_key(client): 57 | rv = client.get("/push/autoland/foobar/schedules") 58 | 59 | assert rv.status_code == 401 60 | assert rv.json == {"message": "Error, missing X-API-KEY"} 61 | -------------------------------------------------------------------------------- /infra/check-pipeline.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | tasks: 3 | - ID: check-component 4 | created: { $fromNow: "" } 5 | deadline: { $fromNow: "12 hours" } 6 | expires: { $fromNow: "1 week" } 7 | provisionerId: proj-bugbug 8 | workerType: batch 9 | payload: 10 | maxRunTime: 3600 11 | image: mozilla/bugbug-base:${version} 12 | command: 13 | - bugbug-check 14 | - 
component 15 | 16 | routes: 17 | - notify.email.release-mgmt-analysis@mozilla.com.on-failed 18 | - notify.irc-channel.#bugbug.on-failed 19 | metadata: 20 | name: bugbug check component 21 | description: bugbug check component 22 | owner: release-mgmt-analysis@mozilla.com 23 | source: https://github.com/mozilla/bugbug/raw/master/infra/check-pipeline.yml 24 | 25 | - ID: shadow-scheduler-stats 26 | created: { $fromNow: "" } 27 | deadline: { $fromNow: "12 hours" } 28 | expires: { $fromNow: "1 week" } 29 | provisionerId: proj-bugbug 30 | workerType: compute-large 31 | payload: 32 | maxRunTime: 43200 33 | image: mozilla/bugbug-base:${version} 34 | command: 35 | - bugbug-shadow-scheduler-stats 36 | - "7" 37 | 38 | artifacts: 39 | public/average_group_scheduled.svg: 40 | path: /average_group_scheduled.svg 41 | type: file 42 | public/percentage_group_caught_at_least_one.svg: 43 | path: /percentage_group_caught_at_least_one.svg 44 | type: file 45 | public/percentage_group_caught.svg: 46 | path: /percentage_group_caught.svg 47 | type: file 48 | public/average_config_group_scheduled.svg: 49 | path: /average_config_group_scheduled.svg 50 | type: file 51 | public/percentage_config_group_caught_at_least_one.svg: 52 | path: /percentage_config_group_caught_at_least_one.svg 53 | type: file 54 | public/percentage_config_group_caught.svg: 55 | path: /percentage_config_group_caught.svg 56 | type: file 57 | 58 | features: 59 | taskclusterProxy: true 60 | scopes: 61 | - auth:aws-s3:read-write:communitytc-bugbug/* 62 | routes: 63 | - notify.email.release-mgmt-analysis@mozilla.com.on-failed 64 | - notify.irc-channel.#bugbug.on-failed 65 | - index.project.bugbug.shadow_scheduler_stats.latest 66 | metadata: 67 | name: bugbug shadow scheduler stats 68 | description: bugbug shadow scheduler stats 69 | owner: release-mgmt-analysis@mozilla.com 70 | source: https://github.com/mozilla/bugbug/raw/master/infra/check-pipeline.yml 71 | 
-------------------------------------------------------------------------------- /infra/dockerfile.base: -------------------------------------------------------------------------------- 1 | FROM python:3.12.7-slim 2 | 3 | # Setup dependencies in a cacheable step 4 | RUN --mount=type=bind,source=requirements.txt,target=/requirements.txt \ 5 | apt-get update && \ 6 | apt-get install -y --no-install-recommends gcc g++ libgomp1 libffi-dev libjemalloc2 zstd patch git && \ 7 | pip install --disable-pip-version-check --quiet --no-cache-dir -r /requirements.txt && \ 8 | apt-get purge -y gcc g++ libffi-dev patch git && \ 9 | apt-get autoremove -y && \ 10 | rm -rf /var/lib/apt/lists/* 11 | 12 | ENV LD_PRELOAD="libjemalloc.so.2" 13 | 14 | COPY infra/mozci_config.toml /root/.config/mozci/config.toml 15 | 16 | RUN --mount=type=bind,target=/tmp/bugbug,rw \ 17 | pip install --disable-pip-version-check --quiet --no-cache-dir /tmp/bugbug 18 | -------------------------------------------------------------------------------- /infra/dockerfile.base-nlp: -------------------------------------------------------------------------------- 1 | FROM mozilla/bugbug-base:latest 2 | 3 | # Setup dependencies in a cacheable step 4 | ADD extra-nlp-requirements.txt / 5 | 6 | RUN apt-get update && \ 7 | apt-get install -y --no-install-recommends gcc g++ libgomp1 && \ 8 | pip install --disable-pip-version-check --quiet --no-cache-dir -r /extra-nlp-requirements.txt && \ 9 | apt-get purge -y gcc g++ && \ 10 | apt-get autoremove -y && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | RUN python -m spacy download en_core_web_sm 14 | -------------------------------------------------------------------------------- /infra/dockerfile.commit_retrieval: -------------------------------------------------------------------------------- 1 | FROM mozilla/bugbug-base:latest 2 | 3 | ENV PATH="${PATH}:/git-cinnabar" 4 | 5 | # git is required by the annotate pipeline. 6 | # libcurl4 is required by git-cinnabar. 
7 | RUN apt-get update && \ 8 | apt-get install -y --no-install-recommends git xz-utils curl libcurl4 && \ 9 | hg clone -r 90302f015ac8dd8877ef3ee24b5a62541142378b https://hg.mozilla.org/hgcustom/version-control-tools /version-control-tools/ && \ 10 | rm -r /version-control-tools/.hg /version-control-tools/ansible /version-control-tools/docs /version-control-tools/testing && \ 11 | git clone https://github.com/glandium/git-cinnabar.git /git-cinnabar && \ 12 | cd /git-cinnabar && git -c advice.detachedHead=false checkout fd17180c439c3eb3ab9de5cfc47923b04242394a && cd .. && \ 13 | git config --global cinnabar.experiments python3 && \ 14 | git config --global cinnabar.check no-version-check && \ 15 | git config --global fetch.prune true && \ 16 | git cinnabar download && \ 17 | rm -r /git-cinnabar/.git /git-cinnabar/CI /git-cinnabar/tests && \ 18 | curl -L https://github.com/mozilla/rust-code-analysis/releases/download/v0.0.23/rust-code-analysis-linux-web-x86_64.tar.gz | tar -C /usr/bin -xzv && \ 19 | apt-get purge -y xz-utils curl && \ 20 | apt-get autoremove -y && \ 21 | rm -r /var/lib/apt/lists/* 22 | 23 | COPY infra/hgrc /etc/mercurial/hgrc.d/bugbug.rc 24 | 25 | CMD bugbug-data-commits /cache/ 26 | -------------------------------------------------------------------------------- /infra/dockerfile.spawn_pipeline: -------------------------------------------------------------------------------- 1 | FROM python:3.12.7-slim 2 | 3 | # Setup dependencies in a cacheable step 4 | ADD spawn_pipeline_requirements.txt /code/ 5 | 6 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir -r /code/spawn_pipeline_requirements.txt 7 | 8 | ADD spawn_pipeline.py /code 9 | 10 | ADD *-pipeline.yml /code/ 11 | 12 | CMD python /code/spawn_pipeline.py 13 | -------------------------------------------------------------------------------- /infra/hgrc: -------------------------------------------------------------------------------- 1 | [extensions] 2 | purge = 3 | strip = 4 | 
robustcheckout = /version-control-tools/hgext/robustcheckout/__init__.py 5 | hgmo = /version-control-tools/hgext/hgmo 6 | pushlog = /version-control-tools/hgext/pushlog 7 | mozext = /version-control-tools/hgext/mozext 8 | -------------------------------------------------------------------------------- /infra/mozci_config.toml: -------------------------------------------------------------------------------- 1 | [mozci] 2 | data_sources = ["treeherder_client", "hgmo", "taskcluster", "errorsummary"] 3 | 4 | [mozci.cache] 5 | retention = 40320 6 | serializer = "compressedpickle" 7 | 8 | [mozci.cache.stores] 9 | s3 = { driver = "s3", bucket = "communitytc-bugbug", prefix = "data/adr_cache/" } 10 | -------------------------------------------------------------------------------- /infra/set_hook_version.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import argparse 7 | import json 8 | import sys 9 | 10 | 11 | def set_hook(hook_path, version): 12 | with open(hook_path, "r") as hook_file: 13 | hook_data = json.load(hook_file) 14 | 15 | task_payload = hook_data["task"]["payload"] 16 | 17 | task_image = task_payload.get("image") 18 | 19 | # 1) Insert or replace the environment variable 20 | if task_payload["env"]: 21 | if "$merge" not in task_payload["env"]: 22 | task_payload["env"] = {"$merge": [task_payload["env"]]} 23 | 24 | task_payload["env"]["$merge"].append({"TAG": version}) 25 | else: 26 | task_payload["env"]["TAG"] = version 27 | 28 | # 2) Set the version for the hook docker image 29 | if task_image: 30 | image_name = task_image.split(":", 1)[0] 31 | if image_name.startswith("mozilla/bugbug-"): 32 | task_payload["image"] = f"{image_name}:{version}" 33 | 34 | with open(hook_path, "w") as hook_file: 35 | json.dump( 36 | hook_data, hook_file, sort_keys=True, indent=4, separators=(",", ": ") 37 | ) 38 | 39 | 40 | def parse_args(raw_args): 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument( 43 | "version", 44 | metavar="version", 45 | type=str, 46 | help="The version to set in the hook definition", 47 | ) 48 | parser.add_argument( 49 | "hook_file", 50 | metavar="hook-file", 51 | type=str, 52 | help="The hook definition file to update in-place", 53 | ) 54 | 55 | return parser.parse_args(raw_args) 56 | 57 | 58 | if __name__ == "__main__": 59 | args = parse_args(sys.argv[1:]) 60 | set_hook(args.hook_file, args.version) 61 | -------------------------------------------------------------------------------- /infra/spawn_pipeline_requirements.txt: -------------------------------------------------------------------------------- 1 | json-e==4.8.0 2 | pyyaml==6.0.2 3 | requests==2.32.3 4 | taskcluster==84.0.2 5 | -------------------------------------------------------------------------------- /infra/taskcluster-hook-check-models-start.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "schedule": ["0 0 0 * * *"], 3 | "metadata": { 4 | "description": "", 5 | "name": "BugBug check tasks", 6 | "owner": "mcastelluccio@mozilla.com" 7 | }, 8 | "task": { 9 | "created": { 10 | "$fromNow": "0 seconds" 11 | }, 12 | "deadline": { 13 | "$fromNow": "2 hours" 14 | }, 15 | "expires": { 16 | "$fromNow": "1 week" 17 | }, 18 | "extra": {}, 19 | "metadata": { 20 | "description": "", 21 | "name": "BugBug check tasks", 22 | "owner": "mcastelluccio@mozilla.com", 23 | "source": "https://github.com/mozilla/bugbug" 24 | }, 25 | "payload": { 26 | "artifacts": {}, 27 | "cache": {}, 28 | "capabilities": {}, 29 | "env": {}, 30 | "features": { 31 | "taskclusterProxy": true 32 | }, 33 | "command": [ 34 | "/usr/local/bin/python3", 35 | "/code/spawn_pipeline.py", 36 | "/code/check-pipeline.yml" 37 | ], 38 | "image": "mozilla/bugbug-spawn-pipeline", 39 | "maxRunTime": 7200 40 | }, 41 | "priority": "normal", 42 | "provisionerId": "proj-bugbug", 43 | "retries": 5, 44 | "routes": [ 45 | "notify.email.release-mgmt-analysis@mozilla.com.on-failed", 46 | "notify.irc-channel.#bugbug.on-failed" 47 | ], 48 | "schedulerId": "-", 49 | "scopes": ["assume:hook-id:project-bugbug/bugbug-checks"], 50 | "tags": {}, 51 | "workerType": "batch" 52 | }, 53 | "triggerSchema": { 54 | "additionalProperties": false, 55 | "type": "object" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /infra/taskcluster-hook-classify-patch.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "description": "", 4 | "name": "BugBug classify patch", 5 | "owner": "mcastelluccio@mozilla.com" 6 | }, 7 | "task": { 8 | "created": { 9 | "$fromNow": "0 seconds" 10 | }, 11 | "deadline": { 12 | "$fromNow": "2 hours" 13 | }, 14 | "expires": { 15 | "$fromNow": "1 month" 16 | }, 17 | "extra": {}, 18 | "metadata": { 19 | "description": 
"", 20 | "name": "BugBug classify patch", 21 | "owner": "mcastelluccio@mozilla.com", 22 | "source": "https://github.com/mozilla/bugbug" 23 | }, 24 | "payload": { 25 | "artifacts": { 26 | "public/results.json": { 27 | "path": "/results.json", 28 | "type": "file" 29 | }, 30 | "public/importances.json": { 31 | "path": "/importances.json", 32 | "type": "file" 33 | }, 34 | "public/method_level.json": { 35 | "path": "/method_level.json", 36 | "type": "file" 37 | } 38 | }, 39 | "cache": { 40 | "bugbug-mercurial-repository": "/cache" 41 | }, 42 | "capabilities": {}, 43 | "env": { 44 | "TC_SECRET_ID": "project/bugbug/production" 45 | }, 46 | "features": { 47 | "taskclusterProxy": true 48 | }, 49 | "command": [ 50 | "bugbug-classify-commit", 51 | "regressor", 52 | "/cache/mozilla-central", 53 | "--phabricator-deployment=${payload['PHABRICATOR_DEPLOYMENT']}", 54 | "--diff-id=${payload['DIFF_ID']}", 55 | "--git_repo_dir=/gecko-dev", 56 | "--method_defect_predictor_dir=/MethodDefectPredictor" 57 | ], 58 | "image": "mozilla/bugbug-commit-retrieval", 59 | "maxRunTime": 7200 60 | }, 61 | "priority": "normal", 62 | "provisionerId": "proj-bugbug", 63 | "retries": 5, 64 | "routes": [ 65 | "notify.email.mcastelluccio@mozilla.com.on-failed", 66 | "notify.irc-channel.#bugbug.on-failed", 67 | "index.project.bugbug.classify_patch.latest", 68 | "index.project.bugbug.classify_patch.diff.${payload['DIFF_ID']}" 69 | ], 70 | "schedulerId": "-", 71 | "scopes": ["assume:hook-id:project-bugbug/bugbug-classify-patch"], 72 | "tags": {}, 73 | "workerType": "compute-small" 74 | }, 75 | "triggerSchema": { 76 | "additionalProperties": false, 77 | "properties": { 78 | "PHABRICATOR_DEPLOYMENT": { 79 | "type": "string", 80 | "enum": ["prod", "dev"] 81 | }, 82 | "DIFF_ID": { 83 | "type": "number" 84 | } 85 | }, 86 | "required": ["PHABRICATOR_DEPLOYMENT", "DIFF_ID"], 87 | "type": "object" 88 | } 89 | } 90 | -------------------------------------------------------------------------------- 
/infra/taskcluster-hook-data-pipeline.json: -------------------------------------------------------------------------------- 1 | { 2 | "schedule": ["0 0 1,16 * *"], 3 | "metadata": { 4 | "description": "", 5 | "name": "BugBug data pipeline", 6 | "owner": "mcastelluccio@mozilla.com" 7 | }, 8 | "task": { 9 | "created": { 10 | "$fromNow": "0 seconds" 11 | }, 12 | "deadline": { 13 | "$fromNow": "2 hours" 14 | }, 15 | "expires": { 16 | "$fromNow": "1 year" 17 | }, 18 | "extra": {}, 19 | "metadata": { 20 | "description": "", 21 | "name": "BugBug data pipeline", 22 | "owner": "mcastelluccio@mozilla.com", 23 | "source": "https://github.com/mozilla/bugbug" 24 | }, 25 | "payload": { 26 | "artifacts": {}, 27 | "cache": {}, 28 | "capabilities": {}, 29 | "env": {}, 30 | "features": { 31 | "taskclusterProxy": true 32 | }, 33 | "command": [ 34 | "/usr/local/bin/python3", 35 | "/code/spawn_pipeline.py", 36 | "/code/data-pipeline.yml" 37 | ], 38 | "image": "mozilla/bugbug-spawn-pipeline", 39 | "maxRunTime": 7200 40 | }, 41 | "priority": "normal", 42 | "provisionerId": "proj-bugbug", 43 | "retries": 5, 44 | "routes": [ 45 | "notify.email.release-mgmt-analysis@mozilla.com.on-failed", 46 | "notify.irc-channel.#bugbug.on-failed", 47 | "index.project.bugbug.data-pipeline-start" 48 | ], 49 | "schedulerId": "-", 50 | "scopes": ["assume:hook-id:project-bugbug/bugbug"], 51 | "tags": {}, 52 | "workerType": "batch" 53 | }, 54 | "triggerSchema": { 55 | "additionalProperties": false, 56 | "type": "object" 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /infra/taskcluster-hook-landings-risk-report.json: -------------------------------------------------------------------------------- 1 | { 2 | "schedule": ["0 0 0 * * *"], 3 | "metadata": { 4 | "description": "", 5 | "name": "BugBug landings risk report pipeline", 6 | "owner": "mcastelluccio@mozilla.com" 7 | }, 8 | "task": { 9 | "created": { 10 | "$fromNow": "0 seconds" 11 | }, 12 | "deadline": { 
13 | "$fromNow": "2 hours" 14 | }, 15 | "expires": { 16 | "$fromNow": "1 week" 17 | }, 18 | "extra": {}, 19 | "metadata": { 20 | "description": "", 21 | "name": "BugBug landings risk report pipeline", 22 | "owner": "mcastelluccio@mozilla.com", 23 | "source": "https://github.com/mozilla/bugbug" 24 | }, 25 | "payload": { 26 | "artifacts": {}, 27 | "cache": {}, 28 | "capabilities": {}, 29 | "env": {}, 30 | "features": { 31 | "taskclusterProxy": true 32 | }, 33 | "command": [ 34 | "/usr/local/bin/python3", 35 | "/code/spawn_pipeline.py", 36 | "/code/landings-pipeline.yml" 37 | ], 38 | "image": "mozilla/bugbug-spawn-pipeline", 39 | "maxRunTime": 7200 40 | }, 41 | "priority": "normal", 42 | "provisionerId": "proj-bugbug", 43 | "retries": 5, 44 | "routes": [ 45 | "notify.email.release-mgmt-analysis@mozilla.com.on-failed", 46 | "notify.irc-channel.#bugbug.on-failed" 47 | ], 48 | "schedulerId": "-", 49 | "scopes": ["assume:hook-id:project-bugbug/bugbug-landings-risk-report"], 50 | "tags": {}, 51 | "workerType": "batch" 52 | }, 53 | "triggerSchema": { 54 | "additionalProperties": false, 55 | "type": "object" 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /infra/taskcluster-hook-test-select.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "description": "", 4 | "name": "BugBug test select", 5 | "owner": "mcastelluccio@mozilla.com" 6 | }, 7 | "task": { 8 | "created": { 9 | "$fromNow": "0 seconds" 10 | }, 11 | "deadline": { 12 | "$fromNow": "2 hours" 13 | }, 14 | "expires": { 15 | "$fromNow": "1 month" 16 | }, 17 | "extra": { 18 | "phabricator-deployment": "${payload['PHABRICATOR_DEPLOYMENT']}", 19 | "phabricator-diff-id": "${payload['DIFF_ID']}" 20 | }, 21 | "metadata": { 22 | "description": "", 23 | "name": "BugBug test select", 24 | "owner": "mcastelluccio@mozilla.com", 25 | "source": "https://github.com/mozilla/bugbug" 26 | }, 27 | "payload": { 28 | 
"artifacts": { 29 | "public/selected_tasks": { 30 | "path": "/selected_tasks", 31 | "type": "file" 32 | }, 33 | "public/failure_risk": { 34 | "path": "/failure_risk", 35 | "type": "file" 36 | } 37 | }, 38 | "cache": { 39 | "bugbug-mercurial-repository": "/cache" 40 | }, 41 | "capabilities": {}, 42 | "env": { 43 | "TC_SECRET_ID": "project/bugbug/production" 44 | }, 45 | "features": { 46 | "taskclusterProxy": true 47 | }, 48 | "command": [ 49 | "bugbug-classify-commit", 50 | "testlabelselect", 51 | "/cache/mozilla-central", 52 | "--phabricator-deployment=${payload['PHABRICATOR_DEPLOYMENT']}", 53 | "--diff-id=${payload['DIFF_ID']}", 54 | "--runnable-jobs=${payload['RUNNABLE_JOBS']}" 55 | ], 56 | "image": "mozilla/bugbug-commit-retrieval", 57 | "maxRunTime": 7200 58 | }, 59 | "priority": "normal", 60 | "provisionerId": "proj-bugbug", 61 | "retries": 5, 62 | "routes": [ 63 | "notify.email.mcastelluccio@mozilla.com.on-failed", 64 | "notify.irc-channel.#bugbug.on-failed", 65 | "index.project.bugbug.test_select.latest", 66 | "index.project.bugbug.test_select.diff.${payload['DIFF_ID']}", 67 | "project.bugbug.test_select" 68 | ], 69 | "schedulerId": "-", 70 | "scopes": [ 71 | "assume:hook-id:project-bugbug/bugbug-test-select", 72 | "queue:route:project.bugbug.test_select.*" 73 | ], 74 | "tags": {}, 75 | "workerType": "compute-small" 76 | }, 77 | "triggerSchema": { 78 | "additionalProperties": false, 79 | "properties": { 80 | "PHABRICATOR_DEPLOYMENT": { 81 | "type": "string", 82 | "enum": ["prod", "dev"] 83 | }, 84 | "DIFF_ID": { 85 | "type": "number" 86 | }, 87 | "RUNNABLE_JOBS": { 88 | "type": "string" 89 | } 90 | }, 91 | "required": ["PHABRICATOR_DEPLOYMENT", "DIFF_ID"], 92 | "type": "object" 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /infra/version_check.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms 
of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import subprocess 7 | 8 | with open("VERSION", "r") as f: 9 | version = f.read().rstrip() 10 | 11 | try: 12 | p = subprocess.run( 13 | ["git", "describe", "--abbrev=0", "--tags"], check=True, capture_output=True 14 | ) 15 | except subprocess.CalledProcessError as e: 16 | print(f"{e.cmd} failed with return code {e.returncode}") 17 | print("stdout:") 18 | print(e.stdout) 19 | print("stderr:") 20 | print(e.stderr) 21 | raise RuntimeError("Failure while getting latest tag") 22 | 23 | cur_tag = p.stdout.decode("utf-8")[1:].rstrip() 24 | 25 | assert version == cur_tag, ( 26 | f"Version in the VERSION file ({version}) should be the same as the current tag ({cur_tag})" 27 | ) 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | extend-exclude = ["data"] 3 | 4 | [tool.ruff.lint] 5 | select = ["E4", "E7", "E9", "F", "I", "T10", "CPY"] 6 | 7 | [tool.ruff.lint.isort] 8 | known-first-party = ["bugbug_http"] 9 | 10 | [tool.codespell] 11 | ignore-words-list = ["aFile", "thirdparty", "checkin"] 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | amqp==5.3.1 2 | beautifulsoup4==4.13.4 3 | boto3==1.38.27 4 | imbalanced-learn==0.13.0 5 | langchain==0.3.25 6 | langchain-anthropic==0.3.13 7 | langchain-community==0.3.24 8 | langchain-google-genai==2.1.5 9 | langchain-mistralai==0.2.10 10 | langchain-openai==0.3.18 11 | libmozdata==0.2.10 12 | llama-cpp-python==0.3.9 13 | lmdb==1.6.2 14 | lxml-html-clean==0.4.2 15 | markdown2==2.5.3 16 | matplotlib==3.10.1 17 | mercurial==7.0.2 18 | microannotate==0.0.24 19 | mozci==2.4.1 20 | numpy==2.0.2 
21 | orjson==3.10.18 22 | ortools==9.12.4544 23 | pandas==2.2.3 24 | psutil==7.0.0 25 | pydriller==1.12 26 | pyOpenSSL>=0.14 # Could not find a version that satisfies the requirement pyOpenSSL>=0.14; extra == "security" (from requests[security]>=2.7.0->libmozdata==0.1.43) 27 | python-dateutil==2.9.0.post0 28 | python-hglib==2.6.2 29 | qdrant-client==1.14.2 30 | ratelimit==2.2.1 31 | requests==2.32.3 32 | requests-html==0.10.0 33 | rs_parsepatch==0.4.4 34 | scikit-learn==1.6.1 35 | scipy==1.15.2 36 | sendgrid==6.12.3 37 | shap[plots]==0.47.2 38 | tabulate==0.9.0 39 | taskcluster==84.0.2 40 | tenacity==9.1.2 41 | tqdm==4.67.1 42 | unidiff==0.7.5 43 | xgboost==2.1.4 44 | zstandard==0.23.0 45 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/backout_related_test_regressions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | import argparse 6 | import json 7 | from logging import INFO, basicConfig, getLogger 8 | 9 | from mozci.push import Push 10 | from tqdm import tqdm 11 | 12 | from bugbug import db, repository 13 | 14 | basicConfig(level=INFO) 15 | logger = getLogger(__name__) 16 | 17 | 18 | def go() -> None: 19 | assert db.download(repository.COMMITS_DB) 20 | 21 | backouts = [] 22 | backedouts = [] 23 | for commit in repository.get_commits(include_backouts=True): 24 | if commit["backedoutby"]: 25 | backouts.append(commit["node"]) 26 | if commit["backsout"]: 27 | backedouts += commit["backsout"] 28 | 29 | backouts = backouts[-100:] 30 | backedouts = backedouts[-100:] 31 | 32 | likely_label_count = 0 33 | possible_label_count = 0 34 | likely_group_count = 0 35 | possible_group_count = 0 36 | 37 | backout_regressions = {} 38 | 39 | for backout in tqdm(backouts): 40 | p = Push(backout) 41 | 42 | label_regressions = p.get_regressions("label") 43 | likely_label_count += len(p.get_likely_regressions("label")) 44 | possible_label_count += len(p.get_possible_regressions("label")) 45 | 46 | group_regressions = p.get_regressions("group") 47 | likely_group_count += len(p.get_likely_regressions("label")) 48 | possible_group_count += len(p.get_possible_regressions("label")) 49 | 50 | if len(label_regressions) > 0 or len(group_regressions) > 0: 51 | backout_regressions[backout] = { 52 | "label": label_regressions, 53 | "group": group_regressions, 54 | } 55 | 56 | logger.info("Likely labels for backouts: %d", likely_label_count) 57 | logger.info("Likely groups for backouts: %d", likely_group_count) 58 | logger.info("Possible labels for backouts: %d", possible_label_count) 59 | logger.info("Possible groups for backouts: %d", possible_group_count) 60 | 61 | backedout_regressions = {} 62 | 63 | for backedout in tqdm(backedouts): 64 | p = Push(backedout) 65 | 66 | label_regressions = p.get_regressions("label") 67 | group_regressions = p.get_regressions("group") 68 | 69 | if ( 70 | 
len(p.get_likely_regressions("label")) == 0 71 | or len(p.get_likely_regressions("group")) == 0 72 | ): 73 | backedout_regressions[backedout] = { 74 | "label": label_regressions, 75 | "group": group_regressions, 76 | } 77 | 78 | with open("backout_regressions.json", "w") as f: 79 | json.dump(backout_regressions, f) 80 | 81 | with open("backedout_regressions.json", "w") as f: 82 | json.dump(backedout_regressions, f) 83 | 84 | 85 | def main() -> None: 86 | description = ( 87 | "Find likely and possible test regressions of backouts and backed-out commits" 88 | ) 89 | parser = argparse.ArgumentParser(description=description) 90 | parser.parse_args() 91 | 92 | go() 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /scripts/bug_classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import argparse 4 | import os 5 | from logging import INFO, basicConfig, getLogger 6 | 7 | import numpy as np 8 | import requests 9 | 10 | from bugbug import bugzilla, db 11 | from bugbug.models import get_model_class 12 | from bugbug.utils import download_model 13 | 14 | basicConfig(level=INFO) 15 | logger = getLogger(__name__) 16 | 17 | 18 | def classify_bugs(model_name: str, bug_id: int) -> None: 19 | model_file_name = f"{model_name}model" 20 | 21 | if not os.path.exists(model_file_name): 22 | logger.info("%s does not exist. 
Downloading the model....", model_file_name) 23 | try: 24 | download_model(model_name) 25 | except requests.HTTPError: 26 | logger.error( 27 | "A pre-trained model is not available, you will need to train it yourself using the trainer script" 28 | ) 29 | raise SystemExit(1) 30 | 31 | model_class = get_model_class(model_name) 32 | model = model_class.load(model_file_name) 33 | 34 | if bug_id: 35 | bugs = bugzilla.get(bug_id).values() 36 | assert bugs, f"A bug with a bug id of {bug_id} was not found" 37 | else: 38 | assert db.download(bugzilla.BUGS_DB) 39 | bugs = bugzilla.get_bugs() 40 | 41 | for bug in bugs: 42 | print( 43 | f"https://bugzilla.mozilla.org/show_bug.cgi?id={bug['id']} - {bug['summary']} " 44 | ) 45 | 46 | if model.calculate_importance: 47 | probas, importance = model.classify( 48 | bug, probabilities=True, importances=True 49 | ) 50 | 51 | model.print_feature_importances( 52 | importance["importances"], class_probabilities=probas 53 | ) 54 | else: 55 | probas = model.classify(bug, probabilities=True, importances=False) 56 | 57 | probability = probas[0] 58 | pred_index = np.argmax(probability) 59 | if len(probability) > 2: 60 | pred_class = model.le.inverse_transform([pred_index])[0] 61 | else: 62 | pred_class = "Positive" if pred_index == 1 else "Negative" 63 | print(f"{pred_class} {probability}") 64 | input() 65 | 66 | 67 | def main() -> None: 68 | description = "Perform evaluation on bugs using the specified model" 69 | parser = argparse.ArgumentParser(description=description) 70 | 71 | parser.add_argument("model", help="Which model to use for evaluation") 72 | parser.add_argument("--bug-id", help="Classify the given bug id", type=int) 73 | 74 | args = parser.parse_args() 75 | 76 | classify_bugs(args.model, args.bug_id) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /scripts/check.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import argparse 4 | import sys 5 | from logging import INFO, basicConfig, getLogger 6 | 7 | from bugbug.model import Model 8 | from bugbug.utils import download_model 9 | 10 | basicConfig(level=INFO) 11 | logger = getLogger(__name__) 12 | 13 | 14 | class ModelChecker: 15 | def go(self, model_name: str) -> None: 16 | # Load the model 17 | model = Model.load(download_model(model_name)) 18 | 19 | # Then call the check method of the model 20 | success = model.check() 21 | 22 | if not success: 23 | msg = f"Check of model {model.__class__!r} failed, check the output for reasons why" 24 | logger.warning(msg) 25 | sys.exit(1) 26 | 27 | 28 | def main() -> None: 29 | description = "Check the models" 30 | parser = argparse.ArgumentParser(description=description) 31 | 32 | parser.add_argument("model", help="Which model to check.") 33 | 34 | args = parser.parse_args() 35 | 36 | checker = ModelChecker() 37 | checker.go(args.model) 38 | 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /scripts/check_all_metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import argparse 7 | import logging 8 | import os 9 | import subprocess 10 | from fnmatch import fnmatch 11 | from pathlib import Path 12 | 13 | import taskcluster 14 | 15 | from bugbug.utils import get_taskcluster_options 16 | 17 | LOGGER = logging.getLogger(__name__) 18 | 19 | logging.basicConfig(level=logging.INFO) 20 | 21 | QUEUE_ROUTE_PATTERN = "index.project.bugbug.train_*.per_date.*" 22 | 23 | CURRENT_DIR = Path(__file__).resolve().parent 24 | 25 | 26 | def download_metric(model_name: str, metric_directory: str): 27 | download_script_path = "bugbug-retrieve-training-metrics" 28 | 29 | cli_args: list[str] = [ 30 | download_script_path, 31 | model_name, 32 | "2019", 33 | "-d", 34 | metric_directory, 35 | ] 36 | 37 | LOGGER.info("Download metrics for %r", model_name) 38 | 39 | subprocess.run(cli_args, check=True) 40 | 41 | 42 | def check_metrics(metric_directory: str, output_directory: str): 43 | analyze_script_path = "bugbug-analyze-training-metrics" 44 | 45 | cli_args: list[str] = [analyze_script_path, metric_directory, output_directory] 46 | 47 | LOGGER.info("Checking metrics") 48 | 49 | subprocess.run(cli_args, check=True) 50 | 51 | 52 | def get_model_name(queue, task_id: str): 53 | dependency_task = queue.task(task_id) 54 | 55 | # Check the route to detect training tasks 56 | for route in dependency_task["routes"]: 57 | if fnmatch(route, QUEUE_ROUTE_PATTERN): 58 | model_name = route.split(".")[4] # model_name = "train_component" 59 | return model_name[6:] 60 | 61 | # Show a warning if no matching route was found, this can happen when the 62 | # current task has a dependency to a non-training task or if the route 63 | # pattern changes. 
64 | LOGGER.warning(f"No matching route found for task id {task_id}") 65 | 66 | 67 | def get_model_names(task_id: str) -> list[str]: 68 | options = get_taskcluster_options() 69 | queue = taskcluster.Queue(options) 70 | task = queue.task(task_id) 71 | 72 | model_names = [] 73 | 74 | for i, task_id in enumerate(task["dependencies"]): 75 | LOGGER.info( 76 | "Loading task dependencies {}/{} {}".format( 77 | i + 1, len(task["dependencies"]), task_id 78 | ) 79 | ) 80 | 81 | model_name = get_model_name(queue, task_id) 82 | 83 | if model_name: 84 | LOGGER.info("Adding model %r to download list", model_name) 85 | model_names.append(model_name) 86 | 87 | return model_names 88 | 89 | 90 | def main(): 91 | description = "Get all the metrics name from taskcluster dependency, download them and check them" 92 | parser = argparse.ArgumentParser(description=description) 93 | 94 | parser.add_argument( 95 | "metric_directory", 96 | metavar="metric-directory", 97 | help="Which directory to download metrics to", 98 | ) 99 | parser.add_argument( 100 | "output_directory", 101 | metavar="output-directory", 102 | help="Which directory to output graphs to", 103 | ) 104 | 105 | parser.add_argument( 106 | "--task-id", 107 | type=str, 108 | default=os.environ.get("TASK_ID"), 109 | help="Taskcluster task id to analyse", 110 | ) 111 | 112 | args = parser.parse_args() 113 | 114 | model_names = get_model_names(args.task_id) 115 | 116 | for model in model_names: 117 | download_metric(model, args.metric_directory) 118 | 119 | check_metrics(args.metric_directory, args.output_directory) 120 | 121 | 122 | if __name__ == "__main__": 123 | main() 124 | -------------------------------------------------------------------------------- /scripts/code_review_tool_evaluator_report.py: -------------------------------------------------------------------------------- 1 | # %% 2 | 3 | import pandas as pd 4 | 5 | from scripts.code_review_tool_evaluator import get_latest_evaluation_results_file 6 | 7 | 
evaluation_results = pd.read_csv( 8 | get_latest_evaluation_results_file("../evaluation_results") 9 | ) 10 | 11 | # %% 12 | 13 | variant_names = evaluation_results["variant_name"].unique() 14 | variant_name = variant_names[0] 15 | 16 | df = evaluation_results[evaluation_results["variant_name"] == variant_name] 17 | 18 | 19 | # %% 20 | new_comments_count = df["new_comment"].count() 21 | new_valid_comments = len(df[~df["new_comment"].isna() & (df["evaluation"] == "VALID")]) 22 | new_invalid_comments = len( 23 | df[~df["new_comment"].isna() & (df["evaluation"] == "INVALID")] 24 | ) 25 | new_unevaluated_comments = len(df[~df["new_comment"].isna() & df["evaluation"].isna()]) 26 | 27 | old_comments_count = df["old_comments_count"].sum() 28 | old_valid_comments = df[df["evaluation"] == "VALID"]["old_comments_count"].sum() 29 | old_invalid_comments = df[df["evaluation"] == "INVALID"]["old_comments_count"].sum() 30 | 31 | matched_valid_comments = df[ 32 | ~df["new_comment"].isna() 33 | & ~df["old_comment"].isna() 34 | & (df["evaluation"] == "VALID") 35 | ]["old_comments_count"].sum() 36 | matched_invalid_comments = df[ 37 | ~df["new_comment"].isna() 38 | & ~df["old_comment"].isna() 39 | & (df["evaluation"] == "INVALID") 40 | ]["old_comments_count"].sum() 41 | 42 | print("--------------------") 43 | print("Variant Name:", variant_name) 44 | print("--------------------") 45 | print("New Comments:", new_comments_count) 46 | print("New Valid Comments:", new_valid_comments) 47 | print("New Invalid Comments:", new_invalid_comments) 48 | print("New Unevaluated Comments:", new_unevaluated_comments) 49 | print("--------------------") 50 | print("Old Comments:", old_comments_count) 51 | print("Old Valid Comments:", old_valid_comments) 52 | print("Old Invalid Comments:", old_invalid_comments) 53 | print("--------------------") 54 | print( 55 | "Recalled comments:", 56 | (matched_valid_comments + matched_invalid_comments) / old_comments_count * 100, 57 | ) 58 | print("Recalled valid 
comments:", matched_valid_comments / old_valid_comments * 100) 59 | print( 60 | "Recalled invalid comments:", matched_invalid_comments / old_invalid_comments * 100 61 | ) 62 | print("--------------------") 63 | print( 64 | "Missed valid comments:", 65 | (old_valid_comments - matched_valid_comments) / old_valid_comments * 100, 66 | ) 67 | print( 68 | "Missed invalid comments:", 69 | (old_invalid_comments - matched_invalid_comments) / old_invalid_comments * 100, 70 | ) 71 | 72 | 73 | # %% 74 | 75 | 76 | df = evaluation_results[ 77 | evaluation_results["evaluation"].isin(["VALID", "INVALID"]) 78 | & ~evaluation_results["new_comment"].isna() 79 | ].sort_values(by=["evaluation", "revision_id", "new_comment"]) 80 | 81 | 82 | df["id"] = df["diff_id"].astype(str) + " | " + df["new_comment"].astype(str) 83 | df_old = df[df["variant_name"] == variant_names[0]] 84 | df_new = df[df["variant_name"] == variant_names[1]] 85 | 86 | in_new_but_not_in_old = df_new[~df_new["id"].isin(df_old["id"])] 87 | 88 | print( 89 | "Examples of comments that were filtered by the old version but were not filtered by the new version:\n" 90 | ) 91 | print( 92 | in_new_but_not_in_old[["revision_id", "new_comment", "evaluation"]].to_markdown( 93 | index=False 94 | ) 95 | ) 96 | 97 | 98 | in_old_but_not_in_new = df_old[~df_old["id"].isin(df_new["id"])] 99 | print( 100 | "\n\nExamples of comments that were filtered by the new version but were not filtered by the old version:\n" 101 | ) 102 | print( 103 | in_old_but_not_in_new[["revision_id", "new_comment", "evaluation"]].to_markdown( 104 | index=False 105 | ) 106 | ) 107 | 108 | # %% 109 | -------------------------------------------------------------------------------- /scripts/code_review_tool_runner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. 
If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import argparse 7 | import sys 8 | 9 | from bugbug import generative_model_tool 10 | from bugbug.code_search.function_search import function_search_classes 11 | from bugbug.tools import code_review 12 | from bugbug.vectordb import QdrantVectorDB 13 | 14 | 15 | def run(args) -> None: 16 | llm = generative_model_tool.create_llm_from_args(args) 17 | 18 | function_search = ( 19 | function_search_classes[args.function_search_type]() 20 | if args.function_search_type is not None 21 | else None 22 | ) 23 | vector_db = QdrantVectorDB("diff_comments") 24 | review_comments_db = code_review.ReviewCommentsDB(vector_db) 25 | code_review_tool = code_review.CodeReviewTool( 26 | [llm], 27 | llm, 28 | function_search=function_search, 29 | review_comments_db=review_comments_db, 30 | show_patch_example=False, 31 | ) 32 | 33 | review_data = code_review.review_data_classes[args.review_platform]() 34 | 35 | revision = review_data.get_review_request_by_id(args.review_request_id) 36 | patch = review_data.get_patch_by_id(revision.patch_id) 37 | 38 | print(patch) 39 | print(code_review_tool.run(patch)) 40 | input() 41 | 42 | 43 | def parse_args(args): 44 | parser = argparse.ArgumentParser( 45 | formatter_class=argparse.ArgumentDefaultsHelpFormatter 46 | ) 47 | parser.add_argument( 48 | "--review_platform", 49 | help="Review platform", 50 | choices=list(code_review.review_data_classes.keys()), 51 | ) 52 | parser.add_argument( 53 | "--review_request_id", 54 | help="Review request ID", 55 | ) 56 | generative_model_tool.create_llm_to_args(parser) 57 | parser.add_argument( 58 | "--function_search_type", 59 | help="Function search tool", 60 | choices=list(function_search_classes.keys()), 61 | ) 62 | return parser.parse_args(args) 63 | 64 | 65 | if __name__ == "__main__": 66 | args = parse_args(sys.argv[1:]) 67 | run(args) 68 | 
-------------------------------------------------------------------------------- /scripts/comment_level_labeler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import argparse 7 | import csv 8 | import os 9 | import random 10 | 11 | from bugbug import bugzilla 12 | from bugbug.models.bug import BugModel 13 | from bugbug.models.regression import RegressionModel 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument( 17 | "--goal", 18 | help="Goal of the labeler", 19 | choices=["str", "regressionrange"], 20 | default="str", 21 | ) 22 | args = parser.parse_args() 23 | 24 | if args.goal == "str": 25 | model = BugModel.load("bugmodel") 26 | elif args.goal == "regressionrange": 27 | model = RegressionModel.load("regressionmodel") 28 | 29 | file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv") 30 | 31 | with open(file_path, "r") as f: 32 | reader = csv.reader(f) 33 | next(reader) 34 | labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader] 35 | 36 | already_done = set((c[0], c[1]) for c in labeled_comments) 37 | 38 | bugs = [] 39 | for bug in bugzilla.get_bugs(): 40 | # For the str and regressionrange problems, we don't care about test failures, 41 | if ( 42 | "intermittent-failure" in bug["keywords"] 43 | or "stockwell" in bug["whiteboard"] 44 | or "permafail" in bug["summary"].lower() 45 | ): 46 | continue 47 | 48 | # bugs filed from Socorro, 49 | if ( 50 | "this bug was filed from the socorro interface" 51 | in bug["comments"][0]["text"].lower() 52 | ): 53 | continue 54 | 55 | # and fuzzing bugs. 
56 | if "fuzzing" in bug["comments"][0]["text"].lower(): 57 | continue 58 | 59 | bugs.append(bug) 60 | 61 | random.shuffle(bugs) 62 | 63 | for bug in bugs: 64 | # Only show bugs that are really bugs/regressions for labeling. 65 | c = model.classify(bug) 66 | if c != 1: 67 | continue 68 | 69 | v = None 70 | 71 | for i, comment in enumerate(bug["comments"]): 72 | if (bug["id"], i) in already_done: 73 | continue 74 | 75 | os.system("clear") 76 | print(f"Bug {bug['id']} - {bug['summary']}") 77 | print(f"Comment {i}") 78 | print(comment["text"]) 79 | 80 | if args.goal == "str": 81 | print( 82 | "\nY for comment containing STR, N for comment not containing STR, K to skip, E to exit" 83 | ) 84 | elif args.goal == "regressionrange": 85 | print( 86 | "\nY for comment containing regression range, N for comment not containing regression range, K to skip, E to exit" 87 | ) 88 | v = input() 89 | 90 | if v in ["e", "k"]: 91 | break 92 | 93 | if v in ["y", "n"]: 94 | labeled_comments.append((bug["id"], i, v)) 95 | 96 | if v not in ["e", "k"]: 97 | with open(file_path, "w") as f: 98 | writer = csv.writer(f) 99 | writer.writerow(["bug_id", "comment_num", f"has_{args.goal}"]) 100 | writer.writerows(sorted(labeled_comments)) 101 | 102 | print("\nE to exit, anything else to continue") 103 | v = input() 104 | 105 | if v == "e": 106 | break 107 | -------------------------------------------------------------------------------- /scripts/comment_resolver_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | 5 | from dotenv import load_dotenv 6 | 7 | import bugbug.db as db 8 | import bugbug.phabricator as phabricator 9 | from bugbug.generative_model_tool import create_llm_from_args 10 | from bugbug.tools.comment_resolver import ( 11 | CodeGeneratorTool, 12 | FixCommentDB, 13 | LocalQdrantVectorDB, 14 | generate_fixes, 15 | generate_individual_fix, 16 | ) 17 | 18 | 19 | def run(args) -> None: 20 | 
load_dotenv() 21 | 22 | logging.basicConfig(level=logging.INFO) 23 | 24 | db = FixCommentDB(LocalQdrantVectorDB(collection_name="fix_comments")) 25 | 26 | if args.create_db: 27 | db.db.delete_collection() 28 | db.db.setup() 29 | db.upload_dataset(args.dataset_file) 30 | 31 | llm = create_llm_from_args(args) 32 | llm_tool = CodeGeneratorTool(llm=llm, db=db) 33 | 34 | if args.revision_id and args.diff_id and args.comment_id: 35 | pass 36 | # TODO: Create this function 37 | generate_individual_fix( 38 | llm_tool=llm_tool, 39 | db=db, 40 | revision_id=args.revision_id, 41 | diff_id=args.diff_id, 42 | comment_id=args.comment_id, 43 | ) 44 | else: 45 | generate_fixes( 46 | llm_tool=llm_tool, 47 | db=db, 48 | generation_limit=args.generation_limit, 49 | prompt_types=args.prompt_types, 50 | hunk_sizes=args.hunk_sizes, 51 | diff_length_limits=args.diff_length_limits, 52 | output_csv=args.output_csv, 53 | ) 54 | 55 | 56 | def parse_args(args): 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument( 59 | "--llm", 60 | help="LLM", 61 | choices=["openai"], 62 | default="openai", 63 | ) 64 | parser.add_argument( 65 | "--create-db", 66 | action="store_true", 67 | help="If set, the local Qdrant database will be created and populated.", 68 | ) 69 | parser.add_argument( 70 | "--dataset-file", 71 | type=str, 72 | default="data/fixed_comments.json", 73 | help="Dataset file to upload as Qdrant database.", 74 | ) 75 | parser.add_argument( 76 | "--output-csv", 77 | type=str, 78 | default="metrics_results.csv", 79 | help="Output CSV file for results.", 80 | ) 81 | parser.add_argument( 82 | "--prompt-types", 83 | nargs="+", 84 | default=["zero-shot"], 85 | help="Types of prompts to use.", 86 | ) 87 | parser.add_argument( 88 | "--diff-length-limits", 89 | nargs="+", 90 | type=int, 91 | default=[1000], 92 | help="Diff length limits to enforce when searching for examples.", 93 | ) 94 | parser.add_argument( 95 | "--hunk-sizes", 96 | nargs="+", 97 | type=int, 98 | default=[20], 99 | 
help="Hunk sizes to enforce when searching for examples.", 100 | ) 101 | parser.add_argument( 102 | "--generation-limit", 103 | type=int, 104 | default=100, 105 | help="Maximum number of generations.", 106 | ) 107 | parser.add_argument( 108 | "--revision-id", 109 | type=int, 110 | help="Revision ID for individual fix generation.", 111 | ) 112 | parser.add_argument( 113 | "--diff-id", 114 | type=int, 115 | help="Diff ID for individual fix generation.", 116 | ) 117 | parser.add_argument( 118 | "--comment-id", 119 | type=int, 120 | help="Comment ID for individual fix generation.", 121 | ) 122 | 123 | return parser.parse_args(args) 124 | 125 | 126 | if __name__ == "__main__": 127 | db.download(phabricator.FIXED_COMMENTS_DB) 128 | args = parse_args(sys.argv[1:]) 129 | run(args) 130 | -------------------------------------------------------------------------------- /scripts/commit_retriever.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import argparse 4 | import os 5 | from logging import INFO, basicConfig, getLogger 6 | 7 | import hglib 8 | 9 | from bugbug import db, repository 10 | from bugbug.utils import create_tar_zst, zstd_compress 11 | 12 | basicConfig(level=INFO) 13 | logger = getLogger(__name__) 14 | 15 | 16 | class Retriever(object): 17 | def __init__(self, cache_root): 18 | assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir." 
def main():
    """CLI entry point: retrieve commits from mozilla-central into the cache."""
    description = "Retrieve and extract the information from Mozilla-Central repository"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "--limit",
        type=int,
        help="Only download the N oldest commits, used mainly for integration tests",
    )
    # Store the value under a valid Python identifier so it is reachable as
    # args.cache_root (the old "cache-root" name required a getattr
    # workaround); metavar keeps the original spelling in --help output and
    # the command-line invocation is unchanged (positional argument).
    parser.add_argument(
        "cache_root", metavar="cache-root", help="Cache for repository clones."
    )

    args = parser.parse_args()

    retriever = Retriever(args.cache_root)

    retriever.retrieve_commits(args.limit)
def classify_reports(model_name: str, report_text: str) -> None:
    """Classify a single compatibility report with a pre-trained bugbug model.

    Args:
        model_name: Name of the model to use; the model file is expected at
            "<model_name>model" in the current working directory and is
            downloaded if missing.
        report_text: Free-form report text to classify (used as the report
            body; the title is left empty).

    Raises:
        SystemExit: If the model file is absent and no pre-trained model can
            be downloaded.
    """
    model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info("%s does not exist. Downloading the model....", model_file_name)
        try:
            download_model(model_name)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    logger.info("%s", report_text)

    # The model consumes an issue-like dict; only the body is populated here.
    report = {"body": report_text, "title": ""}

    if model.calculate_importance:
        # When supported, also compute and print per-feature importances.
        probas, importance = model.classify(
            report, probabilities=True, importances=True
        )

        model.print_feature_importances(
            importance["importances"], class_probabilities=probas
        )
    else:
        probas = model.classify(report, probabilities=True, importances=False)

    probability = probas[0]
    pred_index = np.argmax(probability)
    if len(probability) > 2:
        # Multi-class model: map the predicted index back to its label.
        pred_class = model.le.inverse_transform([pred_index])[0]
    else:
        pred_class = "Positive" if pred_index == 1 else "Negative"
    logger.info("%s %s", pred_class, probability)
    # NOTE(review): blocks waiting for user input before returning —
    # presumably to keep the output visible; confirm this is intended when
    # run non-interactively.
    input()
def generate_sheet(model_name: str, token: str, days: int, threshold: float) -> None:
    """Classify recent bugs and write the predictions to a CSV sheet.

    Args:
        model_name: Name of the bugbug model; "<model_name>model" must exist
            locally (train it with trainer.py first).
        token: Bugzilla API token.
        days: How many days back to fetch bugs for.
        threshold: Confidence threshold used for binary models.
    """
    model_file_name = f"{model_name}model"

    assert os.path.exists(model_file_name), (
        f"{model_file_name} does not exist. Train the model with trainer.py first."
    )

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    bugzilla.set_token(token)
    bug_ids = bugzilla.get_ids_between(datetime.utcnow() - timedelta(days))
    bugs = bugzilla.get(bug_ids)

    logger.info("Classifying %d bugs...", len(bugs))

    rows = [["Bug", f"{model_name}(model)", model_name, "Title"]]

    for bug in bugs.values():
        p = model.classify(bug, probabilities=True)
        probability = p[0]
        if len(probability) > 2:
            # Multi-class model: report the most probable class name.
            index = np.argmax(probability)
            prediction = model.class_names[index]
        else:
            prediction = "y" if probability[1] >= threshold else "n"

        rows.append(
            [
                f"https://bugzilla.mozilla.org/show_bug.cgi?id={bug['id']}",
                prediction,
                "",  # Left blank for the human label.
                bug["summary"],
            ]
        )

    os.makedirs("sheets", exist_ok=True)
    # newline="" is required by the csv module when writing, otherwise extra
    # blank lines appear on platforms using "\r\n" line endings.
    with open(
        os.path.join(
            "sheets",
            f"{model_name}-{datetime.utcnow().strftime('%Y-%m-%d')}-labels.csv",
        ),
        "w",
        newline="",
    ) as f:
        writer = csv.writer(f)
        writer.writerows(rows)
None: 67 | description = "Perform evaluation on bugs from specified days back on the specified model and generate a csv file " 68 | parser = argparse.ArgumentParser(description=description) 69 | 70 | parser.add_argument("model", help="Which model to generate a csv for.") 71 | parser.add_argument("token", help="Bugzilla token") 72 | parser.add_argument( 73 | "days", 74 | type=int, 75 | default=7, 76 | help="No. of days back from which bugs will be evaluated", 77 | ) 78 | parser.add_argument( 79 | "threshold", type=float, default=0.7, help="Confidence threshold for the model" 80 | ) 81 | 82 | args = parser.parse_args() 83 | 84 | generate_sheet(args.model, args.token, args.days, args.threshold) 85 | 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /scripts/get_type_labels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
def parse_args(args):
    """Parse the command-line arguments.

    Args:
        args: Raw argument list (typically sys.argv[1:]).

    Returns:
        argparse.Namespace with a `types` list attribute.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--types",
        nargs="*",
        default=["defect", "enhancement", "task"],
        help="Types to retrieve",
    )
    return arg_parser.parse_args(args)
def classify_issues(
    owner: str, repo: str, retrieve_events: bool, model_name: str, issue_number: int
) -> None:
    """Classify GitHub issues with a pre-trained bugbug model.

    Args:
        owner: GitHub repository owner.
        repo: GitHub repository name.
        retrieve_events: Whether to retrieve events for each issue.
        model_name: Name of the model to use; the model file is expected at
            "<model_name>model" and is downloaded if missing.
        issue_number: If truthy, classify only this issue; otherwise iterate
            over the whole issues DB.

    Raises:
        SystemExit: If the model file is absent and no pre-trained model can
            be downloaded.
    """
    model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info("%s does not exist. Downloading the model....", model_file_name)
        try:
            download_model(model_name)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    github = Github(
        owner=owner, repo=repo, state="all", retrieve_events=retrieve_events
    )

    if issue_number:
        issue = github.fetch_issue_by_number(owner, repo, issue_number, retrieve_events)
        # Assert on the fetched issue itself: the previous
        # `assert iter([...])` was always truthy, so a missing issue was
        # never reported.
        assert issue, f"An issue with a number of {issue_number} was not found"
        issues = iter([issue])
    else:
        assert db.download(github.db_path)
        issues = github.get_issues()

    for issue in issues:
        logger.info("%s - %s ", issue["url"], issue["title"])

        if model.calculate_importance:
            # When supported, also compute and print per-feature importances.
            probas, importance = model.classify(
                issue, probabilities=True, importances=True
            )

            model.print_feature_importances(
                importance["importances"], class_probabilities=probas
            )
        else:
            probas = model.classify(issue, probabilities=True, importances=False)

        probability = probas[0]
        pred_index = np.argmax(probability)
        if len(probability) > 2:
            # Multi-class model: map the predicted index back to its label.
            pred_class = model.le.inverse_transform([pred_index])[0]
        else:
            pred_class = "Positive" if pred_index == 1 else "Negative"
        logger.info("%s %s", pred_class, probability)
        # Pause between issues so each result can be inspected interactively.
        input()
| required=True, 90 | ) 91 | parser.add_argument( 92 | "--retrieve-events", 93 | action="store_true", 94 | help="Whether to retrieve events for each issue.", 95 | ) 96 | 97 | parser.add_argument( 98 | "--issue-number", help="Classify the given github issue by number", type=int 99 | ) 100 | 101 | args = parser.parse_args() 102 | 103 | classify_issues( 104 | args.owner, args.repo, args.retrieve_events, args.model, args.issue_number 105 | ) 106 | 107 | 108 | if __name__ == "__main__": 109 | main() 110 | -------------------------------------------------------------------------------- /scripts/integration_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euox pipefail 3 | 4 | # Script that runs the whole data pipeline as fast as possible to validate 5 | # that every part is working with the others 6 | 7 | # Supposed to be run from the repository root directory 8 | 9 | # Remove the models and any old data 10 | rm defectenhancementtaskmodel* || true; 11 | rm backout* || true; 12 | rm -Rf data || true; 13 | 14 | ls -lh 15 | 16 | # First retrieve a subset of bug data 17 | bugbug-data-bugzilla --limit 500 18 | ls -lh 19 | ls -lh data 20 | 21 | # The bug data force download the commit DB 22 | # Removes it to ensure the commit retrieval work as expected 23 | rm data/commit* 24 | 25 | # Then generate a test dataset of fixed inline comments 26 | bugbug-fixed-comments --limit 150 27 | ls -lh 28 | ls -lh data 29 | 30 | # Remove DB to ensure it works as expected 31 | rm data/fixed_comments.json 32 | 33 | # Then retrieve a subset of commit data 34 | bugbug-data-commits --limit 500 "${CACHE_DIR:-cache}" 35 | test -d ${CACHE_DIR:-cache}/mozilla-central 36 | ls -lh 37 | ls -lh data 38 | 39 | 40 | # Then train a bug model 41 | bugbug-train defectenhancementtask --limit 500 --no-download 42 | 43 | # Then train a commit model 44 | # FIXME: Disabled temporary due to a problem in identifying backout comments 45 | # See: 
https://github.com/mozilla/bugbug/issues/5020#issuecomment-2884394426 46 | # bugbug-train backout --limit 30000 --no-download 47 | 48 | # Then spin the http service up 49 | # This part duplicates the http service Dockerfiles because we cannot easily spin Docker containers 50 | # up on Taskcluster 51 | cp VERSION http_service/VERSION 52 | pip install --disable-pip-version-check --quiet --no-cache-dir ./http_service 53 | 54 | export REDIS_URL=redis://localhost:6379/4 55 | 56 | # Start Redis 57 | redis-server >/dev/null 2>&1 & 58 | redis_pid=$! 59 | 60 | sleep 1 61 | 62 | # Uncomment following line to clean up the redis-server 63 | redis-cli -n 4 FLUSHDB 64 | 65 | # Start the http server 66 | gunicorn -b 127.0.0.1:8000 bugbug_http.app --preload --timeout 30 -w 3 & 67 | gunicorn_pid=$! 68 | 69 | # Start the background worker 70 | env BUGBUG_ALLOW_MISSING_MODELS=1 BUGBUG_REPO_DIR=${CACHE_DIR:-cache}/mozilla-central bugbug-http-worker high default low & 71 | worker_pid=$! 72 | 73 | # Ensure we take down the containers at the end 74 | trap 'kill $gunicorn_pid && kill $worker_pid && kill $redis_pid' EXIT 75 | 76 | # Then check that we can correctly classify a bug 77 | sleep 10 && python http_service/tests/test_integration.py 78 | -------------------------------------------------------------------------------- /scripts/maintenance_effectiveness_indicator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
def main() -> None:
    """Compute and print the maintenance effectiveness indicator for teams."""
    parser = argparse.ArgumentParser()
    parser.add_argument("teams", help="Bugzilla team", type=str, nargs="+")
    parser.add_argument(
        "start_date",
        help="Start date of the period (YYYY-MM-DD)",
        type=str,
    )
    parser.add_argument(
        "end_date",
        help="End date of the period (YYYY-MM-DD)",
        type=str,
    )
    parser.add_argument(
        "--components",
        help="Bugzilla components",
        type=str,
        nargs="*",
    )

    args = parser.parse_args()

    # Try to use a Bugzilla API key if available.
    try:
        bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))
    except ValueError:
        logger.info(
            "If you want to include security bugs too, please set the BUGBUG_BUGZILLA_TOKEN environment variable to your Bugzilla API key."
        )

    result = bugzilla.calculate_maintenance_effectiveness_indicator(
        args.teams,
        dateutil.parser.parse(args.start_date),
        dateutil.parser.parse(args.end_date),
        args.components,
    )

    for factor, value in result["stats"].items():
        # The previous "%s: %d" format truncated the round(value, 2) result
        # to an integer and raised OverflowError for math.inf (the case the
        # conditional explicitly guards). Print the rounded value, or inf
        # as-is.
        print(f"{factor}: {value if value == math.inf else round(value, 2)}")

    for query, link in result["queries"].items():
        print(f"{query}: {link}")
def main():
    """Populate the "diff_comments" vector DB with Phabricator review comments."""
    phab_reviews = PhabricatorReviewData()
    qdrant = QdrantVectorDB("diff_comments")
    qdrant.setup()
    store = ReviewCommentsDB(qdrant)
    # TODO: support resuming from where last run left off. We should run it from
    # scratch only once. Following runs should add only new comments.
    comments_with_hunks = phab_reviews.retrieve_comments_with_hunks()
    store.add_comments_by_hunk(comments_with_hunks)
30 | """ 31 | phabricator.set_api_key( 32 | get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN") 33 | ) 34 | 35 | db.download(phabricator.REVISIONS_DB) 36 | 37 | # Get the commits DB, as we need it to get the revision IDs linked to recent commits. 38 | assert db.download(repository.COMMITS_DB) 39 | 40 | # Get the bugs DB, as we need it to get the revision IDs linked to bugs. 41 | assert db.download(bugzilla.BUGS_DB) 42 | 43 | phabricator.download_modified_revisions() 44 | 45 | # Get IDs of revisions linked to commits. 46 | start_date = datetime.now(timezone.utc) - relativedelta(months=limit_months) 47 | revision_ids = list( 48 | ( 49 | filter( 50 | None, 51 | ( 52 | repository.get_revision_id(commit) 53 | for commit in repository.get_commits() 54 | if dateutil.parser.parse(commit["pushdate"]).replace( 55 | tzinfo=timezone.utc 56 | ) 57 | >= start_date 58 | ), 59 | ) 60 | ) 61 | ) 62 | 63 | # Get IDs of revisions linked to bugs. 64 | for bug in bugzilla.get_bugs(): 65 | if dateutil.parser.parse(bug["last_change_time"]) < start_date: 66 | continue 67 | 68 | revision_ids += bugzilla.get_revision_ids(bug) 69 | 70 | if limit_count is not None: 71 | revision_ids = revision_ids[-limit_count:] 72 | 73 | phabricator.download_revisions(revision_ids) 74 | 75 | zstd_compress(phabricator.REVISIONS_DB) 76 | 77 | 78 | def main() -> None: 79 | description = "Retrieve revisions from Phabricator" 80 | parser = argparse.ArgumentParser(description=description) 81 | parser.add_argument( 82 | "--limit-months", 83 | type=int, 84 | default=24, 85 | help="The number of months to go back in time to retrieve revisions.", 86 | ) 87 | parser.add_argument( 88 | "--limit", 89 | type=int, 90 | help="Only download the N oldest revisions, used mainly for integration tests", 91 | ) 92 | 93 | # Parse args to show the help if `--help` is passed 94 | args = parser.parse_args() 95 | 96 | retriever = Retriever() 97 | retriever.retrieve_revisions(args.limit_months, args.limit) 98 | 99 | 100 | if 
__name__ == "__main__": 101 | main() 102 | -------------------------------------------------------------------------------- /scripts/trainer_extract_args.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import logging 7 | import os 8 | import re 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def get_model_name() -> str | None: 15 | pr_description = os.environ.get("PR_DESCRIPTION") 16 | if not pr_description: 17 | logger.error("The PR_DESCRIPTION environment variable does not exist") 18 | return None 19 | 20 | match = re.search(r"Train on Taskcluster:\s+([a-z_1-9]+)", pr_description) 21 | if not match: 22 | logger.error( 23 | "Could not identify the model name using the 'Train on Taskcluster' keyword from the Pull Request description" 24 | ) 25 | return None 26 | 27 | model_name = match.group(1) 28 | 29 | return model_name 30 | 31 | 32 | def main(): 33 | model = get_model_name() 34 | if model: 35 | print(model) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
def read_requirements(file_):
    """Read a pip requirements file next to setup.py.

    Inline comments (anything after "#") are stripped and duplicates removed.

    Args:
        file_: File name relative to the directory containing setup.py.

    Returns:
        Sorted list of unique requirement strings.
    """
    with open(os.path.join(here, file_)) as f:
        requirements = {line.split("#")[0].strip() for line in f}
        # Blank and comment-only lines reduce to the empty string; drop it so
        # an empty "requirement" is never handed to setuptools.
        requirements.discard("")
        return sorted(requirements)
"bugbug-check-all-metrics = scripts.check_all_metrics:main", 60 | "bugbug-past-bugs-by-unit = scripts.past_bugs_by_unit:main", 61 | "bugbug-testing-policy-stats = scripts.testing_policy_stats:main", 62 | "bugbug-generate-landings-risk-report = scripts.generate_landings_risk_report:main", 63 | "bugbug-shadow-scheduler-stats = scripts.shadow_scheduler_stats:main", 64 | "bugbug-data-github = scripts.github_issue_retriever:main", 65 | "bugbug-fixed-comments = scripts.inline_comments_data_collection:main", 66 | ] 67 | }, 68 | classifiers=[ 69 | "Programming Language :: Python :: 3.10", 70 | "Programming Language :: Python :: 3.9", 71 | "Programming Language :: Python :: 3 :: Only", 72 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 73 | ], 74 | ) 75 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | coverage==7.8.2 2 | hypothesis==6.135.0 3 | igraph==0.11.8 4 | jsonschema==4.24.0 5 | pre-commit==4.2.0 6 | pytest==8.3.5 7 | pytest-cov==6.1.1 8 | pytest-responses==0.5.1 9 | responses==0.21.0 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
@pytest.fixture(autouse=True)
def mock_data(tmp_path):
    """Copy fixture DBs into a fresh tmp "data" dir and chdir into it.

    Runs automatically for every test so code under test that reads the
    bugs/commits/issues DBs from ./data finds the fixture copies.
    """
    os.mkdir(tmp_path / "data")

    DBs = [
        os.path.basename(bugzilla.BUGS_DB),
        os.path.basename(repository.COMMITS_DB),
        os.path.basename("data/github_webcompat_web-bugs_issues.json"),
    ]

    # Renamed loop variable: the original `for f in DBs` was shadowed by the
    # `with open(...) as f` target inside the loop body.
    for db_name in DBs:
        shutil.copyfile(
            os.path.join(FIXTURES_DIR, db_name), tmp_path / "data" / db_name
        )
        # Fake etag so the code under test treats the DB as already
        # downloaded and up to date.
        with open(tmp_path / "data" / f"{db_name}.zst.etag", "w") as etag_file:
            etag_file.write("etag")

    os.chdir(tmp_path)
"real_name": "Robert Helmer [:rhelmer]", "name": "rhelmer@mozilla.com", "nick": "rhelmer", "id": 17036}} 3 | {"creator_detail": {"email": "intermittent-bug-filer@mozilla.bugs", "real_name": "Treeherder Bug Filer", "name": "intermittent-bug-filer@mozilla.bugs", "nick": "intermittent-bug-filer", "id": 573381}} 4 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/bug_types.json: -------------------------------------------------------------------------------- 1 | {"keywords": ["meta", "perf"], "whiteboard": "", "cf_crash_signature": ""} 2 | {"keywords": ["memory-leak", "regression"], "whiteboard": "[MemShrink:P1]", "cf_crash_signature": ""} 3 | {"whiteboard": "", "keywords": ["power"]} 4 | {"keywords": ["sec-want"], "whiteboard": "[sg:want][psm-padlock]"} 5 | {"keywords": ["crash", "regression"], "whiteboard": "", "cf_crash_signature": "[@ audiounit_property_listener_callback]"} 6 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/comment_count.json: -------------------------------------------------------------------------------- 1 | {"comment_count": 4} 2 | {"comment_count": 28} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/component.json: -------------------------------------------------------------------------------- 1 | {"component": "Graphics"} 2 | {"component": "CSS Parsing and Computation"} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_crash_signature.json: -------------------------------------------------------------------------------- 1 | {"cf_crash_signature": ""} 2 | {"cf_crash_signature": "[@ RtlpScanEnvironment + 0x1dc | mozilla::detail::MutexImpl::lock()]"} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_cve_in_alias.json: 
-------------------------------------------------------------------------------- 1 | {"alias": "CVE-2017-7813"} 2 | {"alias": null} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_github_url.json: -------------------------------------------------------------------------------- 1 | {"url": "https://github.com/w3c/webcomponents/issues/635"} 2 | {"url": ""} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_regression_range.json: -------------------------------------------------------------------------------- 1 | {"cf_has_regression_range": "yes"} 2 | {"cf_has_regression_range": "---"} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_str.json: -------------------------------------------------------------------------------- 1 | {"cf_has_str": "yes"} 2 | {"cf_has_str": "---"} 3 | {"cf_has_str": "no"} 4 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_url.json: -------------------------------------------------------------------------------- 1 | {"url": "data:text/html;charset=UTF-8,"} 2 | {"url": ""} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/has_w3c_url.json: -------------------------------------------------------------------------------- 1 | {"url": "https://github.com/w3c/webcomponents/issues/635"} 2 | {"url": ""} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/is_coverity_issue.json: -------------------------------------------------------------------------------- 1 | {"summary": "Firefox Nightly 56 shows no buttons and no page content.", "whiteboard": ""} 2 | {"whiteboard": "", "summary": "[CID 1419486] signed/unsigned conversion error in pk11 signature test"} 3 | 
-------------------------------------------------------------------------------- /tests/fixtures/bug_features/is_mozillian.json: -------------------------------------------------------------------------------- 1 | {"creator_detail": {"email": "johngraciliano@gmail.com", "real_name": "", "name": "johngraciliano@gmail.com", "nick": "johngraciliano", "id": 532161}} 2 | {"creator_detail": {"email": "bdahl@mozilla.com", "real_name": "Brendan Dahl [:bdahl]", "name": "bdahl@mozilla.com", "nick": "bdahl", "id": 425126}} 3 | {"creator_detail": {"email": "asa@mozilla.org", "real_name": "Asa Dotzler [:asa]", "name": "asa@mozilla.org", "nick": "asa", "id": 5003}} 4 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/keywords.json: -------------------------------------------------------------------------------- 1 | {"keywords": ["crash", "intermittent-failure", "stale-bug"]} 2 | {"keywords": ["bulk-close-intermittents", "crash", "intermittent-failure"]} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/patches.json: -------------------------------------------------------------------------------- 1 | {"attachments": [{"content_type": "text/plain", "creator": "ehsan@mozilla.com", "flags": [{"modification_date": "2017-07-04T08:24:38Z", "creation_date": "2017-07-04T02:16:27Z", "type_id": 4, "status": "+", "name": "review", "id": 1606172, "setter": "mzehe@mozilla.com"}], "is_patch": 1, "creation_time": "2017-07-04T02:16:27Z", "id": 8883151, "is_obsolete": 0}]} 2 | {"attachments": []} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/product.json: -------------------------------------------------------------------------------- 1 | {"product": "Core"} 2 | {"product": "Firefox for Android"} 3 | -------------------------------------------------------------------------------- 
/tests/fixtures/bug_features/severity.json: -------------------------------------------------------------------------------- 1 | {"severity": "major"} 2 | {"severity": "normal"} 3 | -------------------------------------------------------------------------------- /tests/fixtures/bug_features/whiteboard.json: -------------------------------------------------------------------------------- 1 | {"whiteboard": "[MemShrink][platform-rel-Facebook]"} 2 | {"whiteboard": ""} 3 | {"whiteboard": "inj+ [AV:Quick Heal] "} 4 | {"whiteboard": "[AV:Quick Heal][regressed sept 6th][dll version is 3.0.1.*]"} 5 | {"whiteboard": "[AV:Quick Heal]inj+"} 6 | {"whiteboard": "[AV:Quick Heal] inj+"} 7 | {"whiteboard": "inj+ [AV:Quick Heal]"} 8 | {"whiteboard": "inj+[AV:Quick Heal]"} 9 | {"whiteboard": "inj+ ux [AV:Quick Heal] qf"} 10 | -------------------------------------------------------------------------------- /tests/test_assignee.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models import assignee 7 | from bugbug.models.assignee import AssigneeModel 8 | 9 | 10 | def test_get_assignee_labels(): 11 | assignee.MINIMUM_ASSIGNMENTS = 1 12 | model = AssigneeModel() 13 | classes, _ = model.get_labels() 14 | assert len(classes) != 0 15 | assert classes[1320039] == "gijskruitbosch+bugs@gmail.com" 16 | assert classes[1045018] == "padenot@mozilla.com" 17 | assert 1319973 not in classes 18 | -------------------------------------------------------------------------------- /tests/test_backout.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. 
If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.backout import BackoutModel 7 | 8 | 9 | def test_get_backout_labels(): 10 | model = BackoutModel() 11 | classes, _ = model.get_labels() 12 | assert classes["c2b5cf7bde83db072fc206c24d1cab72354be727"] == 1 13 | assert classes["9d576871fd33bed006dcdccfba880a4ed591f870"] != 1 14 | -------------------------------------------------------------------------------- /tests/test_bug.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.defect import DefectModel 7 | 8 | 9 | def test_get_bug_labels(): 10 | model = DefectModel() 11 | classes, _ = model.get_labels() 12 | # labels from bug_nobug.csv 13 | assert classes[1087488] 14 | assert not classes[1101825] 15 | # labels from regression_bug_nobug.csv 16 | assert not classes[1586096] # nobug 17 | assert classes[518272] # regression 18 | assert classes[528988] # bug_unknown_regression 19 | assert classes[1037762] # bug_no_regression 20 | # labels from defectenhancementtask.csv 21 | assert not classes[1488307] # task 22 | assert classes[1488310] # defect 23 | assert not classes[1531080] # enhancement 24 | -------------------------------------------------------------------------------- /tests/test_bug_snapshot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | 7 | from bugbug import bugzilla 8 | from bugbug.bug_snapshot import rollback 9 | 10 | 11 | def test_bug_snapshot(): 12 | for i, bug in enumerate(bugzilla.get_bugs()): 13 | print(bug["id"]) 14 | print(i) 15 | 16 | rollback(bug, do_assert=True) 17 | -------------------------------------------------------------------------------- /tests/test_bugtype.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import numpy as np 7 | 8 | from bugbug.models.bugtype import BugTypeModel 9 | 10 | 11 | def test_get_bugtype_labels(): 12 | model = BugTypeModel() 13 | classes, keyword_list = model.get_labels() 14 | 15 | assert np.array_equal(classes[1319957], np.zeros(5)) 16 | 17 | target = np.zeros(5) 18 | target[keyword_list.index("crash")] = 1 19 | assert np.array_equal(classes[1319973], target) 20 | 21 | target = np.zeros(5) 22 | target[keyword_list.index("memory")] = 1 23 | assert np.array_equal(classes[1325215], target) 24 | 25 | target = np.zeros(5) 26 | target[keyword_list.index("performance")] = 1 27 | assert np.array_equal(classes[1320195], target) 28 | 29 | target = np.zeros(5) 30 | target[keyword_list.index("security")] = 1 31 | assert np.array_equal(classes[1320039], target) 32 | -------------------------------------------------------------------------------- /tests/test_bugzilla.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from typing import Any 7 | 8 | import pytest 9 | 10 | from bugbug import bugzilla 11 | 12 | 13 | def test_get_bugs(): 14 | all_bugs = {int(bug["id"]) for bug in bugzilla.get_bugs(include_invalid=True)} 15 | legitimate_bugs = {int(bug["id"]) for bug in bugzilla.get_bugs()} 16 | 17 | assert 1541482 in all_bugs 18 | assert 1541482 not in legitimate_bugs 19 | 20 | assert 1559674 in all_bugs 21 | assert 1559674 not in legitimate_bugs 22 | 23 | assert 1549207 in all_bugs 24 | assert 1549207 not in legitimate_bugs 25 | 26 | assert 1572747 in all_bugs 27 | assert 1572747 in legitimate_bugs 28 | 29 | 30 | def test_get_fixed_versions(): 31 | assert bugzilla.get_fixed_versions( 32 | { 33 | "target_milestone": "mozilla81", 34 | "cf_tracking_firefox83": "blocking", 35 | "cf_status_firefox82": "fixed", 36 | "cf_status_firefox81": "unaffected", 37 | } 38 | ) == [81, 82] 39 | 40 | assert bugzilla.get_fixed_versions( 41 | { 42 | "target_milestone": "mozilla82", 43 | "cf_tracking_firefox82": "---", 44 | "cf_status_firefox82": "fixed", 45 | "cf_status_firefox83": "fixed", 46 | } 47 | ) == [82, 83] 48 | 49 | assert bugzilla.get_fixed_versions( 50 | { 51 | "target_milestone": "mozilla82", 52 | } 53 | ) == [82] 54 | 55 | assert bugzilla.get_fixed_versions( 56 | { 57 | "target_milestone": "82 Branch", 58 | } 59 | ) == [82] 60 | 61 | assert bugzilla.get_fixed_versions( 62 | { 63 | "target_milestone": "Firefox 82", 64 | } 65 | ) == [82] 66 | 67 | 68 | @pytest.fixture 69 | def component_team_mapping(): 70 | return { 71 | "products": [ 72 | { 73 | "name": "JSS", 74 | "components": [ 75 | { 76 | "name": "Library", 77 | "team_name": "Crypto", 78 | }, 79 | { 80 | "name": "Tests", 81 | "team_name": "Crypto", 82 | }, 83 | ], 84 | }, 85 | { 86 | "name": "Core", 87 | "components": [ 88 | { 89 | "name": "Graphics", 90 | "team_name": "GFX", 91 | }, 92 | ], 93 | }, 94 | ] 95 | } 96 | 97 | 98 | def test_get_component_team_mapping( 99 | responses: Any, component_team_mapping: dict 100 | ) -> 
None: 101 | responses.add( 102 | responses.GET, 103 | "https://bugzilla.mozilla.org/rest/product?type=accessible&include_fields=name&include_fields=components.name&include_fields=components.team_name", 104 | status=200, 105 | json=component_team_mapping, 106 | ) 107 | 108 | assert bugzilla.get_component_team_mapping() == { 109 | "Core": {"Graphics": "GFX"}, 110 | "JSS": {"Library": "Crypto", "Tests": "Crypto"}, 111 | } 112 | -------------------------------------------------------------------------------- /tests/test_code_review.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import responses 3 | from unidiff import PatchSet 4 | 5 | from bugbug.tools.code_review import find_comment_scope 6 | 7 | 8 | def test_find_comment_scope(): 9 | responses.add_passthru("https://phabricator.services.mozilla.com/") 10 | responses.add_passthru( 11 | "https://mozphab-phabhost-cdn.devsvcprod.mozaws.net/file/data/" 12 | ) 13 | 14 | test_data = { 15 | "https://phabricator.services.mozilla.com/D233024?id=964198": { 16 | "browser/components/newtab/test/browser/browser.toml": { 17 | 79: { 18 | "line_start": 78, 19 | "line_end": 79, 20 | "has_added_lines": False, 21 | } 22 | }, 23 | "browser/components/asrouter/tests/browser/browser.toml": { 24 | 63: { 25 | "line_start": 60, 26 | "line_end": 74, 27 | "has_added_lines": True, 28 | }, 29 | }, 30 | }, 31 | "https://phabricator.services.mozilla.com/D240754?id=995999": { 32 | "dom/canvas/WebGLShaderValidator.cpp": { 33 | 39: { 34 | "line_start": 37, 35 | "line_end": 42, 36 | "has_added_lines": True, 37 | }, 38 | 46: { 39 | "line_start": 37, 40 | "line_end": 42, 41 | "has_added_lines": True, 42 | }, 43 | } 44 | }, 45 | } 46 | 47 | for revision_url, patch_files in test_data.items(): 48 | raw_diff = requests.get(revision_url + "&download=true", timeout=5).text 49 | patch_set = PatchSet.from_string(raw_diff) 50 | 51 | for file_name, target_hunks in patch_files.items(): 52 | patched_file = 
next( 53 | patched_file 54 | for patched_file in patch_set 55 | if patched_file.path == file_name 56 | ) 57 | 58 | for line_number, expected_scope in target_hunks.items(): 59 | assert find_comment_scope(patched_file, line_number) == expected_scope 60 | -------------------------------------------------------------------------------- /tests/test_commit_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import pytest 7 | 8 | from bugbug.commit_features import AuthorExperience, CommitExtractor, ReviewersNum 9 | from bugbug.feature_cleanup import fileref, url 10 | 11 | 12 | def test_CommitExtractor(): 13 | CommitExtractor([ReviewersNum(), AuthorExperience()], [fileref(), url()]) 14 | with pytest.raises(AssertionError): 15 | CommitExtractor([ReviewersNum(), AuthorExperience()], [fileref(), fileref()]) 16 | with pytest.raises(AssertionError): 17 | CommitExtractor([AuthorExperience(), AuthorExperience()], [fileref(), url()]) 18 | -------------------------------------------------------------------------------- /tests/test_defect.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from bugbug.models.defect import DefectModel 7 | 8 | 9 | def test_get_defect_labels(): 10 | model = DefectModel() 11 | classes, _ = model.get_labels() 12 | assert classes[1042414] == 1 13 | assert classes[1049816] != 1 14 | -------------------------------------------------------------------------------- /tests/test_defect_enhancement_task.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.defect_enhancement_task import DefectEnhancementTaskModel 7 | 8 | 9 | def test_get_defect_enhancement_task_labels(): 10 | model = DefectEnhancementTaskModel() 11 | classes, _ = model.get_labels() 12 | assert classes[1042414] == "defect" 13 | assert classes[1531080] == "task" 14 | assert classes[1348788] == "enhancement" 15 | -------------------------------------------------------------------------------- /tests/test_devdocneeded.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | from bugbug.models.devdocneeded import DevDocNeededModel 7 | 8 | 9 | def test_get_devdocneeded_labels(): 10 | model = DevDocNeededModel() 11 | classes, _ = model.get_labels() 12 | assert classes[528988] == 0 13 | assert classes[1053944] == 1 14 | assert classes[1531080] == 1 15 | -------------------------------------------------------------------------------- /tests/test_hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import json 7 | import os 8 | import shutil 9 | 10 | import jsone 11 | import jsonschema 12 | import pytest 13 | 14 | from infra.set_hook_version import set_hook 15 | 16 | with open(os.path.join("VERSION")) as f: 17 | version = f.read().strip() 18 | 19 | parameters = [ 20 | (os.path.realpath("infra/taskcluster-hook-data-pipeline.json"), {}), 21 | (os.path.realpath("infra/taskcluster-hook-check-models-start.json"), {}), 22 | ( 23 | os.path.realpath("infra/taskcluster-hook-classify-patch.json"), 24 | {"PHABRICATOR_DEPLOYMENT": "prod", "DIFF_ID": 123}, 25 | ), 26 | ( 27 | os.path.realpath("infra/taskcluster-hook-test-select.json"), 28 | {"PHABRICATOR_DEPLOYMENT": "dev", "DIFF_ID": 123}, 29 | ), 30 | ( 31 | os.path.realpath("infra/taskcluster-hook-test-select.json"), 32 | { 33 | "PHABRICATOR_DEPLOYMENT": "prod", 34 | "DIFF_ID": 123, 35 | "RUNNABLE_JOBS": "http://localhost", 36 | }, 37 | ), 38 | (os.path.realpath("infra/taskcluster-hook-landings-risk-report.json"), {}), 39 | ] 40 | 41 | for infra_path in os.listdir("infra"): 42 | if not infra_path.startswith("taskcluster-hook-"): 43 | continue 44 | 45 | assert any( 46 | path == os.path.realpath(os.path.join("infra", infra_path)) 47 | for path, payload in parameters 48 | ), f"{infra_path} not found" 49 | 50 | 51 | 
@pytest.mark.parametrize("hook_file,payload", parameters) 52 | def test_jsone_validates(tmp_path, hook_file, payload): 53 | tmp_hook_file = tmp_path / "hook.json" 54 | 55 | shutil.copyfile(hook_file, tmp_hook_file) 56 | 57 | set_hook(tmp_hook_file, version) 58 | 59 | with open(tmp_hook_file, "r") as f: 60 | hook_content = json.load(f) 61 | 62 | jsonschema.validate(instance=payload, schema=hook_content["triggerSchema"]) 63 | 64 | jsone.render(hook_content, context={"payload": payload}) 65 | -------------------------------------------------------------------------------- /tests/test_invalid_compatibility_report.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.invalid_compatibility_report import InvalidCompatibilityReportModel 7 | 8 | 9 | def test_get_invalid_labels(): 10 | model = InvalidCompatibilityReportModel() 11 | classes, _ = model.get_labels() 12 | assert classes[70960] 13 | assert classes[70978] 14 | assert not classes[71052] 15 | assert not classes[71011] 16 | -------------------------------------------------------------------------------- /tests/test_labels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import os 7 | 8 | from bugbug import labels 9 | 10 | 11 | def test_get_labels_dir(): 12 | path = labels.get_labels_dir() 13 | assert os.path.isabs(path) 14 | assert path.endswith("labels") 15 | 16 | 17 | def test_get_all_bug_ids(): 18 | bug_ids = labels.get_all_bug_ids() 19 | assert len(bug_ids) > 0 20 | assert all(isinstance(bug_id, int) for bug_id in bug_ids) 21 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from logging import INFO, basicConfig, getLogger 7 | 8 | from bugbug import model 9 | from bugbug.models import MODELS, get_model_class 10 | 11 | basicConfig(level=INFO) 12 | logger = getLogger(__name__) 13 | 14 | 15 | def test_import_all_models(): 16 | """Try loading all defined models to ensure that their full qualified 17 | names are still good 18 | """ 19 | 20 | for model_name in MODELS: 21 | logger.info("Try loading model %s", model_name) 22 | get_model_class(model_name) 23 | 24 | 25 | def test_component_is_bugmodel(): 26 | model_class = get_model_class("component") 27 | assert issubclass(model_class, model.BugModel) 28 | model_class = get_model_class("regression") 29 | assert issubclass(model_class, model.BugModel) 30 | 31 | 32 | def test_backout_is_commitmodel(): 33 | model_class = get_model_class("backout") 34 | assert issubclass(model_class, model.CommitModel) 35 | -------------------------------------------------------------------------------- /tests/test_needsdiagnosis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # 
License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.needsdiagnosis import NeedsDiagnosisModel 7 | 8 | 9 | def test_get_needsdiagnosis_labels(): 10 | model = NeedsDiagnosisModel() 11 | classes, _ = model.get_labels() 12 | assert not classes[71052] 13 | assert not classes[71011] 14 | assert classes[71012] 15 | assert classes[70962] 16 | -------------------------------------------------------------------------------- /tests/test_performancebug.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.performancebug import PerformanceBugModel 7 | 8 | 9 | def test_get_performancebug_labels(): 10 | model = PerformanceBugModel() 11 | classes, _ = model.get_labels() 12 | assert classes[1461247] == 1 13 | assert classes[1457988] == 1 14 | assert classes[446261] == 0 15 | assert classes[452258] == 0 16 | -------------------------------------------------------------------------------- /tests/test_pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | import os 7 | 8 | import jsone 9 | import jsonschema 10 | import pytest 11 | import requests 12 | import responses 13 | import yaml 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def task_schema(): 18 | responses.add_passthru("https://community-tc.services.mozilla.com/") 19 | r = requests.get( 20 | "https://community-tc.services.mozilla.com/schemas/queue/v1/create-task-request.json" 21 | ) 22 | r.raise_for_status() 23 | return r.json() 24 | 25 | 26 | @pytest.fixture(scope="session") 27 | def payload_schema(): 28 | responses.add_passthru("https://community-tc.services.mozilla.com/") 29 | r = requests.get( 30 | "https://community-tc.services.mozilla.com/schemas/docker-worker/v1/payload.json" 31 | ) 32 | r.raise_for_status() 33 | return r.json() 34 | 35 | 36 | @pytest.mark.parametrize( 37 | "pipeline_file", 38 | ( 39 | os.path.realpath(os.path.join("infra", f)) 40 | for f in os.listdir("infra") 41 | if f.endswith(".yml") 42 | ), 43 | ) 44 | def test_jsone_validates(pipeline_file, task_schema, payload_schema): 45 | responses.add_passthru("https://community-tc.services.mozilla.com/") 46 | 47 | with open(pipeline_file, "r") as f: 48 | yaml_content = yaml.safe_load(f.read()) 49 | 50 | result = jsone.render(yaml_content, context={"version": "42.0"}) 51 | tasks = result["tasks"] 52 | 53 | all_ids = [task["ID"] for task in tasks] 54 | 55 | # Make sure there are no duplicate IDs. 56 | assert len(all_ids) == len(set(all_ids)) 57 | 58 | # Make sure all dependencies are present. 
59 | for task in tasks: 60 | assert "dependencies" not in task or all( 61 | dependency in all_ids for dependency in task["dependencies"] 62 | ) 63 | 64 | for task in tasks: 65 | if "ID" in task: 66 | del task["ID"] 67 | 68 | if "dependencies" in task: 69 | del task["dependencies"] 70 | 71 | jsonschema.validate(instance=task, schema=task_schema) 72 | 73 | jsonschema.validate(instance=task["payload"], schema=payload_schema) 74 | -------------------------------------------------------------------------------- /tests/test_qaneeded.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug import bugzilla 7 | from bugbug.models.qaneeded import QANeededModel 8 | 9 | 10 | def test_get_qaneeded_labels(): 11 | model = QANeededModel() 12 | classes, _ = model.get_labels() 13 | assert not classes[1389220] 14 | assert classes[1389223], "Bug should contain qawanted in a field" 15 | assert classes[1390433], "Bug should contain qe-verify in a field" 16 | 17 | 18 | def test_rollback(): 19 | model = QANeededModel() 20 | 21 | histories = {} 22 | for bug in bugzilla.get_bugs(): 23 | histories[int(bug["id"])] = bug["history"] 24 | 25 | def rollback_point(bug_id): 26 | count = 0 27 | for history in histories[bug_id]: 28 | for change in history["changes"]: 29 | if model.rollback(change): 30 | return count 31 | count += 1 32 | return count 33 | 34 | assert rollback_point(1390433) == 35, ( 35 | "A bug field should start with qawanted or qe-verify" 36 | ) 37 | assert rollback_point(1389136) == 9, ( 38 | "A bug field should start with qawanted or qe-verify" 39 | ) 40 | 41 | assert rollback_point(1388990) == 29 42 | assert rollback_point(1389223) == 8 43 | 
-------------------------------------------------------------------------------- /tests/test_rcatype.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.rcatype import RCATypeModel 7 | 8 | 9 | def test_get_rca_from_whiteboard(): 10 | model = RCATypeModel() 11 | # Case 1: No rca 12 | assert model.get_rca_from_whiteboard("[Whiteboard1][Not RCA type]") == [] 13 | # Case 2: RCA : A and RCA - A 14 | assert model.get_rca_from_whiteboard("[RCA: cornercase]") == ["cornercase"] 15 | assert model.get_rca_from_whiteboard("[rca - codingerror]") == ["codingerror"] 16 | # Case 3: Multiple rca types 17 | assert model.get_rca_from_whiteboard("[rca - cornercase][rca - codingerror]") == [ 18 | "cornercase", 19 | "codingerror", 20 | ] 21 | assert model.get_rca_from_whiteboard("[rca : systemerror][rca - codingerror]") == [ 22 | "systemerror", 23 | "codingerror", 24 | ] 25 | assert model.get_rca_from_whiteboard("[rca - cornercase][rca : testingerror]") == [ 26 | "cornercase", 27 | "testingerror", 28 | ] 29 | assert model.get_rca_from_whiteboard("[rca : cornercase][rca : codingerror]") == [ 30 | "cornercase", 31 | "codingerror", 32 | ] 33 | assert model.get_rca_from_whiteboard("[RCA: codingerror - syntaxerror]") == [ 34 | "codingerror" 35 | ] 36 | # Case 4: subcategories enabled, with rca already present in the list 37 | model = RCATypeModel(rca_subcategories_enabled=True) 38 | assert model.get_rca_from_whiteboard("[RCA: codingerror - syntaxerror]") == [ 39 | "codingerror-syntaxerror" 40 | ] 41 | assert model.get_rca_from_whiteboard( 42 | "[RCA: codingerror - syntaxerror][rca: codingerror:logicalerror]" 43 | ) == ["codingerror-syntaxerror", "codingerror-logicalerror"] 44 | # Case 5: 
subcategories enabled, with rca not present in list 45 | assert model.get_rca_from_whiteboard("[RCA: codingerror - semanticerror]") == [ 46 | "codingerror-semanticerror" 47 | ] 48 | 49 | 50 | def test_get_labels(): 51 | model = RCATypeModel() 52 | classes, _ = model.get_labels() 53 | 54 | assert classes[1556846].tolist() == [ 55 | 1.0, 56 | 0.0, 57 | 0.0, 58 | 0.0, 59 | 0.0, 60 | 0.0, 61 | 0.0, 62 | 1.0, 63 | 0.0, 64 | 0.0, 65 | 0.0, 66 | 0.0, 67 | 0.0, 68 | 0.0, 69 | 0.0, 70 | 0.0, 71 | ] 72 | -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | 7 | from bugbug.models.regression import RegressionModel 8 | 9 | 10 | def test_get_regression_labels(): 11 | model = RegressionModel() 12 | classes, _ = model.get_labels() 13 | assert classes[1348788] == 0 14 | assert classes[518272] == 1 15 | -------------------------------------------------------------------------------- /tests/test_stepstoreproduce.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 
5 | 6 | 7 | from bugbug.models.stepstoreproduce import StepsToReproduceModel 8 | 9 | 10 | def test_get_labels(): 11 | model = StepsToReproduceModel() 12 | classes, _ = model.get_labels() 13 | assert classes[1488310] 14 | assert not classes[1372243] 15 | assert 1319973 not in classes 16 | -------------------------------------------------------------------------------- /tests/test_test_scheduling_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug import test_scheduling_features 7 | 8 | 9 | def test_path_distance(): 10 | pd = test_scheduling_features.PathDistance() 11 | 12 | assert ( 13 | pd( 14 | {"name": "dom/media/tests/mochitest.ini"}, 15 | {"files": ["dom/media/tests/test.js", "dom/media/anotherFile.cpp"]}, 16 | ) 17 | == 0 18 | ) 19 | assert ( 20 | pd( 21 | {"name": "dom/media/tests/mochitest.ini"}, 22 | {"files": ["dom/media/anotherFile.cpp"]}, 23 | ) 24 | == 1 25 | ) 26 | assert ( 27 | pd( 28 | {"name": "dom/media/tests/mochitest.ini"}, 29 | {"files": ["dom/media/src/aFile.cpp"]}, 30 | ) 31 | == 2 32 | ) 33 | assert ( 34 | pd( 35 | {"name": "dom/media/tests/mochitest.ini"}, 36 | {"files": ["dom/media/src/aFile.cpp", "dom/media/anotherFile.cpp"]}, 37 | ) 38 | == 1 39 | ) 40 | assert ( 41 | pd( 42 | {"name": "dom/media/tests/mochitest.ini"}, 43 | {"files": ["layout/utils/bla.cpp"]}, 44 | ) 45 | == 5 46 | ) 47 | assert ( 48 | pd( 49 | {"name": "testing/web-platform/tests/content-security-policy/worker-src"}, 50 | {"files": ["test"]}, 51 | ) 52 | == 4 53 | ) 54 | assert ( 55 | pd( 56 | {"name": "test"}, 57 | { 58 | "files": [ 59 | "testing/web-platform/tests/content-security-policy/worker-src" 60 | ] 61 | }, 62 | ) 63 | == 4 64 | ) 65 | 
-------------------------------------------------------------------------------- /tests/test_tracking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.tracking import TrackingModel 7 | 8 | 9 | def test_get_tracking_labels(): 10 | model = TrackingModel() 11 | classes, _ = model.get_labels() 12 | assert not classes[1101825] 13 | assert classes[1042096] 14 | -------------------------------------------------------------------------------- /tests/test_trainer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | import responses 7 | 8 | from bugbug import bugzilla, db 9 | from scripts import trainer 10 | 11 | 12 | def test_trainer(): 13 | # Pretend the DB was already downloaded and no new DB is available. 
14 | 15 | url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_bugs.latest/artifacts/public/bugs.json" 16 | 17 | responses.add( 18 | responses.GET, 19 | f"{url}.version", 20 | status=200, 21 | body=str(db.DATABASES[bugzilla.BUGS_DB]["version"]), 22 | ) 23 | 24 | responses.add( 25 | responses.HEAD, 26 | f"{url}.zst", 27 | status=200, 28 | headers={"ETag": "etag"}, 29 | ) 30 | 31 | trainer.Trainer().go(trainer.parse_args(["regression"])) 32 | -------------------------------------------------------------------------------- /tests/test_uplift.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This Source Code Form is subject to the terms of the Mozilla Public 3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 | # You can obtain one at http://mozilla.org/MPL/2.0/. 5 | 6 | from bugbug.models.uplift import UpliftModel 7 | 8 | 9 | def test_get_uplift_labels(): 10 | model = UpliftModel() 11 | classes, _ = model.get_labels() 12 | assert classes[1364870] == 1 13 | assert classes[1350663] != 1 14 | -------------------------------------------------------------------------------- /ui/changes/.eslintrc.yml: -------------------------------------------------------------------------------- 1 | env: 2 | browser: true 3 | es6: true 4 | plugins: 5 | - prettier 6 | - mozilla 7 | extends: 8 | - standard 9 | - prettier 10 | - plugin:mozilla/recommended 11 | parserOptions: 12 | ecmaVersion: 2018 13 | sourceType: module 14 | rules: 15 | max-len: off 16 | prettier/prettier: "error" 17 | -------------------------------------------------------------------------------- /ui/changes/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "changes", 3 | "version": "1.0.0", 4 | "description": "To update Temporal polyfill:", 5 | "private": true, 6 | "scripts": { 7 | "dev": "npx snowpack dev", 8 | "release": "npx 
snowpack build", 9 | "test": "echo \"Error: no test specified\" && exit 1" 10 | }, 11 | "keywords": [], 12 | "author": "", 13 | "license": "ISC", 14 | "devDependencies": { 15 | "prettier": "^3.5.3", 16 | "snowpack": "^3.8.8" 17 | }, 18 | "dependencies": { 19 | "@js-temporal/polyfill": "^0.5.1", 20 | "apexcharts": "^4.7.0", 21 | "localforage": "^1.10.0" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /ui/changes/snowpack.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: [ 3 | /* ... */ 4 | ], 5 | packageOptions: { 6 | /* ... */ 7 | }, 8 | devOptions: { 9 | /* ... */ 10 | }, 11 | buildOptions: { 12 | out: "dist", 13 | /* ... */ 14 | }, 15 | mount: { 16 | src: "/", 17 | /* ... */ 18 | }, 19 | alias: { 20 | /* ... */ 21 | }, 22 | }; 23 | -------------------------------------------------------------------------------- /ui/changes/src/bug.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | bugbug ui 6 | 7 | 11 | 12 | 13 | 14 |

bugbug ui

15 | 20 |
21 |
22 | 36 |
37 |
38 | 39 |

Bug List

40 |
41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 |
BugDateTesting TagsCoverageRiskiness
54 |
55 |
56 |
57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /ui/changes/src/bug.js: -------------------------------------------------------------------------------- 1 | import * as common from "./common.js"; 2 | 3 | async function renderUI() { 4 | const data = await common.landingsData; 5 | const bugID = Number(common.getOption("bugID")); 6 | 7 | // 1433500 8 | 9 | const allBugSummaries = [].concat.apply([], Object.values(data)); 10 | 11 | for (const bugSummary of allBugSummaries) { 12 | if (bugSummary["id"] == bugID) { 13 | await common.renderTable([bugSummary]); 14 | return; 15 | } 16 | } 17 | 18 | await common.renderTable([]); 19 | } 20 | 21 | (async function init() { 22 | await common.setupOptions(renderUI); 23 | 24 | await renderUI(); 25 | })(); 26 | -------------------------------------------------------------------------------- /ui/changes/src/css/page.css: -------------------------------------------------------------------------------- 1 | @import url("./common.css"); 2 | 3 | * { 4 | box-sizing: border-box; 5 | } 6 | 7 | th, 8 | td { 9 | vertical-align: top; 10 | } 11 | 12 | table { 13 | table-layout: fixed; 14 | width: 100%; 15 | white-space: nowrap; 16 | } 17 | table td, 18 | table th { 19 | white-space: nowrap; 20 | overflow: hidden; 21 | text-overflow: ellipsis; 22 | text-align: center; 23 | } 24 | table tr td:nth-child(1), 25 | table tr th:nth-child(1) { 26 | text-align: left; 27 | } 28 | table tr td:nth-child(2), 29 | table tr th:nth-child(2), 30 | table tr td:nth-child(3), 31 | table tr th:nth-child(3), 32 | table tr td:nth-child(4), 33 | table tr th:nth-child(4), 34 | table tr td:nth-child(5), 35 | table tr th:nth-child(5) { 36 | width: 100px; 37 | } 38 | 39 | table td ul { 40 | margin: 0; 41 | padding: 0; 42 | list-style: none; 43 | } 44 | tr { 45 | padding: 4px 0; 46 | border-bottom: solid 1px rgba(0, 0, 0, 0.2); 47 | } 48 | 49 | td .desc-box { 50 | width: auto; 51 | padding: 3px; 52 | } 53 | td 
.desc-box ul { 54 | margin: 0; 55 | padding-inline-start: 20px; 56 | } 57 | 58 | #links { 59 | position: absolute; 60 | right: 5px; 61 | top: 5px; 62 | } 63 | 64 | h3 { 65 | margin: 0; 66 | border-bottom: solid 1px rgba(0, 0, 0, 0.2); 67 | } 68 | details h3 { 69 | display: inline-block; 70 | border-bottom: none; 71 | } 72 | details summary { 73 | border-bottom: solid 1px rgba(0, 0, 0, 0.2); 74 | } 75 | 76 | #grid { 77 | display: grid; 78 | grid-template-columns: auto 1fr; 79 | padding: 0 5px; 80 | max-width: 100vw; 81 | overflow: hidden; 82 | } 83 | #grid aside { 84 | padding-right: 5px; 85 | } 86 | #grid main { 87 | overflow: auto; 88 | } 89 | 90 | #filter-container input:not([type="checkbox"]), 91 | #filter-container select { 92 | width: 180px; 93 | display: block; 94 | margin: 2px 0; 95 | padding: 0; 96 | } 97 | #filter-container label { 98 | font-style: italic; 99 | } 100 | 101 | /* For testing/ graph page */ 102 | aside ul { 103 | margin: 0; 104 | padding: 0; 105 | } 106 | aside ul li { 107 | list-style: none; 108 | } 109 | /* 110 | .chart-container { 111 | display: flex; 112 | justify-content: center; 113 | } 114 | */ 115 | 116 | .loading-data #grid, 117 | .loader { 118 | display: none; 119 | } 120 | .loading-data .loader { 121 | display: block; 122 | } 123 | 124 | /* Spinner */ 125 | .loader, 126 | .loader:before, 127 | .loader:after { 128 | background: var(--heading-color); 129 | -webkit-animation: load1 1s infinite ease-in-out; 130 | animation: load1 1s infinite ease-in-out; 131 | width: 1em; 132 | height: 4em; 133 | } 134 | .loader { 135 | color: var(--heading-color); 136 | text-indent: -9999em; 137 | margin: 88px auto; 138 | position: relative; 139 | font-size: 11px; 140 | -webkit-transform: translateZ(0); 141 | -ms-transform: translateZ(0); 142 | transform: translateZ(0); 143 | -webkit-animation-delay: -0.16s; 144 | animation-delay: -0.16s; 145 | } 146 | .loader:before, 147 | .loader:after { 148 | position: absolute; 149 | top: 0; 150 | content: ""; 151 
| } 152 | .loader:before { 153 | left: -1.5em; 154 | -webkit-animation-delay: -0.32s; 155 | animation-delay: -0.32s; 156 | } 157 | .loader:after { 158 | left: 1.5em; 159 | } 160 | @-webkit-keyframes load1 { 161 | 0%, 162 | 80%, 163 | 100% { 164 | box-shadow: 0 0; 165 | height: 4em; 166 | } 167 | 40% { 168 | box-shadow: 0 -2em; 169 | height: 5em; 170 | } 171 | } 172 | @keyframes load1 { 173 | 0%, 174 | 80%, 175 | 100% { 176 | box-shadow: 0 0; 177 | height: 4em; 178 | } 179 | 40% { 180 | box-shadow: 0 -2em; 181 | height: 5em; 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /ui/changes/src/feature.js: -------------------------------------------------------------------------------- 1 | import * as common from "./common.js"; 2 | 3 | let resultSummary = document.getElementById("result-summary"); 4 | let resultGraphs = document.getElementById("result-graphs"); 5 | 6 | async function renderFeatureChangesChart(chartEl, bugSummaries) { 7 | // Only show fixed bugs. 
8 | bugSummaries = bugSummaries.filter((bugSummary) => bugSummary.date !== null); 9 | 10 | if (bugSummaries.length == 0) { 11 | return; 12 | } 13 | 14 | let metabugs = (await common.featureMetabugs).reduce((acc, val) => { 15 | acc[val.id] = val.summary; 16 | return acc; 17 | }, {}); 18 | 19 | let featureCounter = new common.Counter(); 20 | for (let bugSummary of bugSummaries) { 21 | for (let bugID of bugSummary["meta_ids"]) { 22 | featureCounter[metabugs[bugID]] += 1; 23 | } 24 | } 25 | 26 | const metabug_summary_to_id = Object.entries(metabugs).reduce( 27 | (acc, [id, summary]) => { 28 | acc[summary] = id; 29 | return acc; 30 | }, 31 | {} 32 | ); 33 | 34 | common.renderTreemap(chartEl, `Feature metabug changes`, featureCounter, 0, { 35 | dataPointSelection: function (event, chartContext, config) { 36 | const summary = Object.keys(featureCounter)[config.dataPointIndex]; 37 | 38 | const metaBugID = document.getElementById("metaBugID"); 39 | metaBugID.value = metabug_summary_to_id[summary]; 40 | const syntheticEvent = new Event("change"); 41 | metaBugID.dispatchEvent(syntheticEvent); 42 | }, 43 | }); 44 | } 45 | 46 | async function renderSummary(bugSummaries) { 47 | let metaBugID = common.getOption("metaBugID"); 48 | 49 | let changesets = []; 50 | if (bugSummaries.length) { 51 | changesets = bugSummaries 52 | .map((summary) => summary.commits.length) 53 | .reduce((a, b) => a + b); 54 | } 55 | 56 | let bugText = metaBugID ? 
`For bug ${metaBugID}: ` : ``; 57 | let summaryText = `${bugText}There are ${bugSummaries.length} bugs with ${changesets} changesets.`; 58 | resultSummary.textContent = summaryText; 59 | 60 | resultGraphs.textContent = ""; 61 | 62 | let featureChangesChartEl = document.createElement("div"); 63 | resultGraphs.append(featureChangesChartEl); 64 | await renderFeatureChangesChart(featureChangesChartEl, bugSummaries); 65 | 66 | let riskChartEl = document.createElement("div"); 67 | resultGraphs.append(riskChartEl); 68 | await common.renderRiskChart(riskChartEl, bugSummaries); 69 | 70 | let regressionsChartEl = document.createElement("div"); 71 | resultGraphs.append(regressionsChartEl); 72 | await common.renderRegressionsChart(regressionsChartEl, bugSummaries); 73 | 74 | let timeToBugChartEl = document.createElement("div"); 75 | resultGraphs.append(timeToBugChartEl); 76 | await common.renderTimeToBugChart(timeToBugChartEl, bugSummaries); 77 | 78 | let timeToConfirmChartEl = document.createElement("div"); 79 | resultGraphs.append(timeToConfirmChartEl); 80 | await common.renderTimeToConfirmChart(timeToConfirmChartEl, bugSummaries); 81 | } 82 | 83 | async function renderUI(rerenderSummary = true) { 84 | const bugSummaries = await common.getFilteredBugSummaries(); 85 | 86 | if (rerenderSummary) { 87 | await renderSummary(bugSummaries); 88 | } 89 | 90 | await common.renderTable(bugSummaries); 91 | } 92 | 93 | (async function init() { 94 | await common.setupOptions(renderUI); 95 | 96 | await renderUI(); 97 | })(); 98 | -------------------------------------------------------------------------------- /ui/changes/src/index.js: -------------------------------------------------------------------------------- 1 | import * as common from "./common.js"; 2 | 3 | let resultSummary = document.getElementById("result-summary"); 4 | let resultGraphs = document.getElementById("result-graphs"); 5 | 6 | async function renderSummary(bugSummaries) { 7 | let metaBugID = 
common.getOption("metaBugID"); 8 | 9 | let changesets = []; 10 | if (bugSummaries.length) { 11 | changesets = bugSummaries 12 | .map((summary) => summary.commits.length) 13 | .reduce((a, b) => a + b); 14 | } 15 | 16 | let bugText = metaBugID ? `For bug ${metaBugID}: ` : ``; 17 | let summaryText = `${bugText}There are ${bugSummaries.length} bugs with ${changesets} changesets.`; 18 | resultSummary.textContent = summaryText; 19 | 20 | resultGraphs.textContent = ""; 21 | let testingChartEl = document.createElement("div"); 22 | resultGraphs.append(testingChartEl); 23 | common.renderTestingChart(testingChartEl, bugSummaries); 24 | 25 | let riskChartEl = document.createElement("div"); 26 | resultGraphs.append(riskChartEl); 27 | await common.renderRiskChart(riskChartEl, bugSummaries); 28 | 29 | let regressionsChartEl = document.createElement("div"); 30 | resultGraphs.append(regressionsChartEl); 31 | await common.renderRegressionsChart(regressionsChartEl, bugSummaries); 32 | 33 | let severityChartEl = document.createElement("div"); 34 | resultGraphs.append(severityChartEl); 35 | await common.renderSeverityChart(severityChartEl, bugSummaries); 36 | 37 | let fixTimesChartEl = document.createElement("div"); 38 | resultGraphs.append(fixTimesChartEl); 39 | await common.renderFixTimesChart(fixTimesChartEl, bugSummaries); 40 | 41 | let timeToBugChartEl = document.createElement("div"); 42 | resultGraphs.append(timeToBugChartEl); 43 | await common.renderTimeToBugChart(timeToBugChartEl, bugSummaries); 44 | 45 | let timeToConfirmChartEl = document.createElement("div"); 46 | resultGraphs.append(timeToConfirmChartEl); 47 | await common.renderTimeToConfirmChart(timeToConfirmChartEl, bugSummaries); 48 | } 49 | 50 | async function renderUI(rerenderSummary = true) { 51 | const bugSummaries = await common.getFilteredBugSummaries(); 52 | 53 | if (rerenderSummary) { 54 | await renderSummary(bugSummaries); 55 | } 56 | 57 | await common.renderTable(bugSummaries); 58 | } 59 | 60 | (async function 
init() { 61 | await common.setupOptions(renderUI); 62 | 63 | await renderUI(); 64 | })(); 65 | -------------------------------------------------------------------------------- /ui/changes/src/release.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | bugbug ui - Release 6 | 7 | 11 | 12 | 13 | 14 |

bugbug ui

15 | 20 |
21 |
22 | 42 |
43 |
44 |
45 |
46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /ui/changes/src/release.js: -------------------------------------------------------------------------------- 1 | import * as common from "./common.js"; 2 | 3 | let resultGraphs = document.getElementById("result-graphs"); 4 | 5 | async function renderComponentChangesChart(chartEl, bugSummaries) { 6 | // Only show fixed bugs. 7 | bugSummaries = bugSummaries.filter((bugSummary) => bugSummary.date !== null); 8 | 9 | if (bugSummaries.length == 0) { 10 | return; 11 | } 12 | 13 | let dimension = common.getOption("changeGrouping")[0]; 14 | 15 | let componentCounter = new common.Counter(); 16 | for (let bugSummary of bugSummaries) { 17 | componentCounter[bugSummary[dimension]] += 1; 18 | } 19 | 20 | common.renderTreemap( 21 | chartEl, 22 | `${dimension.charAt(0).toUpperCase()}${dimension.slice(1)} changes`, 23 | componentCounter 24 | ); 25 | } 26 | 27 | async function renderAffectedComponentChangesChart(chartEl, bugSummaries) { 28 | // Only consider fixed bugs. 
29 | bugSummaries = bugSummaries.filter((bugSummary) => bugSummary.date !== null); 30 | 31 | if (bugSummaries.length == 0) { 32 | return; 33 | } 34 | 35 | let componentCounter = new common.Counter(); 36 | for (let bugSummary of bugSummaries) { 37 | componentCounter[bugSummary["component"]] += 1; 38 | } 39 | 40 | let componentConnectionMap = 41 | await common.getComponentDependencyMap("regressions"); 42 | 43 | let affectedComponentCounter = new common.Counter(); 44 | for (let [sourceComponent, count] of Object.entries(componentCounter)) { 45 | if (!componentConnectionMap.hasOwnProperty(sourceComponent)) { 46 | continue; 47 | } 48 | 49 | for (let [targetComponent, percentage] of Object.entries( 50 | componentConnectionMap[sourceComponent] 51 | )) { 52 | affectedComponentCounter[targetComponent] += count * percentage; 53 | } 54 | } 55 | 56 | common.renderTreemap( 57 | chartEl, 58 | "Most affected components", 59 | affectedComponentCounter 60 | ); 61 | } 62 | 63 | async function renderUI() { 64 | resultGraphs.textContent = ""; 65 | 66 | const bugSummaries = await common.getFilteredBugSummaries(); 67 | 68 | let componentChangesChartEl = document.createElement("div"); 69 | resultGraphs.append(componentChangesChartEl); 70 | await renderComponentChangesChart(componentChangesChartEl, bugSummaries); 71 | 72 | let affectedComponentChangesChartEl = document.createElement("div"); 73 | resultGraphs.append(affectedComponentChangesChartEl); 74 | await renderAffectedComponentChangesChart( 75 | affectedComponentChangesChartEl, 76 | bugSummaries 77 | ); 78 | } 79 | 80 | (async function init() { 81 | await common.setupOptions(renderUI); 82 | 83 | await renderUI(); 84 | })(); 85 | -------------------------------------------------------------------------------- /ui/changes/src/team.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | bugbug ui - Team 6 | 7 | 11 | 12 | 13 | 14 |

bugbug ui

15 | 20 |
21 |
22 | 59 |
60 |
61 | 69 | 76 | 80 |
81 |
82 |
83 | 91 | 98 |
99 |
100 |
101 |
102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /ui/changes/src/team.js: -------------------------------------------------------------------------------- 1 | import { Temporal } from "@js-temporal/polyfill"; 2 | import * as common from "./common.js"; 3 | 4 | let resultGraphs = document.getElementById("result-graphs"); 5 | const dependencySection = document.getElementById("dependency-section"); 6 | 7 | async function renderUI() { 8 | resultGraphs.textContent = ""; 9 | dependencySection.textContent = ""; 10 | 11 | const bugSummaries = await common.getFilteredBugSummaries(); 12 | 13 | let riskChartEl = document.createElement("div"); 14 | resultGraphs.append(riskChartEl); 15 | await common.renderRiskChart(riskChartEl, bugSummaries); 16 | 17 | const riskListEl = await common.renderRiskList(bugSummaries); 18 | resultGraphs.append(riskListEl); 19 | resultGraphs.append(document.createElement("br")); 20 | 21 | let regressionsChartEl = document.createElement("div"); 22 | resultGraphs.append(regressionsChartEl); 23 | await common.renderRegressionsChart(regressionsChartEl, bugSummaries, true); 24 | 25 | let severityChartEl = document.createElement("div"); 26 | resultGraphs.append(severityChartEl); 27 | await common.renderSeverityChart(severityChartEl, bugSummaries, true); 28 | 29 | let fixTimesChartEl = document.createElement("div"); 30 | resultGraphs.append(fixTimesChartEl); 31 | await common.renderFixTimesChart(fixTimesChartEl, bugSummaries); 32 | 33 | const fixTimesListEl = await common.renderFixTimesList(bugSummaries); 34 | resultGraphs.append(fixTimesListEl); 35 | resultGraphs.append(document.createElement("br")); 36 | 37 | let patchCoverageChartEl = document.createElement("div"); 38 | resultGraphs.append(patchCoverageChartEl); 39 | await common.renderPatchCoverageChart(patchCoverageChartEl, bugSummaries); 40 | 41 | const patchCoverageListEl = 42 | await common.renderPatchCoverageList(bugSummaries); 43 | 
resultGraphs.append(patchCoverageListEl); 44 | resultGraphs.append(document.createElement("br")); 45 | 46 | let reviewTimeChartEl = document.createElement("div"); 47 | resultGraphs.append(reviewTimeChartEl); 48 | await common.renderReviewTimeChart(reviewTimeChartEl, bugSummaries); 49 | 50 | const reviewTimeListEl = await common.renderReviewTimeList(bugSummaries); 51 | resultGraphs.append(reviewTimeListEl); 52 | resultGraphs.append(document.createElement("br")); 53 | 54 | let assignTimeChartEl = document.createElement("div"); 55 | resultGraphs.append(assignTimeChartEl); 56 | await common.renderTimeToAssignChart(assignTimeChartEl, bugSummaries); 57 | 58 | let testFailureStatsChartEl = document.createElement("div"); 59 | resultGraphs.append(testFailureStatsChartEl); 60 | await common.renderTestFailureStatsChart(testFailureStatsChartEl); 61 | 62 | const testFailureListEl = await common.renderTestFailureList(); 63 | resultGraphs.append(testFailureListEl); 64 | resultGraphs.append(document.createElement("br")); 65 | 66 | let testSkipStatsChartEl = document.createElement("div"); 67 | resultGraphs.append(testSkipStatsChartEl); 68 | await common.renderTestSkipStatsChart(testSkipStatsChartEl); 69 | 70 | const external_components = common.allComponents.filter( 71 | (component) => !common.getOption("components").includes(component) 72 | ); 73 | 74 | const dependencyHeatmapChartEl = document.createElement("div"); 75 | dependencySection.append(dependencyHeatmapChartEl); 76 | await common.renderDependencyHeatmap( 77 | dependencyHeatmapChartEl, 78 | "Dependencies from external components (columns) to selected components (rows)", 79 | external_components, 80 | common.getOption("components") 81 | ); 82 | } 83 | 84 | (async function init() { 85 | let startDate = Temporal.Now.plainDateISO().subtract({ years: 1 }).toString(); 86 | document.getElementById("createStartDate").value = document.getElementById( 87 | "fixStartDate" 88 | ).value = startDate; 89 | 90 | await 
common.setupOptions(renderUI); 91 | 92 | await renderUI(); 93 | })(); 94 | --------------------------------------------------------------------------------