├── .codecov.yml
├── .dockerignore
├── .github
├── dependabot.yml
└── workflows
│ └── add_to_project.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierrc
├── .taskcluster.yml
├── CITATION.cff
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── VERSION
├── bugbug
├── __init__.py
├── bug_features.py
├── bug_snapshot.py
├── bugzilla.py
├── code_search
│ ├── __init__.py
│ ├── function_search.py
│ ├── mozilla.py
│ ├── parser.py
│ ├── searchfox_api.py
│ ├── searchfox_data.py
│ └── searchfox_download.py
├── commit_features.py
├── db.py
├── feature_cleanup.py
├── generative_model_tool.py
├── github.py
├── issue_features.py
├── issue_snapshot.py
├── labels.py
├── labels
│ ├── annotateignore.csv
│ ├── bug_nobug.csv
│ ├── defect_enhancement_task.csv
│ ├── defect_enhancement_task_e.csv
│ ├── defect_enhancement_task_h.csv
│ ├── defect_enhancement_task_p.csv
│ ├── defect_enhancement_task_s.csv
│ ├── regression_bug_nobug.csv
│ ├── regressionrange.csv
│ ├── str.csv
│ └── tracking.csv
├── model.py
├── model_calibration.py
├── models
│ ├── __init__.py
│ ├── accessibility.py
│ ├── annotate_ignore.py
│ ├── assignee.py
│ ├── backout.py
│ ├── browsername.py
│ ├── bugtype.py
│ ├── component.py
│ ├── defect.py
│ ├── defect_enhancement_task.py
│ ├── devdocneeded.py
│ ├── fenixcomponent.py
│ ├── fixtime.py
│ ├── invalid_compatibility_report.py
│ ├── needsdiagnosis.py
│ ├── performancebug.py
│ ├── qaneeded.py
│ ├── rcatype.py
│ ├── regression.py
│ ├── regressionrange.py
│ ├── regressor.py
│ ├── spambug.py
│ ├── stepstoreproduce.py
│ ├── testfailure.py
│ ├── testselect.py
│ ├── tracking.py
│ ├── uplift.py
│ └── worksforme.py
├── nlp.py
├── phabricator.py
├── repository.py
├── rust_code_analysis_server.py
├── swarm.py
├── test_scheduling.py
├── test_scheduling_features.py
├── tools
│ ├── __init__.py
│ ├── code_review.py
│ └── comment_resolver.py
├── utils.py
└── vectordb.py
├── docker-compose.yml
├── docs
├── README.md
├── data.md
└── models
│ └── regressor.md
├── experiments
└── review_helper_modify_filtering_step.ipy
├── extra-nlp-requirements.txt
├── extra-nn-requirements.txt
├── functions
├── diff2html
│ ├── index.js
│ ├── package-lock.json
│ └── package.json
└── sync-review-comments-db
│ ├── database.py
│ ├── main.py
│ ├── models.py
│ └── requirements.txt
├── http_service
├── .dockerignore
├── Dockerfile
├── Dockerfile.bg_worker
├── MANIFEST.in
├── README.md
├── bugbug_http
│ ├── __init__.py
│ ├── app.py
│ ├── boot.py
│ ├── download_models.py
│ ├── listener.py
│ ├── models.py
│ ├── readthrough_cache.py
│ ├── sentry.py
│ ├── templates
│ │ └── doc.html
│ └── worker.py
├── docker-compose.yml
├── ensure_models.sh
├── requirements.txt
├── setup.py
└── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── pytest.ini
│ ├── test_bug_classification.py
│ ├── test_get_config_specific_groups.py
│ ├── test_integration.py
│ ├── test_push_schedules.py
│ ├── test_readthrough_cache.py
│ └── test_schedule_tests.py
├── infra
├── check-pipeline.yml
├── data-pipeline.yml
├── dockerfile.base
├── dockerfile.base-nlp
├── dockerfile.commit_retrieval
├── dockerfile.spawn_pipeline
├── hgrc
├── landings-pipeline.yml
├── mozci_config.toml
├── set_hook_version.py
├── spawn_pipeline.py
├── spawn_pipeline_requirements.txt
├── taskcluster-hook-check-models-start.json
├── taskcluster-hook-classify-patch.json
├── taskcluster-hook-data-pipeline.json
├── taskcluster-hook-landings-risk-report.json
├── taskcluster-hook-test-select.json
└── version_check.py
├── pyproject.toml
├── requirements.txt
├── scripts
├── __init__.py
├── analyze_training_metrics.py
├── backout_related_test_regressions.py
├── bug_classifier.py
├── bug_retriever.py
├── check.py
├── check_all_metrics.py
├── code_review_tool_evaluator.py
├── code_review_tool_evaluator_report.py
├── code_review_tool_runner.py
├── comment_level_labeler.py
├── comment_resolver_evaluator.py
├── comment_resolver_runner.py
├── commit_classifier.py
├── commit_retriever.py
├── compatibility_report_classifier.py
├── generate_landings_risk_report.py
├── generate_sheet.py
├── get_type_labels.py
├── get_untriaged.py
├── github_issue_classifier.py
├── github_issue_retriever.py
├── inline_comments_data_collection.py
├── integration_test.sh
├── maintenance_effectiveness_indicator.py
├── microannotate_generator.py
├── past_bugs_by_unit.py
├── redundant_failures.py
├── regressor_finder.py
├── retrieve_training_metrics.py
├── review_comments_retriever.py
├── revision_retriever.py
├── shadow_scheduler_stats.py
├── test_scheduling_history_retriever.py
├── testing_policy_stats.py
├── trainer.py
└── trainer_extract_args.py
├── setup.py
├── test-requirements.txt
├── tests
├── conftest.py
├── fixtures
│ ├── bug_features
│ │ ├── blocked_bugs_number.json
│ │ ├── bug_reporter.json
│ │ ├── bug_types.json
│ │ ├── comment_count.json
│ │ ├── comment_length.json
│ │ ├── component.json
│ │ ├── has_crash_signature.json
│ │ ├── has_cve_in_alias.json
│ │ ├── has_github_url.json
│ │ ├── has_regression_range.json
│ │ ├── has_str.json
│ │ ├── has_url.json
│ │ ├── has_w3c_url.json
│ │ ├── is_coverity_issue.json
│ │ ├── is_mozillian.json
│ │ ├── keywords.json
│ │ ├── landings.json
│ │ ├── nightly_uplift.json
│ │ ├── patches.json
│ │ ├── product.json
│ │ ├── severity.json
│ │ └── whiteboard.json
│ ├── bugs.json
│ ├── commits.json
│ └── github_webcompat_web-bugs_issues.json
├── test_assignee.py
├── test_backout.py
├── test_bug.py
├── test_bug_features.py
├── test_bug_snapshot.py
├── test_bugtype.py
├── test_bugzilla.py
├── test_code_review.py
├── test_commit_features.py
├── test_db.py
├── test_defect.py
├── test_defect_enhancement_task.py
├── test_devdocneeded.py
├── test_feature_cleanup.py
├── test_github.py
├── test_github_issue_retriever.py
├── test_hooks.py
├── test_invalid_compatibility_report.py
├── test_labels.py
├── test_models.py
├── test_needsdiagnosis.py
├── test_performancebug.py
├── test_phabricator.py
├── test_pipelines.py
├── test_qaneeded.py
├── test_rcatype.py
├── test_regression.py
├── test_repository.py
├── test_stepstoreproduce.py
├── test_test_scheduling.py
├── test_test_scheduling_features.py
├── test_testselect.py
├── test_tracking.py
├── test_trainer.py
├── test_uplift.py
└── test_utils.py
└── ui
└── changes
├── .eslintrc.yml
├── package-lock.json
├── package.json
├── snowpack.config.js
└── src
├── bug.html
├── bug.js
├── common.js
├── css
├── common.css
└── page.css
├── feature.html
├── feature.js
├── index.html
├── index.js
├── release.html
├── release.js
├── team.html
├── team.js
├── testing.html
└── testing.js
/.codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 | coverage:
3 | status:
4 | project:
5 | default:
6 | only_pulls: true
7 | patch:
8 | default:
9 | only_pulls: true
10 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Git
2 | .git
3 | .gitignore
4 |
5 | # CI
6 | .codeclimate.yml
7 | .travis.yml
8 |
9 | # Docker
10 | docker-compose.yml
11 | .docker
12 |
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | */__pycache__/
16 | */*/__pycache__/
17 | */*/*/__pycache__/
18 | *.py[cod]
19 | */*.py[cod]
20 | */*/*.py[cod]
21 | */*/*/*.py[cod]
22 |
23 | # C extensions
24 | *.so
25 |
26 | # Distribution / packaging
27 | .Python
28 | env/
29 | build/
30 | develop-eggs/
31 | dist/
32 | downloads/
33 | eggs/
34 | lib/
35 | lib64/
36 | parts/
37 | sdist/
38 | var/
39 | *.egg-info/
40 | .installed.cfg
41 | *.egg
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .coverage
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Virtual environment
75 | .env/
76 | .venv/
77 | venv/
78 |
79 | # PyCharm
80 | .idea
81 |
82 | # Python mode for VIM
83 | .ropeproject
84 | */.ropeproject
85 | */*/.ropeproject
86 | */*/*/.ropeproject
87 |
88 | # Vim swap files
89 | *.swp
90 | */*.swp
91 | */*/*.swp
92 | */*/*/*.swp
93 |
94 | # Pytest files
95 | **/.pytest_cache/
96 |
97 | # Project-specific stuff
98 | cache/
99 | data/
100 | http_service/
101 | .taskcluster.yml
102 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: docker
4 | directory: "/http_service"
5 | schedule:
6 | interval: weekly
7 | open-pull-requests-limit: 99
8 | - package-ecosystem: docker
9 | directory: "/infra"
10 | schedule:
11 | interval: weekly
12 | open-pull-requests-limit: 99
13 | - package-ecosystem: pip
14 | directory: "/"
15 | schedule:
16 | interval: weekly
17 | open-pull-requests-limit: 99
18 | allow:
19 | - dependency-type: direct
20 | - dependency-type: indirect
21 | - package-ecosystem: npm
22 | directory: "/ui/changes"
23 | schedule:
24 | interval: weekly
25 | open-pull-requests-limit: 99
26 |
--------------------------------------------------------------------------------
/.github/workflows/add_to_project.yaml:
--------------------------------------------------------------------------------
1 | name: Add new issues to the team project
2 |
3 | on:
4 | issues:
5 | types:
6 | - opened
7 |
8 | jobs:
9 | add-to-project:
10 | name: Add issue to project
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/add-to-project@v0.5.0
14 | with:
15 | project-url: https://github.com/orgs/mozilla/projects/214
16 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *model
2 | *model.zst
3 | *model.zst.etag
4 | *data_X
5 | *data_y
6 | *data_X.zst
7 | *data_X.zst.etag
8 | *data_y.zst
9 | *data_y.zst.etag
10 | feature_importance.png
11 | importance.html
12 | importances.json
13 | metrics.json
14 | probs.json
15 | http_service/models/*model*
16 |
17 | data/
18 | sheets/
19 |
20 | .mypy_cache/
21 | .pytest_cache/
22 | *.pyc
23 | .coverage
24 |
25 | # Distribution / packaging
26 | .Python
27 | build/
28 | develop-eggs/
29 | dist/
30 | downloads/
31 | eggs/
32 | .eggs/
33 | lib/
34 | lib64/
35 | parts/
36 | sdist/
37 | var/
38 | wheels/
39 | pip-wheel-metadata/
40 | share/python-wheels/
41 | *.egg-info/
42 | .installed.cfg
43 | *.egg
44 | MANIFEST
45 | cache/
46 | node_modules/
47 |
48 | # Logs
49 | *.log
50 | # macOS Desktop Services Store files
51 | *.DS_Store
52 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/mirrors-prettier
3 | rev: v4.0.0-alpha.8
4 | hooks:
5 | - id: prettier
6 | exclude: ^tests/fixtures/
7 | - repo: https://github.com/astral-sh/ruff-pre-commit
8 | rev: v0.11.5
9 | hooks:
10 | - id: ruff
11 | args: [--fix]
12 | - id: ruff-format
13 | - repo: https://github.com/pycqa/pydocstyle
14 | rev: 6.3.0
15 | hooks:
16 | - id: pydocstyle
17 | exclude: ^http_service/
18 | args:
19 | - --convention=google
20 | # Ignoring warnings about missing docstrings.
21 | - --add-ignore=D100,D101,D102,D103,D104,D105,D107
22 | - repo: https://github.com/pre-commit/pre-commit-hooks
23 | rev: v5.0.0
24 | hooks:
25 | - id: check-ast
26 | - id: check-docstring-first
27 | - id: check-executables-have-shebangs
28 | - id: check-merge-conflict
29 | - id: check-symlinks
30 | - id: debug-statements
31 | - id: trailing-whitespace
32 | exclude: ^tests/test_repository.py
33 | - id: check-yaml
34 | - id: mixed-line-ending
35 | - id: name-tests-test
36 | args: ["--django"]
37 | - id: check-json
38 | exclude: ^tests/fixtures/
39 | - id: requirements-txt-fixer
40 | - id: check-vcs-permalinks
41 | - repo: https://github.com/codespell-project/codespell
42 | rev: v2.4.1
43 | hooks:
44 | - id: codespell
45 | exclude_types: [json]
46 | - repo: https://github.com/marco-c/taskcluster_yml_validator
47 | rev: v0.0.12
48 | hooks:
49 | - id: taskcluster_yml
50 | - repo: https://github.com/asottile/yesqa
51 | rev: v1.5.0
52 | hooks:
53 | - id: yesqa
54 | - repo: https://github.com/pre-commit/mirrors-mypy
55 | rev: v1.15.0
56 | hooks:
57 | - id: mypy
58 | name: mypy-bugbug
59 | files: ^bugbug/|^scripts/|^tests/
60 | entry: mypy bugbug/ scripts/ tests/
61 | pass_filenames: false
62 | additional_dependencies:
63 | - types-pkg_resources==0.1.2
64 | - types-python-dateutil==0.1.3
65 | - types-PyYAML==5.4.0
66 | - types-orjson==0.1.0
67 | - types-tabulate==0.9.0.20240106
68 | - types-requests==0.1.11
69 | - id: mypy
70 | name: mypy-bugbug-http
71 | files: ^http_service/
72 | entry: mypy http_service/
73 | pass_filenames: false
74 | additional_dependencies:
75 | - types-pkg_resources==0.1.2
76 | - types-requests==0.1.11
77 | - types-Flask==1.1.0
78 | - types-redis==3.5.1
79 | - types-python-dateutil==0.1.3
80 | - types-orjson==0.1.0
81 | - types-tabulate==0.9.0.20240106
82 | - repo: meta
83 | hooks:
84 | - id: check-useless-excludes
85 | default_language_version:
86 | python: python3.12
87 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "endOfLine": "lf",
3 | "printWidth": 80,
4 | "tabWidth": 2,
5 | "trailingComma": "es5"
6 | }
7 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | type: software
4 | authors:
5 | - family-names: Castelluccio
6 | given-names: Marco
7 | orcid: https://orcid.org/0000-0002-3285-5121
8 | affiliation: Mozilla
9 | title: bugbug
10 | doi: 10.5281/zenodo.4911345
11 | identifiers:
12 | - type: doi
13 | value: 10.5281/zenodo.4911345
14 | repository-code: https://github.com/mozilla/bugbug
15 | license: MPL-2.0
16 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Community Participation Guidelines
2 |
3 | This repository is governed by Mozilla's code of conduct and etiquette guidelines.
4 | For more details, please read the
5 | [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
6 |
7 | ## How to Report
8 |
9 | For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page.
10 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing
2 |
3 | Chat with us in the [bugbug](https://chat.mozilla.org/#/room/#bugbug:mozilla.org) Matrix room.
4 |
5 | 1. [Issues marked as `good-first-bug`](https://github.com/mozilla/bugbug/labels/good-first-bug) are self-contained enough that a contributor should be able to work on them.
6 | 2. Issues are considered unassigned until a PR is linked to them. Feel free to work on any unassigned issue; you don't need to ask first.
7 | 3. If you have any problem, it could be already answered in [Discussions](https://github.com/mozilla/bugbug/discussions), if not, feel free to start a new discussion in the [Q&A](https://github.com/mozilla/bugbug/discussions/categories/q-a) category.
8 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include VERSION
2 | include requirements.txt
3 | include extra-nlp-requirements.txt
4 | include extra-nn-requirements.txt
5 | recursive-include bugbug/labels *
6 |
7 | recursive-exclude * __pycache__
8 | recursive-exclude * *.py[co]
9 | recursive-exclude tests *
10 | recursive-exclude data *
11 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.578
2 |
--------------------------------------------------------------------------------
/bugbug/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import importlib.metadata
import logging

# Configure root logging once, at package import time, so every bugbug
# module emits timestamped records with level and logger name.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s:%(levelname)s:%(name)s:%(message)s"
)


def get_bugbug_version():
    """Return the installed version string of the ``bugbug`` distribution."""
    return importlib.metadata.version("bugbug")
13 |
--------------------------------------------------------------------------------
/bugbug/code_search/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
--------------------------------------------------------------------------------
/bugbug/code_search/function_search.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from abc import ABC, abstractmethod
7 | from dataclasses import dataclass
8 |
9 |
@dataclass
class Function:
    """A function definition located somewhere in the searched source tree."""

    # Name of the function.
    name: str
    # Line at which the function starts — assumed 1-based; TODO confirm against producers.
    start: int
    # Path of the file that contains the function.
    file: str
    # Source text of the function.
    source: str
16 |
17 |
class FunctionSearch(ABC):
    """Abstract interface for looking up function definitions at a given commit."""

    @abstractmethod
    def get_function_by_line(
        self, commit_hash: str, path: str, line: int
    ) -> list[Function]:
        """Return the function(s) covering ``line`` of ``path`` at ``commit_hash``."""
        raise NotImplementedError

    @abstractmethod
    def get_function_by_name(
        self, commit_hash: str, path: str, function_name: str
    ) -> list[Function]:
        """Return the function(s) named ``function_name`` in ``path`` at ``commit_hash``."""
        raise NotImplementedError
30 |
31 |
# Global registry mapping a backend name to its FunctionSearch class.
function_search_classes = {}


def register_function_search(name, cls):
    """Register ``cls`` as the FunctionSearch implementation for ``name``.

    A later registration under the same name silently replaces the earlier one.
    """
    function_search_classes[name] = cls
37 |
--------------------------------------------------------------------------------
/bugbug/issue_features.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import sys
7 |
8 | import pandas as pd
9 | from sklearn.base import BaseEstimator, TransformerMixin
10 |
11 | from bugbug import issue_snapshot
12 |
13 |
class CommentCount:
    """Feature extractor: the number of comments posted on an issue."""

    # Human-readable feature name, used when reporting feature importances.
    name = "# of comments"

    def __call__(self, issue, **kwargs):
        """Return the raw comment count stored on the issue dict."""
        count = issue["comments"]
        return count
19 |
20 |
class IssueExtractor(BaseEstimator, TransformerMixin):
    """Turn raw GitHub issue dicts into a DataFrame of features and cleaned text.

    Each output row carries a ``data`` dict of extracted features plus the
    issue title and first comment (body) after the cleanup functions ran.
    """

    def __init__(
        self,
        feature_extractors,
        cleanup_functions,
        rollback=False,
        rollback_when=None,
    ):
        # Each extractor/cleanup type may appear at most once, otherwise
        # features would silently overwrite each other.
        assert len({type(fe) for fe in feature_extractors}) == len(
            feature_extractors
        ), "Duplicate Feature Extractors"
        self.feature_extractors = feature_extractors

        assert len({type(cf) for cf in cleanup_functions}) == len(
            cleanup_functions
        ), "Duplicate Cleanup Functions"
        self.cleanup_functions = cleanup_functions
        self.rollback = rollback
        self.rollback_when = rollback_when

    def fit(self, x, y=None):
        """Fit any stateful feature extractors on the issues produced by ``x()``."""
        for extractor in self.feature_extractors:
            if hasattr(extractor, "fit"):
                extractor.fit(x())

        return self

    def transform(self, issues):
        """Extract features from each issue yielded by the callable ``issues``.

        Returns:
            A pandas DataFrame with ``data``, ``title`` and ``first_comment``
            columns, one row per issue.
        """
        rows = []

        for issue in issues():
            if self.rollback:
                # Optionally revert the issue to an earlier state first.
                issue = issue_snapshot.rollback(issue, self.rollback_when)

            features = {}

            for extractor in self.feature_extractors:
                value = extractor(issue)

                extractor_name = getattr(
                    extractor, "name", extractor.__class__.__name__
                )

                if value is None:
                    continue

                if isinstance(value, (list, set)):
                    # Multi-valued features become one boolean entry per item.
                    for item in value:
                        features[sys.intern(f"{item} in {extractor_name}")] = True
                    continue

                features[extractor_name] = value

            title = issue["title"]
            body = issue["body"]
            for cleanup in self.cleanup_functions:
                title = cleanup(title)
                body = cleanup(body)

            rows.append(
                {
                    "data": features,
                    "title": title,
                    "first_comment": body,
                }
            )

        return pd.DataFrame(rows)
90 |
--------------------------------------------------------------------------------
/bugbug/issue_snapshot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
def rollback(issue, when=None):
    """Restore an earlier title on a GitHub issue dict, in place.

    Walks the issue's event list and, for every "renamed" event whose
    previous title is not one of the automation placeholder titles,
    overwrites ``issue["title"]`` with that previous title (so the last
    matching rename event wins).

    Args:
        issue: raw issue dict with "events" and "title" keys.
        when: unsupported; must be None.

    Returns:
        The same issue dict, possibly with an updated title.
    """
    assert when is None, "Rollback to a specific point in history is not supported yet."

    # Placeholder titles set by automation that should never be restored.
    skipped_titles = ("In the moderation queue.", "Issue closed.")

    for event in issue["events"] or []:
        if event["event"] != "renamed":
            continue

        previous_title = event["rename"]["from"]
        if previous_title not in skipped_titles:
            issue["title"] = previous_title

    return issue
21 |
--------------------------------------------------------------------------------
/bugbug/labels.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import csv
7 | import os
8 | import sys
9 |
10 |
def get_labels_dir():
    """Return the absolute path of the ``labels`` directory bundled with the package."""
    return os.path.join(os.path.dirname(sys.modules[__package__].__file__), "labels")
13 |
14 |
def get_labels(file_name):
    """Yield rows from ``labels/<file_name>.csv``, skipping the header row.

    Args:
        file_name: base name of the CSV file (without the ``.csv`` extension).

    Yields:
        Each data row as a list of strings.
    """
    path = os.path.join(get_labels_dir(), f"{file_name}.csv")

    # newline="" is the csv-module-documented way to open CSV files; the
    # default of None for the header skip tolerates an empty file (a bare
    # StopIteration inside a generator would become a RuntimeError, PEP 479).
    with open(path, "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)
        yield from reader
22 |
23 |
def get_all_bug_ids():
    """Collect the distinct bug ids referenced by any labels CSV file.

    Files without a ``bug_id`` column are skipped; this includes empty
    files, for which ``DictReader.fieldnames`` is None (the original code
    would raise TypeError on ``"bug_id" not in None``).

    Returns:
        A list of unique bug ids as ints, in no particular order.
    """
    bug_ids = set()

    labels_dir = get_labels_dir()
    for csv_file in os.listdir(labels_dir):
        with open(os.path.join(labels_dir, csv_file), newline="") as f:
            reader = csv.DictReader(f)
            # fieldnames is None for an empty file; treat it like a file
            # without a bug_id column.
            if not reader.fieldnames or "bug_id" not in reader.fieldnames:
                continue

            bug_ids.update(int(row["bug_id"]) for row in reader)

    return list(bug_ids)
37 |
--------------------------------------------------------------------------------
/bugbug/labels/tracking.csv:
--------------------------------------------------------------------------------
1 | bug_id,tracking
2 | 1521010,False
3 | 1521022,False
4 | 1521034,False
5 | 1521037,False
6 | 1521039,False
7 | 1521071,False
8 | 1521080,False
9 | 1521082,False
10 | 1521085,False
11 | 1521088,False
12 | 1521095,False
13 | 1521156,False
14 | 1521158,False
15 | 1521169,False
16 | 1521205,False
17 | 1521221,False
18 | 1521249,False
19 | 1521308,False
20 | 1521336,False
21 | 1521372,False
22 | 1521473,False
23 | 1521498,False
24 | 1521568,True
25 | 1521577,False
26 | 1521579,True
27 | 1521583,False
28 | 1521591,False
29 | 1521597,False
30 | 1521630,True
31 | 1521989,False
32 | 1521991,False
33 | 1521992,False
34 | 1521993,False
35 | 1521994,False
36 | 1521995,False
37 | 1521999,False
38 | 1522000,False
39 | 1522002,False
40 | 1522007,False
41 | 1522008,False
42 | 1522010,False
43 | 1522012,False
44 | 1522017,False
45 | 1522018,False
46 | 1522019,True
47 | 1522023,False
48 | 1522029,False
49 | 1522061,False
50 | 1522077,False
51 | 1522083,False
52 | 1522109,False
53 | 1522118,False
54 | 1522122,False
55 | 1522125,False
56 | 1522127,False
57 | 1522129,False
58 | 1522130,False
59 | 1522134,False
60 | 1522136,False
61 | 1522138,False
62 | 1522139,False
63 | 1522173,False
64 | 1522181,False
65 | 1522182,False
66 | 1522186,False
67 | 1522187,False
68 | 1522188,False
69 | 1522189,False
70 | 1522190,False
71 | 1522191,False
72 | 1522194,False
73 | 1522195,False
74 | 1522201,False
75 | 1522202,False
76 | 1522203,False
77 | 1522204,False
78 | 1522205,False
79 | 1522207,False
80 | 1522208,False
81 | 1522210,False
82 | 1522237,False
83 | 1522242,False
84 | 1522249,False
85 | 1522254,False
86 | 1522259,False
87 | 1522268,False
88 | 1522276,False
89 | 1522277,False
90 | 1522279,False
91 | 1522280,False
92 | 1522294,False
93 | 1522298,False
94 | 1522300,False
95 | 1522302,False
96 | 1522314,False
97 | 1522315,True
98 |
--------------------------------------------------------------------------------
/bugbug/model_calibration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from sklearn.base import BaseEstimator, ClassifierMixin
7 | from sklearn.calibration import CalibratedClassifierCV
8 | from sklearn.model_selection import train_test_split
9 |
10 |
class IsotonicRegressionCalibrator(BaseEstimator, ClassifierMixin):
    """Wrap a classifier with isotonic probability calibration.

    The base classifier is trained on 80% of the data and a
    ``CalibratedClassifierCV(cv="prefit", method="isotonic")`` is fitted on
    the held-out 20% to calibrate its predicted probabilities.
    """

    def __init__(self, base_clf):
        self.base_clf = base_clf
        self.calibrated_clf = CalibratedClassifierCV(
            base_clf, cv="prefit", method="isotonic"
        )

    def fit(self, X_train, y_train):
        """Fit ``base_clf`` on a training split, then calibrate on a validation split.

        Returns:
            self, per the scikit-learn estimator contract (the original
            returned None, so existing callers are unaffected).
        """
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=0.2, random_state=42
        )
        self.base_clf.fit(X_train, y_train)
        self.calibrated_clf.fit(X_val, y_val)
        return self

    def predict(self, X):
        """Predict class labels using the calibrated classifier."""
        return self.calibrated_clf.predict(X)

    def predict_proba(self, X):
        """Predict calibrated class probabilities."""
        return self.calibrated_clf.predict_proba(X)

    @property
    def n_features_in_(self):
        # Delegate to the underlying classifier for sklearn introspection.
        return self.base_clf.n_features_in_
34 |
--------------------------------------------------------------------------------
/bugbug/models/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import importlib
3 | import logging
4 | from typing import Type
5 |
6 | from bugbug.model import Model
7 |
8 | LOGGER = logging.getLogger()
9 |
10 |
# Registry mapping a model name (as used on the command line and by the HTTP
# service) to the fully qualified class implementing it. Classes are imported
# lazily by get_model_class so importing this module stays cheap.
MODELS = {
    "accessibility": "bugbug.models.accessibility.AccessibilityModel",
    "annotateignore": "bugbug.models.annotate_ignore.AnnotateIgnoreModel",
    "assignee": "bugbug.models.assignee.AssigneeModel",
    "backout": "bugbug.models.backout.BackoutModel",
    "browsername": "bugbug.models.browsername.BrowserNameModel",
    "bugtype": "bugbug.models.bugtype.BugTypeModel",
    "component": "bugbug.models.component.ComponentModel",
    "defect": "bugbug.models.defect.DefectModel",
    "defectenhancementtask": "bugbug.models.defect_enhancement_task.DefectEnhancementTaskModel",
    "devdocneeded": "bugbug.models.devdocneeded.DevDocNeededModel",
    "fixtime": "bugbug.models.fixtime.FixTimeModel",
    "invalidcompatibilityreport": "bugbug.models.invalid_compatibility_report.InvalidCompatibilityReportModel",
    "needsdiagnosis": "bugbug.models.needsdiagnosis.NeedsDiagnosisModel",
    "performancebug": "bugbug.models.performancebug.PerformanceBugModel",
    "qaneeded": "bugbug.models.qaneeded.QANeededModel",
    "rcatype": "bugbug.models.rcatype.RCATypeModel",
    "regression": "bugbug.models.regression.RegressionModel",
    "regressionrange": "bugbug.models.regressionrange.RegressionRangeModel",
    "regressor": "bugbug.models.regressor.RegressorModel",
    "spambug": "bugbug.models.spambug.SpamBugModel",
    "stepstoreproduce": "bugbug.models.stepstoreproduce.StepsToReproduceModel",
    "testlabelselect": "bugbug.models.testselect.TestLabelSelectModel",
    "testgroupselect": "bugbug.models.testselect.TestGroupSelectModel",
    "testconfiggroupselect": "bugbug.models.testselect.TestConfigGroupSelectModel",
    "testfailure": "bugbug.models.testfailure.TestFailureModel",
    "tracking": "bugbug.models.tracking.TrackingModel",
    "uplift": "bugbug.models.uplift.UpliftModel",
    "worksforme": "bugbug.models.worksforme.WorksForMeModel",
    "fenixcomponent": "bugbug.models.fenixcomponent.FenixComponentModel",
}
42 |
43 |
def get_model_class(model_name: str) -> Type[Model]:
    """Look up and lazily import the model class registered under ``model_name``.

    Raises:
        ValueError: if ``model_name`` is not a key in ``MODELS``.
    """
    if model_name not in MODELS:
        err_msg = f"Invalid name {model_name}, not in {list(MODELS.keys())}"
        raise ValueError(err_msg)

    module_name, class_name = MODELS[model_name].rsplit(".", 1)

    return getattr(importlib.import_module(module_name), class_name)
55 |
--------------------------------------------------------------------------------
/bugbug/models/browsername.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 |
8 | import xgboost
9 | from sklearn.compose import ColumnTransformer
10 | from sklearn.feature_extraction import DictVectorizer
11 | from sklearn.pipeline import Pipeline
12 |
13 | from bugbug import feature_cleanup, github, issue_features, utils
14 | from bugbug.model import IssueModel
15 |
16 | logger = logging.getLogger(__name__)
17 |
18 |
class BrowserNameModel(IssueModel):
    """Classify GitHub issues as belonging to Firefox or not.

    An issue is positive (class 1) when it carries the "browser-firefox"
    label; everything else is class 0.
    """

    def __init__(self, lemmatization=False):
        IssueModel.__init__(self, lemmatization)

        feature_extractors = [
            issue_features.CommentCount(),
        ]

        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Stage 1: raw issues -> rows with "data", "title", "first_comment".
        self.extraction_pipeline = Pipeline(
            [
                (
                    "issue_extractor",
                    issue_features.IssueExtractor(
                        feature_extractors, cleanup_functions
                    ),
                ),
            ]
        )

        # Stage 2: vectorize the three columns and classify with XGBoost.
        self.clf = Pipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("data", DictVectorizer(), "data"),
                            ("title", self.text_vectorizer(min_df=0.0001), "title"),
                            (
                                "first_comment",
                                self.text_vectorizer(min_df=0.0001),
                                "first_comment",
                            ),
                        ]
                    ),
                ),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Build labels from GitHub issues: 1 if labeled "browser-firefox", else 0.

        Returns:
            A tuple of (issue number -> label dict, ordered class list [0, 1]).
        """
        classes = {}

        for issue in github.get_issues():
            for label in issue["labels"]:
                if label["name"] == "browser-firefox":
                    classes[issue["number"]] = 1

            if issue["number"] not in classes:
                classes[issue["number"]] = 0

        logger.info(
            "%d issues belong to Firefox",
            sum(label == 1 for label in classes.values()),
        )
        logger.info(
            "%d issues do not belong to Firefox",
            sum(label == 0 for label in classes.values()),
        )

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the feature names produced by the "union" ColumnTransformer."""
        return self.clf.named_steps["union"].get_feature_names_out()
91 |
--------------------------------------------------------------------------------
/bugbug/models/defect_enhancement_task.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 | from typing import Any
8 |
9 | from bugbug.models.defect import DefectModel
10 |
11 | logging.basicConfig(level=logging.INFO)
12 | logger = logging.getLogger(__name__)
13 |
14 |
class DefectEnhancementTaskModel(DefectModel):
    """Three-way classifier telling defects, enhancements and tasks apart."""

    def __init__(self, lemmatization=False, historical=False):
        DefectModel.__init__(self, lemmatization, historical)

        self.calculate_importance = False

    def get_labels(self) -> tuple[dict[int, Any], list[Any]]:
        """Return bug-ID -> label mapping and the ordered class list."""
        classes = self.get_bugbug_labels("defect_enhancement_task")

        for label_name, fmt in (
            ("defect", "%d defects"),
            ("enhancement", "%d enhancements"),
            ("task", "%d tasks"),
        ):
            logger.info(fmt, sum(label == label_name for label in classes.values()))

        return classes, ["defect", "enhancement", "task"]

    def overwrite_classes(self, bugs, classes, probabilities):
        """Force obvious labels from bug metadata over the model's predictions."""
        for i, bug in enumerate(bugs):
            # Regression markers trump everything: the bug must be a defect.
            is_regression = (
                "regression" in bug["keywords"]
                or "talos-regression" in bug["keywords"]
                or bug.get("cf_has_regression_range") == "yes"
                or len(bug["regressed_by"]) > 0
            )

            if is_regression:
                classes[i] = [1.0, 0.0, 0.0] if probabilities else "defect"
            elif "feature" in bug["keywords"]:
                classes[i] = [0.0, 1.0, 0.0] if probabilities else "enhancement"

        return classes

    def get_extra_data(self):
        """Expose the label-encoder mapping (label name -> class index)."""
        labels = self.le.inverse_transform([0, 1, 2])
        labels_map = {str(label): index for index, label in enumerate(labels)}

        return {"labels_map": labels_map}
57 |
--------------------------------------------------------------------------------
/bugbug/models/needsdiagnosis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 |
8 | import xgboost
9 | from sklearn.compose import ColumnTransformer
10 | from sklearn.pipeline import Pipeline
11 |
12 | from bugbug import feature_cleanup, issue_features, utils
13 | from bugbug.model import IssueModel
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
class NeedsDiagnosisModel(IssueModel):
    """Predict whether a webcompat issue still needs diagnosis."""

    def __init__(self, lemmatization=False):
        IssueModel.__init__(
            self, owner="webcompat", repo="web-bugs", lemmatization=lemmatization
        )

        self.calculate_importance = False

        feature_extractors = []

        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # rollback=True: evaluate issues as they looked before triage events.
        self.extraction_pipeline = Pipeline(
            steps=[
                (
                    "issue_extractor",
                    issue_features.IssueExtractor(
                        feature_extractors, cleanup_functions, rollback=True
                    ),
                ),
            ]
        )

        text_columns = [
            ("title", self.text_vectorizer(min_df=0.0001), "title"),
            (
                "first_comment",
                self.text_vectorizer(min_df=0.0001),
                "first_comment",
            ),
        ]

        self.clf = Pipeline(
            steps=[
                ("union", ColumnTransformer(text_columns)),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Label issues 0 when milestoned "needsdiagnosis"/"moved", else 1."""
        classes = {}

        for issue in self.github.get_issues():
            # Skip issues with empty title or body.
            if issue["title"] is None or issue["body"] is None:
                continue

            # Skip issues that are not moderated yet as they don't have a
            # meaningful title or body.
            if issue["title"] == "In the moderation queue.":
                continue

            number = issue["number"]

            moved_to_needsdiagnosis = any(
                event["event"] == "milestoned"
                and event["milestone"]["title"] in ("needsdiagnosis", "moved")
                for event in issue["events"]
            )
            if moved_to_needsdiagnosis:
                classes[number] = 0

            if number not in classes:
                classes[number] = 1

        logger.info(
            "%d issues have not been moved to needsdiagnosis",
            sum(label == 1 for label in classes.values()),
        )
        logger.info(
            "%d issues have been moved to needsdiagnosis",
            sum(label == 0 for label in classes.values()),
        )

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the feature names produced by the "union" transformer."""
        union = self.clf.named_steps["union"]
        return union.get_feature_names_out()
102 |
--------------------------------------------------------------------------------
/bugbug/models/regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 | from typing import Any
8 |
9 | from bugbug.models.defect import DefectModel
10 |
11 | logging.basicConfig(level=logging.INFO)
12 | logger = logging.getLogger(__name__)
13 |
14 |
class RegressionModel(DefectModel):
    """Binary classifier: is a bug a regression or not?"""

    def __init__(self, lemmatization=False, historical=False):
        DefectModel.__init__(self, lemmatization, historical)
        self.calculate_importance = False

    def get_labels(self) -> tuple[dict[int, Any], list[int]]:
        """Return bug-ID -> {0, 1} labels from the "regression" label set."""
        classes = self.get_bugbug_labels("regression")

        logger.info("%d regression bugs", sum(label == 1 for label in classes.values()))
        logger.info(
            "%d non-regression bugs", sum(label == 0 for label in classes.values())
        )

        return classes, [0, 1]

    def overwrite_classes(self, bugs, classes, probabilities):
        """Force "non-regression" for bugs whose "regression" keyword was
        removed (and not re-added later), then apply the parent's overrides.
        """
        for i, bug in enumerate(bugs):
            # Track the final state of the "regression" keyword across the
            # bug's history: a later addition cancels an earlier removal.
            regression_keyword_removed = False
            for history in bug["history"]:
                for change in history["changes"]:
                    if change["field_name"] == "keywords":
                        if "regression" in [
                            k.strip() for k in change["removed"].split(",")
                        ]:
                            regression_keyword_removed = True
                        elif "regression" in [
                            k.strip() for k in change["added"].split(",")
                        ]:
                            regression_keyword_removed = False

            if regression_keyword_removed:
                classes[i] = 0 if not probabilities else [1.0, 0.0]

        # Bug fix: the parent's overrides were previously computed and then
        # discarded (the return value was ignored); capture the result so the
        # code is correct even if the parent returns a new object.
        classes = super().overwrite_classes(bugs, classes, probabilities)

        return classes
51 |
--------------------------------------------------------------------------------
/bugbug/models/regressionrange.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 |
8 | import xgboost
9 | from imblearn.pipeline import Pipeline as ImblearnPipeline
10 | from imblearn.under_sampling import RandomUnderSampler
11 | from sklearn.compose import ColumnTransformer
12 | from sklearn.feature_extraction import DictVectorizer
13 | from sklearn.pipeline import Pipeline
14 |
15 | from bugbug import bug_features, bugzilla, feature_cleanup, utils
16 | from bugbug.model import BugModel
17 |
18 | logging.basicConfig(level=logging.INFO)
19 | logger = logging.getLogger(__name__)
20 |
21 |
class RegressionRangeModel(BugModel):
    """Predict whether a regression bug has (or will get) a regression range."""

    def __init__(self, lemmatization=False):
        BugModel.__init__(self, lemmatization)

        feature_extractors = [
            bug_features.HasSTR(),
            bug_features.Severity(),
            bug_features.Keywords({"regression", "regressionwindow-wanted"}),
            bug_features.IsCoverityIssue(),
            bug_features.HasCrashSignature(),
            bug_features.HasURL(),
            bug_features.HasW3CURL(),
            bug_features.HasGithubURL(),
            bug_features.Whiteboard(),
            bug_features.Patches(),
            bug_features.Landings(),
        ]

        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        self.extraction_pipeline = Pipeline(
            steps=[
                (
                    "bug_extractor",
                    bug_features.BugExtractor(feature_extractors, cleanup_functions),
                ),
            ]
        )

        column_transformers = [
            ("data", DictVectorizer(), "data"),
            ("title", self.text_vectorizer(), "title"),
            ("comments", self.text_vectorizer(), "comments"),
        ]

        # Under-sample the majority class before fitting the classifier.
        self.clf = ImblearnPipeline(
            steps=[
                ("union", ColumnTransformer(column_transformers)),
                ("sampler", RandomUnderSampler(random_state=0)),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Label regression bugs: 1 when a range exists/was requested, else 0."""
        classes = {}

        for bug_data in bugzilla.get_bugs():
            keywords = bug_data["keywords"]
            # Only regressions are relevant for this model.
            if "regression" not in keywords:
                continue

            has_range = (
                bool(bug_data.get("regressed_by"))
                or "regressionwindow-wanted" in keywords
            )
            classes[int(bug_data["id"])] = 1 if has_range else 0

        with_range = sum(label == 1 for label in classes.values())
        logger.info(
            "%d bugs have regression range",
            with_range,
        )
        logger.info(
            "%d bugs don't have a regression range",
            len(classes) - with_range,
        )

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the feature names produced by the "union" transformer."""
        union = self.clf.named_steps["union"]
        return union.get_feature_names_out()
104 |
--------------------------------------------------------------------------------
/bugbug/models/uplift.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import xgboost
7 | from imblearn.pipeline import Pipeline as ImblearnPipeline
8 | from imblearn.under_sampling import RandomUnderSampler
9 | from sklearn.compose import ColumnTransformer
10 | from sklearn.feature_extraction import DictVectorizer
11 | from sklearn.pipeline import Pipeline
12 |
13 | from bugbug import bug_features, bugzilla, feature_cleanup, utils
14 | from bugbug.model import BugModel
15 |
16 |
class UpliftModel(BugModel):
    """Predict whether an uplift (approval-mozilla-*) request will be granted."""

    def __init__(self, lemmatization=False):
        BugModel.__init__(self, lemmatization)

        feature_extractors = [
            bug_features.HasSTR(),
            bug_features.HasRegressionRange(),
            bug_features.Severity(),
            bug_features.Keywords(),
            bug_features.IsCoverityIssue(),
            bug_features.HasCrashSignature(),
            bug_features.HasURL(),
            bug_features.HasW3CURL(),
            bug_features.HasGithubURL(),
            bug_features.Whiteboard(),
            bug_features.Patches(),
            bug_features.Landings(),
        ]

        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # Roll bugs back to their state right before the uplift decision.
        self.extraction_pipeline = Pipeline(
            steps=[
                (
                    "bug_extractor",
                    bug_features.BugExtractor(
                        feature_extractors,
                        cleanup_functions,
                        rollback=True,
                        rollback_when=self.rollback,
                    ),
                ),
            ]
        )

        column_transformers = [
            ("data", DictVectorizer(), "data"),
            ("title", self.text_vectorizer(), "title"),
            ("comments", self.text_vectorizer(), "comments"),
        ]

        self.clf = ImblearnPipeline(
            steps=[
                ("union", ColumnTransformer(column_transformers)),
                ("sampler", RandomUnderSampler(random_state=0)),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def rollback(self, change):
        """Return True for the history change that decided the uplift request."""
        if change["field_name"] != "flagtypes.name":
            return False

        added = change["added"]
        return added.startswith("approval-mozilla-") and added.endswith(("+", "-"))

    def get_labels(self):
        """Label bugs 1/0 from granted/denied approval-mozilla-* attachment flags."""
        classes = {}

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            for attachment in bug_data["attachments"]:
                for flag in attachment["flags"]:
                    if not flag["name"].startswith("approval-mozilla-"):
                        continue

                    if flag["status"] == "+":
                        classes[bug_id] = 1
                    elif flag["status"] == "-":
                        classes[bug_id] = 0

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the feature names produced by the "union" transformer."""
        union = self.clf.named_steps["union"]
        return union.get_feature_names_out()
105 |
--------------------------------------------------------------------------------
/bugbug/rust_code_analysis_server.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 | import subprocess
8 | import time
9 |
10 | import requests
11 |
12 | from bugbug import utils
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | START_RETRIES = 14
18 | HEADERS = {"Content-type": "application/octet-stream"}
19 |
20 |
class RustCodeAnalysisServer:
    """Manage a local rust-code-analysis-web process and query it over HTTP."""

    def __init__(self, thread_num: int | None = None):
        """Start the server and wait until it answers pings.

        Raises:
            RuntimeError: if the server never becomes ready, or if the
                rust-code-analysis-web binary is not installed.
        """
        # Bug fix: the spawn loop and the ping loop were previously two
        # sequential loops, so START_RETRIES processes were spawned up front
        # and only the last handle was kept (leaking the others). The ping
        # loop belongs inside the spawn-retry loop: spawn once, poll for
        # readiness, and only respawn if the process died.
        for _ in range(START_RETRIES):
            self.start_process(thread_num)

            for _ in range(START_RETRIES):
                if self.ping():
                    logger.info("Rust code analysis server is ready to accept queries")
                    return

                # The process died before becoming ready: retry the spawn.
                if self.proc.poll() is not None:
                    break

                time.sleep(0.35)

        self.terminate()
        raise RuntimeError("Unable to run rust-code-analysis server")

    @property
    def base_url(self):
        """Base URL of the locally-running server."""
        return f"http://127.0.0.1:{self.port}"

    def start_process(self, thread_num: int | None = None):
        """Spawn rust-code-analysis-web on a free TCP port.

        Raises:
            RuntimeError: if the binary is not on PATH.
        """
        self.port = utils.get_free_tcp_port()

        try:
            cmd = ["rust-code-analysis-web", "--port", str(self.port)]
            if thread_num is not None:
                cmd += ["-j", str(thread_num)]
            self.proc = subprocess.Popen(cmd)
        except FileNotFoundError:
            raise RuntimeError("rust-code-analysis is required for code analysis")

    def terminate(self):
        """Stop the server process, tolerating a never-started instance."""
        # getattr guard: "proc" is never set when start_process fails on the
        # first spawn, and terminate() may still be called afterwards.
        if getattr(self, "proc", None) is not None:
            self.proc.terminate()

    def __str__(self):
        return f"Server running at {self.base_url}"

    def ping(self):
        """Return True when the server answers its /ping endpoint."""
        try:
            r = requests.get(f"{self.base_url}/ping")
            return r.ok
        except requests.exceptions.ConnectionError:
            return False

    def metrics(self, filename, code, unit=True):
        """Get code metrics for a file.

        Args:
            filename: the path for the file that we want to analyze
            code: the content of the file
            unit: when unit is True, then only metrics for top-level is
                returned, when False, then we get detailed metrics for all
                classes, functions, nested functions, ...

        Returns:
            The parsed JSON metrics, or {} on any HTTP error.
        """
        unit = 1 if unit else 0
        # Bug fix: the file name was previously hard-coded as the literal
        # "(unknown)", so the server could not pick the right language
        # parser; interpolate the actual filename argument.
        url = f"{self.base_url}/metrics?file_name={filename}&unit={unit}"
        r = requests.post(url, data=code, headers=HEADERS)

        if not r.ok:
            return {}

        return r.json()
86 |
--------------------------------------------------------------------------------
/bugbug/swarm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import json
7 | import subprocess
8 | from datetime import datetime
9 | from typing import Collection
10 |
11 |
def api_revinfo(instance, rev_id):
    """Build the Swarm API URL for a review's basic information."""
    return f"https://{instance}api/v10/reviews/{rev_id}"
15 |
16 |
def api_filelist_v_fromto(instance, rev_id, v1=0, v2=1):
    """Build the Swarm API URL listing files changed between two review versions."""
    return f"https://{instance}api/v10/reviews/{rev_id}/files?from={v1}&to={v2}"
20 |
21 |
def call(auth, g):
    """Fetch URL ``g`` with curl using HTTP basic auth.

    Args:
        auth: mapping with "user" and "password" keys.
        g: the URL to fetch.

    Returns:
        The raw response body as bytes.
    """
    # Bug fix: the command was previously a single interpolated string passed
    # without shell=True, which fails on POSIX (the whole string is treated
    # as the executable name). Passing an argument list both works portably
    # and avoids shell injection through the credentials or the URL.
    command = ["curl", "-u", f"{auth['user']}:{auth['password']}", g]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    return process.stdout
26 |
27 |
def p4_connect(auth):
    """Open an authenticated Perforce connection.

    Args:
        auth: mapping with "port", "user" and "password" keys.

    Returns:
        A connected, logged-in P4 handle with tagged output disabled.
    """
    from P4 import P4  # from pip p4python

    p4 = P4()
    p4.port = auth["port"]
    p4.user = auth["user"]
    p4.password = auth["password"]
    p4.connect()
    p4.run_login()
    # Untagged output: p4.run() returns plain text lines (relied on by the
    # diff2 calls in get_review).
    p4.tagged = False
    return p4
39 |
40 |
def get_review(instance, rev_id, version_num, auth):
    """Fetch a Swarm review's metadata and per-file unified diffs.

    Args:
        instance: Swarm instance host prefix used to build API URLs.
        rev_id: the review ID.
        version_num: two-item sequence [from_version, to_version].
        auth: credentials mapping used for both the Swarm API and Perforce.

    Returns:
        The review "data" payload, augmented with the file-list fields and a
        "diffs" mapping of depot path -> unified diff text.
    """
    p4 = p4_connect(auth)
    data_rev = {}
    g = api_revinfo(instance, rev_id)
    message = json.loads(call(auth, g))
    data_rev = message["data"]

    # Merge the file-list response (e.g. "files") into the review payload.
    fl = api_filelist_v_fromto(instance, rev_id, v1=version_num[0], v2=version_num[1])
    file_list = json.loads(call(auth, fl))
    for what in file_list["data"]:
        data_rev[what] = file_list["data"][what]

    # Changelist of the "to" version ("versions" appears 1-based — the to
    # version index is decremented to address the list).
    commit_id = data_rev["reviews"][0]["versions"][version_num[1] - 1]["change"]

    diffs = {}
    for file in data_rev["files"]:
        # Fall back to the depot path / file revision when "fromFile" or
        # "diffFrom" is absent (presumably files without a distinct source
        # version — TODO confirm against the Swarm API docs).
        filename1 = file["fromFile"] if "fromFile" in file else file["depotFile"]
        filename2 = file["depotFile"]
        commit_id1 = file["diffFrom"] if "diffFrom" in file else f"#{file['rev']}"
        commit_id2 = file["diffTo"] if "diffTo" in file else f"@={commit_id}"

        # Unified diff with 5 lines of context between the two file versions.
        diffs[filename2] = "\n".join(
            p4.run(
                "diff2",
                "-u",
                "-du5",
                f"{filename1}{commit_id1}",
                f"{filename2}{commit_id2}",
            )
        )

    data_rev["diffs"] = diffs

    return data_rev
75 |
76 |
77 | def get(
78 | AUTH,
79 | rev_ids: Collection[int] | None = None,
80 | modified_start: datetime | None = None,
81 | version_l=[0, 1],
82 | ):
83 | data = []
84 | instance = AUTH["instance"]
85 | if rev_ids is not None:
86 | for r in rev_ids:
87 | loc = get_review(instance, r, version_l, AUTH)
88 |
89 | full_diff = "".join([loc["diffs"][e] for e in loc["diffs"]])
90 |
91 | data += [
92 | {
93 | "fields": {
94 | "diffID": int(r),
95 | "version": version_l,
96 | "file_diff": loc["diffs"],
97 | "diff": full_diff,
98 | }
99 | }
100 | ]
101 |
102 | return data
103 |
--------------------------------------------------------------------------------
/bugbug/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.2"
2 | services:
3 | bugbug-base:
4 | build:
5 | context: .
6 | dockerfile: infra/dockerfile.base
7 | image: mozilla/bugbug-base
8 |
9 | bugbug-nlp:
10 | build:
11 | context: .
12 | dockerfile: infra/dockerfile.base-nlp
13 | image: mozilla/bugbug-base-nlp
14 |
15 | bugbug-commit-retrieval:
16 | build:
17 | context: .
18 | dockerfile: infra/dockerfile.commit_retrieval
19 | image: mozilla/bugbug-commit-retrieval
20 | volumes:
21 | - type: bind
22 | source: ./cache/
23 | target: /cache/
24 | volume:
25 | nocopy: true
26 |
27 | bugbug-http-service:
28 | build:
29 | context: http_service
30 | image: mozilla/bugbug-http-service
31 | environment:
32 | - BUGBUG_BUGZILLA_TOKEN
33 | - BUGBUG_GITHUB_TOKEN
34 | - PORT=8000
35 | ports:
36 | - target: 8000
37 | published: 8000
38 | protocol: tcp
39 | mode: host
40 |
41 | bugbug-http-service-bg-worker:
42 | build:
43 | context: http_service
44 | dockerfile: Dockerfile.bg_worker
45 | image: mozilla/bugbug-http-service-bg-worker
46 | environment:
47 | - BUGBUG_BUGZILLA_TOKEN
48 | - BUGBUG_GITHUB_TOKEN
49 |
50 | bugbug-spawn-pipeline:
51 | build:
52 | context: infra/
53 | dockerfile: dockerfile.spawn_pipeline
54 | image: mozilla/bugbug-spawn-pipeline
55 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | Detailed documentation per model
2 |
3 | - [Regressor model for predicting risky commits](models/regressor.md)
4 |
--------------------------------------------------------------------------------
/docs/data.md:
--------------------------------------------------------------------------------
1 | # Downloading Data Using BugBug
2 |
3 | BugBug relies on various types of data, such as bugs, commits, issues, and crash reports, to build its models. Although all this data is publicly available through different APIs, retrieving it every time we train a model is not an efficient solution. Hence, a copy of the data is saved as downloadable compressed files through a simple API.
4 |
5 | > **Note:**
6 | > You can use the data outside this project by using BugBug as a dependency (`pip install bugbug`).
7 |
8 | ## Bugzilla Bugs
9 |
10 | ```py
11 | from bugbug import bugzilla, db
12 |
13 | # Download the latest version of the dataset if it is not already downloaded
14 | db.download(bugzilla.BUGS_DB)
15 |
16 | # Iterate over all bugs in the dataset
17 | for bug in bugzilla.get_bugs():
18 | # This is the same as if you retrieved the bug through Bugzilla REST API:
19 | # https://bmo.readthedocs.io/en/latest/api/core/v1/bug.html
20 | print(bug["id"])
21 | ```
22 |
23 | ## Phabricator Revisions
24 |
25 | ```py
26 | from bugbug import phabricator, db
27 |
28 | db.download(phabricator.REVISIONS_DB)
29 |
30 | for revision in phabricator.get_revisions():
31 | # The revision here combines the results retrieved from two API endpoints:
32 | # https://phabricator.services.mozilla.com/conduit/method/differential.revision.search/
33 | # https://phabricator.services.mozilla.com/conduit/method/transaction.search/
34 | print(revision["id"])
35 | ```
36 |
37 | ## Repository Commits
38 |
39 | ```py
40 | from bugbug import repository, db
41 |
42 | db.download(repository.COMMITS_DB)
43 |
44 | for commit in repository.get_commits():
45 | print(commit["node"])
46 | ```
47 |
48 | ## Github Issues
49 |
50 | > _TODO_
51 |
52 | ## Mozilla Crash Reports
53 |
54 | > _TODO_
55 |
--------------------------------------------------------------------------------
/docs/models/regressor.md:
--------------------------------------------------------------------------------
1 | ## Supported languages
2 |
3 | The regressor model supports all languages supported by rust-code-analysis: https://github.com/mozilla/rust-code-analysis#supported-languages.
4 |
5 | ## Training the model for another project
6 |
7 | There are quite a few steps to reproduce the results on another project, and they kind of depend on the processes followed by the specific project. Here is the current pipeline, which depends on Mozilla's processes. Some steps might not be necessary for other projects (and some projects might require additional steps).
8 |
9 | 1. Gather bugs from the project's Bugzilla;
10 | 1. Mine commits from the repository;
11 | 1. Create a list of commits to ignore (formatting changes and so on, which surely can't have introduced regressions);
12 | 1. Classify bugs between actual bugs and feature requests (we recently introduced a new "type" field in Bugzilla that developers fill, so we have a high precision in this step; for old bugs where the type field is absent, we use the "defect" model to classify the bug);
13 | 1. Use SZZ to find the commits which introduced the bugs from the list from step 4 (making git blame ignore and skip over commits from step 3);
14 | 1. Now we have a dataset of commits which introduced bugs and commits which did not introduce bugs, so we can actually train the regressor model.
15 |
16 | - Step 1 is in scripts/bug_retriever.py and bugbug/bugzilla.py;
17 | - Step 2 is scripts/commit_retriever.py and bugbug/repository.py;
18 | - Step 3 and 4 and 5 are in scripts/regressor_finder.py;
19 | - Step 6 is the actual "regressor" model, in bugbug/models/regressor.py.
20 |
--------------------------------------------------------------------------------
/extra-nlp-requirements.txt:
--------------------------------------------------------------------------------
1 | gensim==4.3.2
2 | spacy==3.8.7
3 |
--------------------------------------------------------------------------------
/extra-nn-requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/extra-nn-requirements.txt
--------------------------------------------------------------------------------
/functions/diff2html/index.js:
--------------------------------------------------------------------------------
1 | const https = require("https");
2 | const functions = require("@google-cloud/functions-framework");
3 | const Diff2html = require("diff2html");
4 |
// Keep upstream TCP connections alive across requests.
const agent = new https.Agent({ keepAlive: true });
// Sent with every upstream fetch so the service is identifiable.
const headers = new Headers({
  "User-Agent": "bugbug-diff2html",
});
// Rendering options passed to Diff2html.html() in strDiff2Html.
const configuration = {
  // Diff2Html Configuration
  outputFormat: "line-by-line",
  matching: "lines",
  renderNothingWhenEmpty: false,
  diffStyle: "word",
  // Diff2HtmlUI Configuration
  synchronisedScroll: true,
  highlight: true,
  fileListToggle: true,
  fileListStartVisible: false,
  fileContentToggle: true,
  stickyFileHeaders: true,
};
23 |
/**
 * HTTP entry point: fetches a raw diff (from hg.mozilla.org by changeset, or
 * from Phabricator by revision/diff ID) and responds with it rendered as HTML.
 *
 * @param {!express:Request} req HTTP request context.
 * @param {!express:Response} res HTTP response context.
 */
functions.http("diff2html", (req, res) => {
  res.set("Access-Control-Allow-Origin", "*");

  const { revision_id, diff_id, changeset } = req.query;
  const enableJS = req.query.format !== "html";

  // Either a changeset, or both a revision ID and a diff ID, is required.
  const missingChangeset = changeset == null;
  const missingPhabricatorIds = revision_id == null || diff_id == null;
  if (missingChangeset && missingPhabricatorIds) {
    res.status(400).send("Missing required parameters");
    return;
  }

  const url = missingChangeset
    ? `https://phabricator.services.mozilla.com/D${revision_id}?id=${diff_id}&download=true`
    : `https://hg.mozilla.org/mozilla-central/raw-rev/${changeset}`;

  fetch(url, { agent, headers })
    .then((upstream) => {
      if (!upstream.ok) throw Error(upstream.statusText);
      return upstream.text();
    })
    .then((text) => strDiff2Html(text, enableJS))
    .then((output) => res.status(200).send(output))
    .catch((err) => res.status(500).send(`Error: ${err.message}`));
});
60 |
61 | const jsTemplate = `
62 |
63 |
67 |
71 |
81 | `;
82 |
83 | function strDiff2Html(strDiff, enableJS) {
84 | const diffHtml = Diff2html.html(strDiff, configuration);
85 | return `
86 |
87 |
${enableJS ? jsTemplate : ""}
88 |
93 |
94 |
95 | ${diffHtml}
96 |
97 |
98 | `;
99 | }
100 |
--------------------------------------------------------------------------------
/functions/diff2html/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "diff2html",
3 | "scripts": {
4 | "start": "npx functions-framework --target=diff2html"
5 | },
6 | "dependencies": {
7 | "@google-cloud/functions-framework": "^3.1.2",
8 | "diff2html": "3.4.35"
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/functions/sync-review-comments-db/database.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import json
7 | import os
8 |
9 | import pg8000
10 | import sqlalchemy
11 | from google.cloud.sql.connector import Connector, IPTypes
12 |
13 |
def init_connection_pool_engine() -> sqlalchemy.engine.base.Engine:
    """Initializes a connection pool for a Cloud SQL instance of Postgres.

    Uses the Cloud SQL Python Connector package. Credentials are read from the
    DATABASE_CREDENTIALS environment variable (a JSON object).
    """
    credentials = json.loads(os.environ["DATABASE_CREDENTIALS"])
    ip_type = IPTypes.PRIVATE if credentials["private_ip"] else IPTypes.PUBLIC
    connector = Connector()

    def getconn() -> pg8000.dbapi.Connection:
        # Create a fresh pg8000 connection through the Cloud SQL connector;
        # SQLAlchemy invokes this whenever the pool needs a new connection.
        conn: pg8000.dbapi.Connection = connector.connect(
            credentials["instance_connection_name"],
            "pg8000",
            user=credentials["db_user"],
            password=credentials["db_password"],
            db=credentials["db_name"],
            ip_type=ip_type,
        )
        return conn

    return sqlalchemy.create_engine(
        "postgresql+pg8000://",
        creator=getconn,
        pool_size=5,
        max_overflow=2,
        pool_timeout=30,  # 30 seconds
        pool_recycle=1800,  # 30 minutes
    )
43 |
--------------------------------------------------------------------------------
/functions/sync-review-comments-db/main.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import functions_framework
4 | from database import init_connection_pool_engine
5 | from models import (
6 | Evaluation,
7 | Suggestion,
8 | )
9 | from qdrant_client import QdrantClient
10 | from sqlalchemy import select
11 | from sqlalchemy.orm import Session
12 |
13 | from bugbug.tools import code_review
14 | from bugbug.utils import get_secret
15 | from bugbug.vectordb import QdrantVectorDB
16 |
# Echo SQLAlchemy engine activity (emitted SQL) into the function logs.
logging.basicConfig()
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)

# Shared clients, created once per function instance at import time.
pg_engine = init_connection_pool_engine()
qdrant_client = QdrantClient(
    location=get_secret("QDRANT_LOCATION"), api_key=get_secret("QDRANT_API_KEY")
)
24 |
25 |
def get_recent_evaluations(min_id: int):
    """Yield Evaluation rows whose ID is strictly greater than ``min_id``.

    NOTE(review): the statement selects both Evaluation and Suggestion, but
    ``session.scalars`` returns only the first entity (Evaluation); the join
    presumably just restricts results to evaluations that have a suggestion —
    confirm whether the Suggestion columns were meant to be returned as well.
    """
    with Session(pg_engine) as session:
        stmt = (
            select(Evaluation, Suggestion)
            .join(Suggestion)
            .where(Evaluation.id > min_id)
        )

        evaluations = session.scalars(stmt)
        # Yield from inside the "with" so the session stays open while the
        # caller consumes this generator lazily.
        yield from evaluations
36 |
37 |
@functions_framework.cloud_event
def event_handler(cloud_event):
    """Sync new review-comment evaluations from PostgreSQL into the vector DB.

    Fetches evaluations whose ID exceeds the largest one already stored in the
    "suggestions_feedback" collection and appends them as suggestion feedback.
    """
    vector_db = QdrantVectorDB("suggestions_feedback")
    vector_db.setup()

    # Resume from where the previous sync run left off.
    largest_evaluation_id = vector_db.get_largest_id()
    logging.info(
        "Retrieving evaluations from the PostgreSQL database starting from evaluation ID %d",
        largest_evaluation_id,
    )

    feedback_db = code_review.SuggestionsFeedbackDB(vector_db)
    # A generator expression is passed, so rows stream through without being
    # materialized in memory all at once.
    feedback_db.add_suggestions_feedback(
        code_review.SuggestionFeedback(
            id=evaluation.id,
            action=evaluation.action.name,
            comment=evaluation.suggestion.content,
            file_path=evaluation.suggestion.file_path,
            user=evaluation.user,
        )
        for evaluation in get_recent_evaluations(largest_evaluation_id)
    )
60 |
--------------------------------------------------------------------------------
/functions/sync-review-comments-db/requirements.txt:
--------------------------------------------------------------------------------
1 | bugbug
2 | cloud-sql-python-connector[pg8000]==1.13.0
3 | functions-framework==3.5.0
4 | SQLAlchemy==2.0.25
5 |
--------------------------------------------------------------------------------
/http_service/.dockerignore:
--------------------------------------------------------------------------------
1 | # Git
2 | .git
3 | .gitignore
4 |
5 | # CI
6 | .codeclimate.yml
7 | .travis.yml
8 |
9 | # Docker
10 | docker-compose.yml
11 | .docker
12 |
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | */__pycache__/
16 | */*/__pycache__/
17 | */*/*/__pycache__/
18 | *.py[cod]
19 | */*.py[cod]
20 | */*/*.py[cod]
21 | */*/*/*.py[cod]
22 |
23 | # C extensions
24 | *.so
25 |
26 | # Distribution / packaging
27 | .Python
28 | env/
29 | build/
30 | develop-eggs/
31 | dist/
32 | downloads/
33 | eggs/
34 | lib/
35 | lib64/
36 | parts/
37 | sdist/
38 | var/
39 | *.egg-info/
40 | .installed.cfg
41 | *.egg
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .coverage
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Virtual environment
75 | .env/
76 | .venv/
77 | venv/
78 |
79 | # PyCharm
80 | .idea
81 |
82 | # Python mode for VIM
83 | .ropeproject
84 | */.ropeproject
85 | */*/.ropeproject
86 | */*/*/.ropeproject
87 |
88 | # Vim swap files
89 | *.swp
90 | */*.swp
91 | */*/*.swp
92 | */*/*/*.swp
93 |
94 | # Pytest files
95 | **/.pytest_cache/
96 |
97 | # Project-specific stuff
98 | data/
99 | */data
100 |
101 | # Integrations tests cache
102 | cache/
103 | */cache/
104 | */*/cache/
--------------------------------------------------------------------------------
/http_service/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG BUGBUG_VERSION=latest
2 |
3 | FROM mozilla/bugbug-base:$BUGBUG_VERSION
4 |
5 | # Install dependencies first
6 | COPY requirements.txt /requirements-http.txt
7 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir -r /requirements-http.txt
8 |
9 | # Setup http service as package
10 | COPY . /code/http_service
11 | # Use same version as bugbug
12 | RUN python -c "import importlib.metadata; print(importlib.metadata.version('bugbug'))" > /code/http_service/VERSION
RUN pip install --disable-pip-version-check --quiet --no-cache-dir /code/http_service
14 |
15 | # Run the Pulse listener in the background
16 | CMD (bugbug-http-pulse-listener &) && gunicorn -b 0.0.0.0:$PORT bugbug_http.app --preload --timeout 30 -w 3
17 |
--------------------------------------------------------------------------------
/http_service/Dockerfile.bg_worker:
--------------------------------------------------------------------------------
1 | ARG BUGBUG_VERSION=latest
2 |
3 | FROM mozilla/bugbug-commit-retrieval:$BUGBUG_VERSION
4 |
5 | # Install dependencies first
6 | COPY requirements.txt /requirements-http.txt
7 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir -r /requirements-http.txt
8 |
9 | # Setup http service as package
10 | COPY . /code/http_service
11 | # Use same version as bugbug
12 | RUN python -c "import importlib.metadata; print(importlib.metadata.version('bugbug'))" > /code/http_service/VERSION
13 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir /code/http_service
14 |
15 | # Load the models
16 | WORKDIR /code/
17 |
18 | ARG CHECK_MODELS
19 | ENV CHECK_MODELS="${CHECK_MODELS}"
20 |
21 | ARG TAG
22 | ENV TAG="${TAG}"
23 |
24 | RUN bash /code/http_service/ensure_models.sh
25 |
26 | CMD bugbug-http-worker high default low
27 |
--------------------------------------------------------------------------------
/http_service/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include bugbug_http/templates/*.html
2 |
--------------------------------------------------------------------------------
/http_service/README.md:
--------------------------------------------------------------------------------
1 | ### Local development
2 |
3 | **For starting the service locally run the following commands.**
4 |
5 | Start Redis:
6 |
7 | docker-compose up redis
8 |
9 | Build the http service image:
10 |
11 | docker build -t mozilla/bugbug-http-service -f Dockerfile .
12 |
13 | Start the http service:
14 |
15 | docker-compose up bugbug-http-service
16 |
17 | Build the background worker image:
18 |
19 | docker build -t mozilla/bugbug-http-service-bg-worker --build-arg TAG=latest -f Dockerfile.bg_worker .
20 |
21 | Run the background worker:
22 |
23 | docker-compose up bugbug-http-service-bg-worker
24 |
--------------------------------------------------------------------------------
/http_service/bugbug_http/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import tempfile
4 |
# Whether the service may run with some models absent from disk.
# Controlled by the BUGBUG_ALLOW_MISSING_MODELS env var (an int, "0"/"1").
ALLOW_MISSING_MODELS = bool(int(os.environ.get("BUGBUG_ALLOW_MISSING_MODELS", "0")))

# Directory for the local Mercurial checkout used by the service; defaults
# to a "bugbug-hg" directory under the system temporary directory.
REPO_DIR = os.environ.get(
    "BUGBUG_REPO_DIR", os.path.join(tempfile.gettempdir(), "bugbug-hg")
)
10 |
--------------------------------------------------------------------------------
/http_service/bugbug_http/download_models.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 |
8 | from bugbug import utils
9 | from bugbug_http import ALLOW_MISSING_MODELS
10 | from bugbug_http.models import MODEL_CACHE, MODELS_NAMES
11 |
12 | LOGGER = logging.getLogger()
13 |
14 |
def download_models():
    """Download every known model and verify each can be loaded.

    If a model (or one of its evaluation DBs) is missing and
    ALLOW_MISSING_MODELS is set, that model is skipped and the remaining
    models are still processed; otherwise the FileNotFoundError propagates.
    """
    for model_name in MODELS_NAMES:
        utils.download_model(model_name)
        # Try loading the model to catch missing/corrupt files early.
        try:
            model = MODEL_CACHE.get(model_name)
            model.download_eval_dbs(extract=False, ensure_exist=not ALLOW_MISSING_MODELS)
        except FileNotFoundError:
            if not ALLOW_MISSING_MODELS:
                raise
            # Lazy logging args instead of eager % formatting.
            LOGGER.info(
                "Missing %r model, skipping because ALLOW_MISSING_MODELS is set",
                model_name,
            )
            # `continue`, not `return`: the original returned on the first
            # missing model, silently skipping all remaining models.
            continue
31 |
32 |
33 | if __name__ == "__main__":
34 | download_models()
35 |
--------------------------------------------------------------------------------
/http_service/bugbug_http/readthrough_cache.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
7 | import datetime
8 | import logging
9 | import threading
10 | import time
11 | from datetime import timedelta
12 | from typing import Callable, Generic, TypeVar
13 |
LOGGER = logging.getLogger()

# A simple TTL cache to use with models. Because we expect the number of models
# in the service to not be very large, simplicity of implementation is
# preferred to algorithmic efficiency of operations.
#
# Called an 'Idle' TTL cache because TTL of items is reset after every get
Key = TypeVar("Key")
Value = TypeVar("Value")


class ReadthroughTTLCache(Generic[Key, Value]):
    """Read-through cache whose entries expire after ``ttl`` of inactivity.

    A value is only materialized in the cache once it has been requested at
    least twice within the TTL window (or when ``force_store`` is passed);
    until then only its last-access time is tracked.
    """

    def __init__(self, ttl: timedelta, load_item_function: Callable[[Key], Value]):
        self.ttl = ttl
        self.load_item_function = load_item_function
        # Last get() time per key; may contain keys that were never stored.
        self.items_last_accessed: dict[Key, datetime.datetime] = {}
        # Cached values; keys are always a subset of items_last_accessed.
        self.items_storage: dict[Key, Value] = {}

    def __contains__(self, key):
        return key in self.items_storage

    def get(self, key, force_store=False):
        """Return the value for ``key``, loading it on a cache miss.

        The value is cached when ``force_store`` is true or when the key was
        already accessed within the current TTL window.
        """
        store_item = force_store
        if key in self.items_storage:
            item = self.items_storage[key]
        else:
            item = self.load_item_function(key)
            # Cache the item only if it was last accessed within the past TTL
            # seconds. Note that all entries in items_last_accessed are purged
            # if the item was not accessed in the last TTL seconds.
            if key in self.items_last_accessed:
                store_item = True

        self.items_last_accessed[key] = datetime.datetime.now()
        if store_item:
            LOGGER.info(
                f"Storing item with the following key in readthroughcache: {key}"
            )
            self.items_storage[key] = item

        return item

    def purge_expired_entries(self):
        """Evict every entry whose last access is older than the TTL."""
        purge_entries_before = datetime.datetime.now() - self.ttl
        for key, time_last_touched in list(self.items_last_accessed.items()):
            if time_last_touched < purge_entries_before:
                LOGGER.info(
                    f"Evicting item with the following key from readthroughcache: {key}"
                )
                del self.items_last_accessed[key]
                # pop() instead of del: a key accessed only once is tracked in
                # items_last_accessed but never stored, so the original
                # unconditional del raised KeyError (killing the purge thread).
                self.items_storage.pop(key, None)

    def start_ttl_thread(self):
        """Run purge_expired_entries() every TTL period in a daemon thread."""

        def purge_expired_entries_with_wait():
            while True:
                time.sleep(self.ttl.total_seconds())
                self.purge_expired_entries()

        thread = threading.Thread(target=purge_expired_entries_with_wait)
        # Thread.setDaemon() is deprecated since Python 3.10.
        thread.daemon = True
        thread.start()
75 |
--------------------------------------------------------------------------------
/http_service/bugbug_http/sentry.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # This Source Code Form is subject to the terms of the Mozilla Public
4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
5 | # You can obtain one at http://mozilla.org/MPL/2.0/.
6 |
7 | import logging
8 |
9 | import sentry_sdk
10 | from sentry_sdk.integrations.logging import LoggingIntegration
11 |
12 | from bugbug import get_bugbug_version
13 |
14 |
def setup_sentry(dsn, integrations=None):
    """Initialize the Sentry SDK with logging-based reporting.

    INFO log records become breadcrumbs; WARNING and above are sent to
    Sentry as events. The release is tagged with the current bugbug version.

    Args:
        dsn: the Sentry DSN to report to.
        integrations: optional extra sentry_sdk integrations. Defaults to
            None instead of a mutable `[]` default argument (shared-default
            pitfall), which is backward-compatible for all callers.
    """
    logging_integration = LoggingIntegration(
        # Default behaviour: INFO messages will be included as breadcrumbs
        level=logging.INFO,
        # Change default behaviour (ERROR messages events)
        event_level=logging.WARNING,
    )
    sentry_sdk.init(
        dsn=dsn,
        integrations=[logging_integration] + list(integrations or []),
        release=get_bugbug_version(),
    )
27 |
--------------------------------------------------------------------------------
/http_service/bugbug_http/templates/doc.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | BugBug documentation
5 |
6 |
7 |
11 |
12 |
15 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/http_service/bugbug_http/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # This Source Code Form is subject to the terms of the Mozilla Public
4 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
5 | # You can obtain one at http://mozilla.org/MPL/2.0/.
6 |
7 | import os
8 | import sys
9 | from urllib.parse import urlparse
10 |
11 | from redis import Redis
12 | from rq import Worker
13 | from sentry_sdk.integrations.rq import RqIntegration
14 |
15 | import bugbug_http.boot
16 | from bugbug_http.sentry import setup_sentry
17 |
18 | if os.environ.get("SENTRY_DSN"):
19 | setup_sentry(dsn=os.environ.get("SENTRY_DSN"), integrations=[RqIntegration()])
20 |
21 |
def main():
    """Start an RQ worker processing the queues named on the command line.

    Queue names come from argv (like `rq worker`), defaulting to
    ["default"]; the Redis connection is configured from REDIS_URL.
    """
    # Bootstrap the worker assets
    bugbug_http.boot.boot_worker()

    # Provide queue names to listen to as arguments to this script,
    # similar to rq worker
    url = urlparse(os.environ.get("REDIS_URL", "redis://localhost/0"))
    assert url.hostname is not None
    redis_conn = Redis(
        host=url.hostname,
        # Explicit None check so port 0 would not be silently replaced.
        port=url.port if url.port is not None else 6379,
        password=url.password,
        # Enable TLS only for rediss:// URLs (plain boolean expression
        # instead of the redundant `True if ... else False`).
        ssl=url.scheme == "rediss",
        ssl_cert_reqs=None,
    )
    queue_names = sys.argv[1:] or ["default"]
    worker = Worker(queue_names, connection=redis_conn)
    worker.work()
40 |
41 |
42 | if __name__ == "__main__":
43 | main()
44 |
--------------------------------------------------------------------------------
/http_service/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.2"
2 | services:
3 | bugbug-http-service:
4 | build:
5 | context: .
6 | image: mozilla/bugbug-http-service
7 | environment:
8 | - BUGBUG_BUGZILLA_TOKEN
9 | - BUGBUG_GITHUB_TOKEN
10 | - REDIS_URL=redis://redis:6379/0
11 | - PORT=8000
12 | - PULSE_USER
13 | - PULSE_PASSWORD
14 | - SENTRY_DSN
15 | ports:
16 | - target: 8000
17 | published: 8000
18 | protocol: tcp
19 | mode: host
20 | depends_on:
21 | - redis
22 |
23 | bugbug-http-service-bg-worker:
24 | build:
25 | context: .
26 | dockerfile: Dockerfile.bg_worker
27 | image: mozilla/bugbug-http-service-bg-worker
28 | environment:
29 | - BUGBUG_BUGZILLA_TOKEN
30 | - BUGBUG_GITHUB_TOKEN
31 | - REDIS_URL=redis://redis:6379/0
32 | - BUGBUG_ALLOW_MISSING_MODELS
33 | - BUGBUG_REPO_DIR
34 | - SENTRY_DSN
35 | depends_on:
36 | - redis
37 |
  bugbug-http-service-rq-dashboard:
39 | build:
40 | context: .
41 | dockerfile: Dockerfile
    image: mozilla/bugbug-http-service
43 | command:
44 | - rq-dashboard
45 | - "-u"
46 | - "redis://redis:6379/0"
47 | ports:
48 | - target: 9181
49 | published: 9181
50 | protocol: tcp
51 | mode: host
52 | depends_on:
53 | - redis
54 |
55 | redis:
56 | image: redis:4
57 | ports:
58 | - target: 6379
59 | published: 6379
60 | protocol: tcp
61 | mode: host
62 |
--------------------------------------------------------------------------------
/http_service/ensure_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 | #
6 | # Download models and check that models can be correctly be loaded. Can be
7 | # disabled by passing CHECK_MODELS=0 as an environment variable
8 |
9 | set -eox pipefail
10 |
11 | if [ "$CHECK_MODELS" == "0" ]; then
12 | echo "Skipping downloading and checking models!"
13 | exit 0;
14 | fi
15 |
16 | python -m bugbug_http.download_models
17 |
--------------------------------------------------------------------------------
/http_service/requirements.txt:
--------------------------------------------------------------------------------
1 | apispec-webframeworks==1.2.0
2 | apispec[yaml]==6.8.2
3 | cerberus==1.3.7
4 | Flask==3.1.1
5 | flask-apispec==0.11.4
6 | flask-cors==6.0.0
7 | gunicorn==23.0.0
8 | kombu==5.5.4
9 | marshmallow==3.26.1
10 | requests==2.32.3
11 | rq==2.3.3
12 | rq-dashboard==0.8.2.2
13 | sentry-sdk[flask]==2.29.1
14 |
--------------------------------------------------------------------------------
/http_service/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import os
7 |
8 | from setuptools import find_packages, setup
9 |
10 | here = os.path.dirname(__file__)
11 |
12 |
def read_requirements(file_):
    """Return the sorted, de-duplicated requirement lines of ``file_``.

    Inline comments (anything after ``#``) are stripped. Blank and
    comment-only lines are dropped so they do not end up as empty-string
    entries in ``install_requires`` (the original kept them).
    """
    with open(os.path.join(here, file_)) as f:
        requirements = {line.split("#")[0].strip() for line in f}
        return sorted(requirements - {""})
16 |
17 |
18 | install_requires = read_requirements("requirements.txt")
19 |
20 | with open(os.path.join(here, "VERSION")) as f:
21 | version = f.read().strip()
22 |
23 | setup(
24 | name="bugbug-http-service",
25 | version=version,
26 | description="ML tools for Mozilla projects",
27 | author="Marco Castelluccio",
28 | author_email="mcastelluccio@mozilla.com",
29 | install_requires=install_requires,
30 | packages=find_packages(),
31 | include_package_data=True,
32 | license="MPL2",
33 | entry_points={
34 | "console_scripts": [
35 | "bugbug-http-worker = bugbug_http.worker:main",
36 | "bugbug-http-pulse-listener = bugbug_http.listener:main",
37 | ]
38 | },
39 | classifiers=[
40 | "Programming Language :: Python :: 3.7",
41 | "Programming Language :: Python :: 3 :: Only",
42 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
43 | ],
44 | )
45 |
--------------------------------------------------------------------------------
/http_service/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/http_service/tests/__init__.py
--------------------------------------------------------------------------------
/http_service/tests/pytest.ini:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/http_service/tests/pytest.ini
--------------------------------------------------------------------------------
/http_service/tests/test_get_config_specific_groups.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from typing import Callable
7 |
8 | import orjson
9 | import zstandard
10 |
11 | from bugbug_http import models
12 |
13 |
def test_get_config_specific_groups(
    mock_get_config_specific_groups: Callable[
        [dict[str, float], dict[str, float]], None
    ],
) -> None:
    """Scheduling groups for a config succeeds and caches its result."""
    assert models.get_config_specific_groups("test-linux1804-64/opt-*") == "OK"

    # Assert the test selection result is stored in Redis.
    raw_value = models.redis.get(
        "bugbug:job_result:get_config_specific_groups:test-linux1804-64/opt-*"
    )
    assert raw_value is not None
    decoded = orjson.loads(zstandard.ZstdDecompressor().decompress(raw_value))
    assert decoded == [{"name": "test-group1"}]
28 |
--------------------------------------------------------------------------------
/http_service/tests/test_integration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import os
7 | import time
8 | from logging import INFO, basicConfig, getLogger
9 |
10 | import requests
11 |
12 | basicConfig(level=INFO)
13 | logger = getLogger(__name__)
14 |
15 | BUGBUG_HTTP_SERVER = os.environ.get("BUGBUG_HTTP_SERVER", "http://localhost:8000/")
16 |
17 |
18 | # Test classifying a single bug.
def integration_test_single():
    """Poll the service until bug 1376406 is classified, then verify it."""
    timeout = 1200
    for _ in range(timeout):
        last_response = requests.get(
            f"{BUGBUG_HTTP_SERVER}/defectenhancementtask/predict/1376406",
            headers={"X-Api-Key": "integration_test_single"},
        )
        if last_response.status_code == 200:
            break
        time.sleep(1)

    body = last_response.json()

    if not last_response.ok:
        raise requests.HTTPError(
            f"Couldn't get an answer in {timeout} seconds: {body}",
            response=last_response,
        )

    logger.info("Response for bug 1376406 %s", body)
    assert body["class"] is not None
42 |
43 |
44 | # Test classifying a batch of bugs.
def integration_test_batch():
    """Poll the service until both bugs in a batch are classified."""
    timeout = 100
    for _ in range(timeout):
        last_response = requests.post(
            f"{BUGBUG_HTTP_SERVER}/defectenhancementtask/predict/batch",
            headers={"X-Api-Key": "integration_test_batch"},
            json={"bugs": [1376544, 1376412]},
        )
        if last_response.status_code == 200:
            break
        time.sleep(1)

    body = last_response.json()

    if not last_response.ok:
        raise requests.HTTPError(
            f"Couldn't get an answer in {timeout} seconds: {body}",
            response=last_response,
        )

    result_1376544 = body["bugs"]["1376544"]
    logger.info("Response for bug 1376544 %s", result_1376544)
    assert result_1376544["class"] is not None
    result_1376412 = body["bugs"]["1376412"]
    logger.info("Response for bug 1376412 %s", result_1376412)
    assert result_1376412["class"] is not None
73 |
74 |
75 | if __name__ == "__main__":
76 | integration_test_single()
77 | integration_test_batch()
78 |
--------------------------------------------------------------------------------
/http_service/tests/test_push_schedules.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import gzip
7 |
8 | import orjson
9 |
10 | from bugbug_http.app import API_TOKEN
11 |
12 |
def retrieve_compressed_reponse(response):
    """Decode the body of a Flask test-client response, gunzipping if needed.

    NOTE(review): the name is missing an "s" ("reponse"); kept as-is because
    callers in this file use it.
    """
    # `response` here is a Flask/werkzeug test Client response object,
    # not a `requests` Response (the original comment's angle-bracketed
    # class names were lost in extraction).
    if response.headers["Content-Encoding"] == "gzip":
        return orjson.loads(gzip.decompress(response.data))
    return response.json
19 |
20 |
def test_queue_job_valid(client, add_result, jobs):
    """A schedules request is queued (202), stays pending, then serves 200."""
    # schedule job
    first = client.get(
        "/push/autoland/abcdef/schedules",
        headers={API_TOKEN: "test"},
    )
    assert first.status_code == 202
    assert first.json == {"ready": False}

    # still not ready
    second = client.get(
        "/push/autoland/abcdef/schedules",
        headers={API_TOKEN: "test"},
    )
    assert second.status_code == 202
    assert second.json == {"ready": False}

    # job done
    result = {
        "groups": ["foo/mochitest.ini", "bar/xpcshell.ini"],
        "tasks": ["test-linux/opt-mochitest-1"],
    }
    job_keys = next(iter(jobs.values()))
    add_result(job_keys[0], result)

    final = client.get(
        "/push/autoland/abcdef/schedules",
        headers={API_TOKEN: "test"},
    )
    assert final.status_code == 200
    assert retrieve_compressed_reponse(final) == result
54 |
55 |
def test_no_api_key(client):
    """Requests lacking an X-API-KEY header must be rejected with 401."""
    response = client.get("/push/autoland/foobar/schedules")

    assert response.status_code == 401
    assert response.json == {"message": "Error, missing X-API-KEY"}
61 |
--------------------------------------------------------------------------------
/infra/check-pipeline.yml:
--------------------------------------------------------------------------------
1 | version: 1
2 | tasks:
3 | - ID: check-component
4 | created: { $fromNow: "" }
5 | deadline: { $fromNow: "12 hours" }
6 | expires: { $fromNow: "1 week" }
7 | provisionerId: proj-bugbug
8 | workerType: batch
9 | payload:
10 | maxRunTime: 3600
11 | image: mozilla/bugbug-base:${version}
12 | command:
13 | - bugbug-check
14 | - component
15 |
16 | routes:
17 | - notify.email.release-mgmt-analysis@mozilla.com.on-failed
18 | - notify.irc-channel.#bugbug.on-failed
19 | metadata:
20 | name: bugbug check component
21 | description: bugbug check component
22 | owner: release-mgmt-analysis@mozilla.com
23 | source: https://github.com/mozilla/bugbug/raw/master/infra/check-pipeline.yml
24 |
25 | - ID: shadow-scheduler-stats
26 | created: { $fromNow: "" }
27 | deadline: { $fromNow: "12 hours" }
28 | expires: { $fromNow: "1 week" }
29 | provisionerId: proj-bugbug
30 | workerType: compute-large
31 | payload:
32 | maxRunTime: 43200
33 | image: mozilla/bugbug-base:${version}
34 | command:
35 | - bugbug-shadow-scheduler-stats
36 | - "7"
37 |
38 | artifacts:
39 | public/average_group_scheduled.svg:
40 | path: /average_group_scheduled.svg
41 | type: file
42 | public/percentage_group_caught_at_least_one.svg:
43 | path: /percentage_group_caught_at_least_one.svg
44 | type: file
45 | public/percentage_group_caught.svg:
46 | path: /percentage_group_caught.svg
47 | type: file
48 | public/average_config_group_scheduled.svg:
49 | path: /average_config_group_scheduled.svg
50 | type: file
51 | public/percentage_config_group_caught_at_least_one.svg:
52 | path: /percentage_config_group_caught_at_least_one.svg
53 | type: file
54 | public/percentage_config_group_caught.svg:
55 | path: /percentage_config_group_caught.svg
56 | type: file
57 |
58 | features:
59 | taskclusterProxy: true
60 | scopes:
61 | - auth:aws-s3:read-write:communitytc-bugbug/*
62 | routes:
63 | - notify.email.release-mgmt-analysis@mozilla.com.on-failed
64 | - notify.irc-channel.#bugbug.on-failed
65 | - index.project.bugbug.shadow_scheduler_stats.latest
66 | metadata:
67 | name: bugbug shadow scheduler stats
68 | description: bugbug shadow scheduler stats
69 | owner: release-mgmt-analysis@mozilla.com
70 | source: https://github.com/mozilla/bugbug/raw/master/infra/check-pipeline.yml
71 |
--------------------------------------------------------------------------------
/infra/dockerfile.base:
--------------------------------------------------------------------------------
1 | FROM python:3.12.7-slim
2 |
3 | # Setup dependencies in a cacheable step
4 | RUN --mount=type=bind,source=requirements.txt,target=/requirements.txt \
5 | apt-get update && \
6 | apt-get install -y --no-install-recommends gcc g++ libgomp1 libffi-dev libjemalloc2 zstd patch git && \
7 | pip install --disable-pip-version-check --quiet --no-cache-dir -r /requirements.txt && \
8 | apt-get purge -y gcc g++ libffi-dev patch git && \
9 | apt-get autoremove -y && \
10 | rm -rf /var/lib/apt/lists/*
11 |
12 | ENV LD_PRELOAD="libjemalloc.so.2"
13 |
14 | COPY infra/mozci_config.toml /root/.config/mozci/config.toml
15 |
16 | RUN --mount=type=bind,target=/tmp/bugbug,rw \
17 | pip install --disable-pip-version-check --quiet --no-cache-dir /tmp/bugbug
18 |
--------------------------------------------------------------------------------
/infra/dockerfile.base-nlp:
--------------------------------------------------------------------------------
1 | FROM mozilla/bugbug-base:latest
2 |
3 | # Setup dependencies in a cacheable step
4 | ADD extra-nlp-requirements.txt /
5 |
6 | RUN apt-get update && \
7 | apt-get install -y --no-install-recommends gcc g++ libgomp1 && \
8 | pip install --disable-pip-version-check --quiet --no-cache-dir -r /extra-nlp-requirements.txt && \
9 | apt-get purge -y gcc g++ && \
10 | apt-get autoremove -y && \
11 | rm -rf /var/lib/apt/lists/*
12 |
13 | RUN python -m spacy download en_core_web_sm
14 |
--------------------------------------------------------------------------------
/infra/dockerfile.commit_retrieval:
--------------------------------------------------------------------------------
1 | FROM mozilla/bugbug-base:latest
2 |
3 | ENV PATH="${PATH}:/git-cinnabar"
4 |
5 | # git is required by the annotate pipeline.
6 | # libcurl4 is required by git-cinnabar.
7 | RUN apt-get update && \
8 | apt-get install -y --no-install-recommends git xz-utils curl libcurl4 && \
9 | hg clone -r 90302f015ac8dd8877ef3ee24b5a62541142378b https://hg.mozilla.org/hgcustom/version-control-tools /version-control-tools/ && \
10 | rm -r /version-control-tools/.hg /version-control-tools/ansible /version-control-tools/docs /version-control-tools/testing && \
11 | git clone https://github.com/glandium/git-cinnabar.git /git-cinnabar && \
12 | cd /git-cinnabar && git -c advice.detachedHead=false checkout fd17180c439c3eb3ab9de5cfc47923b04242394a && cd .. && \
13 | git config --global cinnabar.experiments python3 && \
14 | git config --global cinnabar.check no-version-check && \
15 | git config --global fetch.prune true && \
16 | git cinnabar download && \
17 | rm -r /git-cinnabar/.git /git-cinnabar/CI /git-cinnabar/tests && \
18 | curl -L https://github.com/mozilla/rust-code-analysis/releases/download/v0.0.23/rust-code-analysis-linux-web-x86_64.tar.gz | tar -C /usr/bin -xzv && \
19 | apt-get purge -y xz-utils curl && \
20 | apt-get autoremove -y && \
21 | rm -r /var/lib/apt/lists/*
22 |
23 | COPY infra/hgrc /etc/mercurial/hgrc.d/bugbug.rc
24 |
25 | CMD bugbug-data-commits /cache/
26 |
--------------------------------------------------------------------------------
/infra/dockerfile.spawn_pipeline:
--------------------------------------------------------------------------------
1 | FROM python:3.12.7-slim
2 |
3 | # Setup dependencies in a cacheable step
4 | ADD spawn_pipeline_requirements.txt /code/
5 |
6 | RUN pip install --disable-pip-version-check --quiet --no-cache-dir -r /code/spawn_pipeline_requirements.txt
7 |
8 | ADD spawn_pipeline.py /code
9 |
10 | ADD *-pipeline.yml /code/
11 |
12 | CMD python /code/spawn_pipeline.py
13 |
--------------------------------------------------------------------------------
/infra/hgrc:
--------------------------------------------------------------------------------
1 | [extensions]
2 | purge =
3 | strip =
4 | robustcheckout = /version-control-tools/hgext/robustcheckout/__init__.py
5 | hgmo = /version-control-tools/hgext/hgmo
6 | pushlog = /version-control-tools/hgext/pushlog
7 | mozext = /version-control-tools/hgext/mozext
8 |
--------------------------------------------------------------------------------
/infra/mozci_config.toml:
--------------------------------------------------------------------------------
1 | [mozci]
2 | data_sources = ["treeherder_client", "hgmo", "taskcluster", "errorsummary"]
3 |
4 | [mozci.cache]
5 | retention = 40320
6 | serializer = "compressedpickle"
7 |
8 | [mozci.cache.stores]
9 | s3 = { driver = "s3", bucket = "communitytc-bugbug", prefix = "data/adr_cache/" }
10 |
--------------------------------------------------------------------------------
/infra/set_hook_version.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import argparse
7 | import json
8 | import sys
9 |
10 |
def set_hook(hook_path, version):
    """Update a Taskcluster hook definition JSON file in-place for a release.

    Injects/overrides the TAG environment variable in the task payload and
    pins the docker image to ``version`` for mozilla/bugbug-* images.

    Args:
        hook_path: path to the hook definition JSON file (rewritten in-place).
        version: version string to set, e.g. "1.2.3".
    """
    with open(hook_path, "r") as hook_file:
        hook_data = json.load(hook_file)

    task_payload = hook_data["task"]["payload"]

    # 1) Insert or replace the TAG environment variable.
    env = task_payload.get("env")
    if env:
        # Wrap existing vars in a json-e $merge so TAG wins on conflict.
        if "$merge" not in env:
            env = {"$merge": [env]}
            task_payload["env"] = env
        env["$merge"].append({"TAG": version})
    else:
        # Missing or empty env: create it with just TAG. (The original
        # raised KeyError when the payload had no "env" key at all.)
        task_payload["env"] = {"TAG": version}

    # 2) Set the version for the hook docker image.
    task_image = task_payload.get("image")
    if task_image:
        image_name = task_image.split(":", 1)[0]
        if image_name.startswith("mozilla/bugbug-"):
            task_payload["image"] = f"{image_name}:{version}"

    with open(hook_path, "w") as hook_file:
        json.dump(
            hook_data, hook_file, sort_keys=True, indent=4, separators=(",", ": ")
        )
38 |
39 |
def parse_args(raw_args):
    """Parse the positional CLI arguments: version, then hook file path."""
    parser = argparse.ArgumentParser()
    for dest, metavar, help_text in (
        ("version", "version", "The version to set in the hook definition"),
        ("hook_file", "hook-file", "The hook definition file to update in-place"),
    ):
        parser.add_argument(dest, metavar=metavar, type=str, help=help_text)

    return parser.parse_args(raw_args)
56 |
57 |
58 | if __name__ == "__main__":
59 | args = parse_args(sys.argv[1:])
60 | set_hook(args.hook_file, args.version)
61 |
--------------------------------------------------------------------------------
/infra/spawn_pipeline_requirements.txt:
--------------------------------------------------------------------------------
1 | json-e==4.8.0
2 | pyyaml==6.0.2
3 | requests==2.32.3
4 | taskcluster==84.0.2
5 |
--------------------------------------------------------------------------------
/infra/taskcluster-hook-check-models-start.json:
--------------------------------------------------------------------------------
1 | {
2 | "schedule": ["0 0 0 * * *"],
3 | "metadata": {
4 | "description": "",
5 | "name": "BugBug check tasks",
6 | "owner": "mcastelluccio@mozilla.com"
7 | },
8 | "task": {
9 | "created": {
10 | "$fromNow": "0 seconds"
11 | },
12 | "deadline": {
13 | "$fromNow": "2 hours"
14 | },
15 | "expires": {
16 | "$fromNow": "1 week"
17 | },
18 | "extra": {},
19 | "metadata": {
20 | "description": "",
21 | "name": "BugBug check tasks",
22 | "owner": "mcastelluccio@mozilla.com",
23 | "source": "https://github.com/mozilla/bugbug"
24 | },
25 | "payload": {
26 | "artifacts": {},
27 | "cache": {},
28 | "capabilities": {},
29 | "env": {},
30 | "features": {
31 | "taskclusterProxy": true
32 | },
33 | "command": [
34 | "/usr/local/bin/python3",
35 | "/code/spawn_pipeline.py",
36 | "/code/check-pipeline.yml"
37 | ],
38 | "image": "mozilla/bugbug-spawn-pipeline",
39 | "maxRunTime": 7200
40 | },
41 | "priority": "normal",
42 | "provisionerId": "proj-bugbug",
43 | "retries": 5,
44 | "routes": [
45 | "notify.email.release-mgmt-analysis@mozilla.com.on-failed",
46 | "notify.irc-channel.#bugbug.on-failed"
47 | ],
48 | "schedulerId": "-",
49 | "scopes": ["assume:hook-id:project-bugbug/bugbug-checks"],
50 | "tags": {},
51 | "workerType": "batch"
52 | },
53 | "triggerSchema": {
54 | "additionalProperties": false,
55 | "type": "object"
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/infra/taskcluster-hook-classify-patch.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "description": "",
4 | "name": "BugBug classify patch",
5 | "owner": "mcastelluccio@mozilla.com"
6 | },
7 | "task": {
8 | "created": {
9 | "$fromNow": "0 seconds"
10 | },
11 | "deadline": {
12 | "$fromNow": "2 hours"
13 | },
14 | "expires": {
15 | "$fromNow": "1 month"
16 | },
17 | "extra": {},
18 | "metadata": {
19 | "description": "",
20 | "name": "BugBug classify patch",
21 | "owner": "mcastelluccio@mozilla.com",
22 | "source": "https://github.com/mozilla/bugbug"
23 | },
24 | "payload": {
25 | "artifacts": {
26 | "public/results.json": {
27 | "path": "/results.json",
28 | "type": "file"
29 | },
30 | "public/importances.json": {
31 | "path": "/importances.json",
32 | "type": "file"
33 | },
34 | "public/method_level.json": {
35 | "path": "/method_level.json",
36 | "type": "file"
37 | }
38 | },
39 | "cache": {
40 | "bugbug-mercurial-repository": "/cache"
41 | },
42 | "capabilities": {},
43 | "env": {
44 | "TC_SECRET_ID": "project/bugbug/production"
45 | },
46 | "features": {
47 | "taskclusterProxy": true
48 | },
49 | "command": [
50 | "bugbug-classify-commit",
51 | "regressor",
52 | "/cache/mozilla-central",
53 | "--phabricator-deployment=${payload['PHABRICATOR_DEPLOYMENT']}",
54 | "--diff-id=${payload['DIFF_ID']}",
55 | "--git_repo_dir=/gecko-dev",
56 | "--method_defect_predictor_dir=/MethodDefectPredictor"
57 | ],
58 | "image": "mozilla/bugbug-commit-retrieval",
59 | "maxRunTime": 7200
60 | },
61 | "priority": "normal",
62 | "provisionerId": "proj-bugbug",
63 | "retries": 5,
64 | "routes": [
65 | "notify.email.mcastelluccio@mozilla.com.on-failed",
66 | "notify.irc-channel.#bugbug.on-failed",
67 | "index.project.bugbug.classify_patch.latest",
68 | "index.project.bugbug.classify_patch.diff.${payload['DIFF_ID']}"
69 | ],
70 | "schedulerId": "-",
71 | "scopes": ["assume:hook-id:project-bugbug/bugbug-classify-patch"],
72 | "tags": {},
73 | "workerType": "compute-small"
74 | },
75 | "triggerSchema": {
76 | "additionalProperties": false,
77 | "properties": {
78 | "PHABRICATOR_DEPLOYMENT": {
79 | "type": "string",
80 | "enum": ["prod", "dev"]
81 | },
82 | "DIFF_ID": {
83 | "type": "number"
84 | }
85 | },
86 | "required": ["PHABRICATOR_DEPLOYMENT", "DIFF_ID"],
87 | "type": "object"
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/infra/taskcluster-hook-data-pipeline.json:
--------------------------------------------------------------------------------
1 | {
2 | "schedule": ["0 0 1,16 * *"],
3 | "metadata": {
4 | "description": "",
5 | "name": "BugBug data pipeline",
6 | "owner": "mcastelluccio@mozilla.com"
7 | },
8 | "task": {
9 | "created": {
10 | "$fromNow": "0 seconds"
11 | },
12 | "deadline": {
13 | "$fromNow": "2 hours"
14 | },
15 | "expires": {
16 | "$fromNow": "1 year"
17 | },
18 | "extra": {},
19 | "metadata": {
20 | "description": "",
21 | "name": "BugBug data pipeline",
22 | "owner": "mcastelluccio@mozilla.com",
23 | "source": "https://github.com/mozilla/bugbug"
24 | },
25 | "payload": {
26 | "artifacts": {},
27 | "cache": {},
28 | "capabilities": {},
29 | "env": {},
30 | "features": {
31 | "taskclusterProxy": true
32 | },
33 | "command": [
34 | "/usr/local/bin/python3",
35 | "/code/spawn_pipeline.py",
36 | "/code/data-pipeline.yml"
37 | ],
38 | "image": "mozilla/bugbug-spawn-pipeline",
39 | "maxRunTime": 7200
40 | },
41 | "priority": "normal",
42 | "provisionerId": "proj-bugbug",
43 | "retries": 5,
44 | "routes": [
45 | "notify.email.release-mgmt-analysis@mozilla.com.on-failed",
46 | "notify.irc-channel.#bugbug.on-failed",
47 | "index.project.bugbug.data-pipeline-start"
48 | ],
49 | "schedulerId": "-",
50 | "scopes": ["assume:hook-id:project-bugbug/bugbug"],
51 | "tags": {},
52 | "workerType": "batch"
53 | },
54 | "triggerSchema": {
55 | "additionalProperties": false,
56 | "type": "object"
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/infra/taskcluster-hook-landings-risk-report.json:
--------------------------------------------------------------------------------
1 | {
2 | "schedule": ["0 0 0 * * *"],
3 | "metadata": {
4 | "description": "",
5 | "name": "BugBug landings risk report pipeline",
6 | "owner": "mcastelluccio@mozilla.com"
7 | },
8 | "task": {
9 | "created": {
10 | "$fromNow": "0 seconds"
11 | },
12 | "deadline": {
13 | "$fromNow": "2 hours"
14 | },
15 | "expires": {
16 | "$fromNow": "1 week"
17 | },
18 | "extra": {},
19 | "metadata": {
20 | "description": "",
21 | "name": "BugBug landings risk report pipeline",
22 | "owner": "mcastelluccio@mozilla.com",
23 | "source": "https://github.com/mozilla/bugbug"
24 | },
25 | "payload": {
26 | "artifacts": {},
27 | "cache": {},
28 | "capabilities": {},
29 | "env": {},
30 | "features": {
31 | "taskclusterProxy": true
32 | },
33 | "command": [
34 | "/usr/local/bin/python3",
35 | "/code/spawn_pipeline.py",
36 | "/code/landings-pipeline.yml"
37 | ],
38 | "image": "mozilla/bugbug-spawn-pipeline",
39 | "maxRunTime": 7200
40 | },
41 | "priority": "normal",
42 | "provisionerId": "proj-bugbug",
43 | "retries": 5,
44 | "routes": [
45 | "notify.email.release-mgmt-analysis@mozilla.com.on-failed",
46 | "notify.irc-channel.#bugbug.on-failed"
47 | ],
48 | "schedulerId": "-",
49 | "scopes": ["assume:hook-id:project-bugbug/bugbug-landings-risk-report"],
50 | "tags": {},
51 | "workerType": "batch"
52 | },
53 | "triggerSchema": {
54 | "additionalProperties": false,
55 | "type": "object"
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/infra/taskcluster-hook-test-select.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "description": "",
4 | "name": "BugBug test select",
5 | "owner": "mcastelluccio@mozilla.com"
6 | },
7 | "task": {
8 | "created": {
9 | "$fromNow": "0 seconds"
10 | },
11 | "deadline": {
12 | "$fromNow": "2 hours"
13 | },
14 | "expires": {
15 | "$fromNow": "1 month"
16 | },
17 | "extra": {
18 | "phabricator-deployment": "${payload['PHABRICATOR_DEPLOYMENT']}",
19 | "phabricator-diff-id": "${payload['DIFF_ID']}"
20 | },
21 | "metadata": {
22 | "description": "",
23 | "name": "BugBug test select",
24 | "owner": "mcastelluccio@mozilla.com",
25 | "source": "https://github.com/mozilla/bugbug"
26 | },
27 | "payload": {
28 | "artifacts": {
29 | "public/selected_tasks": {
30 | "path": "/selected_tasks",
31 | "type": "file"
32 | },
33 | "public/failure_risk": {
34 | "path": "/failure_risk",
35 | "type": "file"
36 | }
37 | },
38 | "cache": {
39 | "bugbug-mercurial-repository": "/cache"
40 | },
41 | "capabilities": {},
42 | "env": {
43 | "TC_SECRET_ID": "project/bugbug/production"
44 | },
45 | "features": {
46 | "taskclusterProxy": true
47 | },
48 | "command": [
49 | "bugbug-classify-commit",
50 | "testlabelselect",
51 | "/cache/mozilla-central",
52 | "--phabricator-deployment=${payload['PHABRICATOR_DEPLOYMENT']}",
53 | "--diff-id=${payload['DIFF_ID']}",
54 | "--runnable-jobs=${payload['RUNNABLE_JOBS']}"
55 | ],
56 | "image": "mozilla/bugbug-commit-retrieval",
57 | "maxRunTime": 7200
58 | },
59 | "priority": "normal",
60 | "provisionerId": "proj-bugbug",
61 | "retries": 5,
62 | "routes": [
63 | "notify.email.mcastelluccio@mozilla.com.on-failed",
64 | "notify.irc-channel.#bugbug.on-failed",
65 | "index.project.bugbug.test_select.latest",
66 | "index.project.bugbug.test_select.diff.${payload['DIFF_ID']}",
67 | "project.bugbug.test_select"
68 | ],
69 | "schedulerId": "-",
70 | "scopes": [
71 | "assume:hook-id:project-bugbug/bugbug-test-select",
72 | "queue:route:project.bugbug.test_select.*"
73 | ],
74 | "tags": {},
75 | "workerType": "compute-small"
76 | },
77 | "triggerSchema": {
78 | "additionalProperties": false,
79 | "properties": {
80 | "PHABRICATOR_DEPLOYMENT": {
81 | "type": "string",
82 | "enum": ["prod", "dev"]
83 | },
84 | "DIFF_ID": {
85 | "type": "number"
86 | },
87 | "RUNNABLE_JOBS": {
88 | "type": "string"
89 | }
90 | },
91 | "required": ["PHABRICATOR_DEPLOYMENT", "DIFF_ID"],
92 | "type": "object"
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/infra/version_check.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
import subprocess

# Expected release version, read from the VERSION file at the repo root.
with open("VERSION", "r") as f:
    version = f.read().rstrip()

# Ask git for the most recent tag reachable from HEAD.
try:
    p = subprocess.run(
        ["git", "describe", "--abbrev=0", "--tags"], check=True, capture_output=True
    )
except subprocess.CalledProcessError as e:
    # Surface git's output before failing so CI logs are actionable.
    print(f"{e.cmd} failed with return code {e.returncode}")
    print("stdout:")
    print(e.stdout)
    print("stderr:")
    print(e.stderr)
    raise RuntimeError("Failure while getting latest tag")

# [1:] drops the first character of the tag — presumably a leading "v"
# prefix (e.g. "v1.2.3" -> "1.2.3"); TODO confirm tags are always v-prefixed.
cur_tag = p.stdout.decode("utf-8")[1:].rstrip()

# Fail when the VERSION file and the latest git tag disagree.
assert version == cur_tag, (
    f"Version in the VERSION file ({version}) should be the same as the current tag ({cur_tag})"
)
28 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.ruff]
2 | extend-exclude = ["data"]
3 |
4 | [tool.ruff.lint]
5 | select = ["E4", "E7", "E9", "F", "I", "T10", "CPY"]
6 |
7 | [tool.ruff.lint.isort]
8 | known-first-party = ["bugbug_http"]
9 |
10 | [tool.codespell]
11 | ignore-words-list = ["aFile", "thirdparty", "checkin"]
12 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | amqp==5.3.1
2 | beautifulsoup4==4.13.4
3 | boto3==1.38.27
4 | imbalanced-learn==0.13.0
5 | langchain==0.3.25
6 | langchain-anthropic==0.3.13
7 | langchain-community==0.3.24
8 | langchain-google-genai==2.1.5
9 | langchain-mistralai==0.2.10
10 | langchain-openai==0.3.18
11 | libmozdata==0.2.10
12 | llama-cpp-python==0.3.9
13 | lmdb==1.6.2
14 | lxml-html-clean==0.4.2
15 | markdown2==2.5.3
16 | matplotlib==3.10.1
17 | mercurial==7.0.2
18 | microannotate==0.0.24
19 | mozci==2.4.1
20 | numpy==2.0.2
21 | orjson==3.10.18
22 | ortools==9.12.4544
23 | pandas==2.2.3
24 | psutil==7.0.0
25 | pydriller==1.12
26 | pyOpenSSL>=0.14 # Could not find a version that satisfies the requirement pyOpenSSL>=0.14; extra == "security" (from requests[security]>=2.7.0->libmozdata==0.1.43)
27 | python-dateutil==2.9.0.post0
28 | python-hglib==2.6.2
29 | qdrant-client==1.14.2
30 | ratelimit==2.2.1
31 | requests==2.32.3
32 | requests-html==0.10.0
33 | rs_parsepatch==0.4.4
34 | scikit-learn==1.6.1
35 | scipy==1.15.2
36 | sendgrid==6.12.3
37 | shap[plots]==0.47.2
38 | tabulate==0.9.0
39 | taskcluster==84.0.2
40 | tenacity==9.1.2
41 | tqdm==4.67.1
42 | unidiff==0.7.5
43 | xgboost==2.1.4
44 | zstandard==0.23.0
45 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozilla/bugbug/b36fdbab32351de0b60e9634742218789944dddd/scripts/__init__.py
--------------------------------------------------------------------------------
/scripts/backout_related_test_regressions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 | import argparse
6 | import json
7 | from logging import INFO, basicConfig, getLogger
8 |
9 | from mozci.push import Push
10 | from tqdm import tqdm
11 |
12 | from bugbug import db, repository
13 |
14 | basicConfig(level=INFO)
15 | logger = getLogger(__name__)
16 |
17 |
def go() -> None:
    """Compute likely/possible test regressions for recent backout-related commits.

    Reads the commits DB, collects the last 100 commit nodes of each kind
    (commits that were backed out, and nodes listed as backed out by a
    backout commit), queries mozci for label- and group-level regressions
    on each push, and writes the results to backout_regressions.json and
    backedout_regressions.json.
    """
    assert db.download(repository.COMMITS_DB)

    backouts = []
    backedouts = []
    for commit in repository.get_commits(include_backouts=True):
        # NOTE(review): nodes of commits carrying "backedoutby" go into
        # `backouts`, while the nodes a commit "backsout" go into
        # `backedouts` — confirm this naming matches the repository schema.
        if commit["backedoutby"]:
            backouts.append(commit["node"])
        if commit["backsout"]:
            backedouts += commit["backsout"]

    # Bound the analysis (and mozci queries) to the 100 most recent of each.
    backouts = backouts[-100:]
    backedouts = backedouts[-100:]

    likely_label_count = 0
    possible_label_count = 0
    likely_group_count = 0
    possible_group_count = 0

    backout_regressions = {}

    for backout in tqdm(backouts):
        p = Push(backout)

        label_regressions = p.get_regressions("label")
        likely_label_count += len(p.get_likely_regressions("label"))
        possible_label_count += len(p.get_possible_regressions("label"))

        group_regressions = p.get_regressions("group")
        # Bug fix: these two counters previously queried "label" again, so
        # the group totals merely duplicated the label totals.
        likely_group_count += len(p.get_likely_regressions("group"))
        possible_group_count += len(p.get_possible_regressions("group"))

        if len(label_regressions) > 0 or len(group_regressions) > 0:
            backout_regressions[backout] = {
                "label": label_regressions,
                "group": group_regressions,
            }

    logger.info("Likely labels for backouts: %d", likely_label_count)
    logger.info("Likely groups for backouts: %d", likely_group_count)
    logger.info("Possible labels for backouts: %d", possible_label_count)
    logger.info("Possible groups for backouts: %d", possible_group_count)

    backedout_regressions = {}

    for backedout in tqdm(backedouts):
        p = Push(backedout)

        label_regressions = p.get_regressions("label")
        group_regressions = p.get_regressions("group")

        # Record pushes that lack likely regressions at one of the two
        # levels (label or group).
        if (
            len(p.get_likely_regressions("label")) == 0
            or len(p.get_likely_regressions("group")) == 0
        ):
            backedout_regressions[backedout] = {
                "label": label_regressions,
                "group": group_regressions,
            }

    with open("backout_regressions.json", "w") as f:
        json.dump(backout_regressions, f)

    with open("backedout_regressions.json", "w") as f:
        json.dump(backedout_regressions, f)
83 |
84 |
def main() -> None:
    """CLI entry point: accepts no arguments and runs the analysis."""
    parser = argparse.ArgumentParser(
        description=(
            "Find likely and possible test regressions of backouts and backed-out commits"
        )
    )
    parser.parse_args()

    go()
93 |
94 |
# Script entry point.
if __name__ == "__main__":
    main()
97 |
--------------------------------------------------------------------------------
/scripts/bug_classifier.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | import os
5 | from logging import INFO, basicConfig, getLogger
6 |
7 | import numpy as np
8 | import requests
9 |
10 | from bugbug import bugzilla, db
11 | from bugbug.models import get_model_class
12 | from bugbug.utils import download_model
13 |
14 | basicConfig(level=INFO)
15 | logger = getLogger(__name__)
16 |
17 |
def classify_bugs(model_name: str, bug_id: int) -> None:
    """Interactively classify bugs with a pre-trained model.

    Downloads the model named `model_name` when it is not present locally,
    then classifies either the single bug `bug_id` or every bug in the bugs
    DB, printing the predicted class and class probabilities for each bug
    and waiting for a key press between bugs.
    """
    model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info("%s does not exist. Downloading the model....", model_file_name)
        try:
            download_model(model_name)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

    model = get_model_class(model_name).load(model_file_name)

    if bug_id:
        bugs = bugzilla.get(bug_id).values()
        assert bugs, f"A bug with a bug id of {bug_id} was not found"
    else:
        assert db.download(bugzilla.BUGS_DB)
        bugs = bugzilla.get_bugs()

    for bug in bugs:
        print(
            f"https://bugzilla.mozilla.org/show_bug.cgi?id={bug['id']} - {bug['summary']} "
        )

        if not model.calculate_importance:
            probas = model.classify(bug, probabilities=True, importances=False)
        else:
            # Also compute and display feature importances.
            probas, importance = model.classify(
                bug, probabilities=True, importances=True
            )
            model.print_feature_importances(
                importance["importances"], class_probabilities=probas
            )

        probability = probas[0]
        pred_index = np.argmax(probability)
        if len(probability) <= 2:
            # Binary model: index 1 means the positive class.
            pred_class = "Positive" if pred_index == 1 else "Negative"
        else:
            # Multiclass model: map the index back to its label.
            pred_class = model.le.inverse_transform([pred_index])[0]
        print(f"{pred_class} {probability}")
        input()
65 |
66 |
def main() -> None:
    """Parse CLI arguments and run the interactive bug classifier."""
    parser = argparse.ArgumentParser(
        description="Perform evaluation on bugs using the specified model"
    )
    parser.add_argument("model", help="Which model to use for evaluation")
    parser.add_argument("--bug-id", help="Classify the given bug id", type=int)
    args = parser.parse_args()

    classify_bugs(args.model, args.bug_id)
77 |
78 |
# Script entry point.
if __name__ == "__main__":
    main()
81 |
--------------------------------------------------------------------------------
/scripts/check.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | import sys
5 | from logging import INFO, basicConfig, getLogger
6 |
7 | from bugbug.model import Model
8 | from bugbug.utils import download_model
9 |
10 | basicConfig(level=INFO)
11 | logger = getLogger(__name__)
12 |
13 |
class ModelChecker:
    """Runs a model's self-check and terminates the process on failure."""

    def go(self, model_name: str) -> None:
        """Download and load `model_name`, run its check(), and exit(1) if it fails."""
        # Load the model
        model = Model.load(download_model(model_name))

        # Then call the check method of the model
        if not model.check():
            logger.warning(
                f"Check of model {model.__class__!r} failed, check the output for reasons why"
            )
            sys.exit(1)
26 |
27 |
def main() -> None:
    """CLI entry point: check the single model named on the command line."""
    parser = argparse.ArgumentParser(description="Check the models")
    parser.add_argument("model", help="Which model to check.")
    args = parser.parse_args()

    ModelChecker().go(args.model)
38 |
39 |
# Script entry point.
if __name__ == "__main__":
    main()
42 |
--------------------------------------------------------------------------------
/scripts/check_all_metrics.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import argparse
7 | import logging
8 | import os
9 | import subprocess
10 | from fnmatch import fnmatch
11 | from pathlib import Path
12 |
13 | import taskcluster
14 |
15 | from bugbug.utils import get_taskcluster_options
16 |
17 | LOGGER = logging.getLogger(__name__)
18 |
19 | logging.basicConfig(level=logging.INFO)
20 |
21 | QUEUE_ROUTE_PATTERN = "index.project.bugbug.train_*.per_date.*"
22 |
23 | CURRENT_DIR = Path(__file__).resolve().parent
24 |
25 |
def download_metric(model_name: str, metric_directory: str):
    """Fetch training metrics for `model_name` (since 2019) into `metric_directory`
    by invoking the bugbug-retrieve-training-metrics console script."""
    LOGGER.info("Download metrics for %r", model_name)

    subprocess.run(
        [
            "bugbug-retrieve-training-metrics",
            model_name,
            "2019",
            "-d",
            metric_directory,
        ],
        check=True,
    )
40 |
41 |
def check_metrics(metric_directory: str, output_directory: str):
    """Analyze the downloaded metrics in `metric_directory`, writing graphs to
    `output_directory`, via the bugbug-analyze-training-metrics console script."""
    LOGGER.info("Checking metrics")

    subprocess.run(
        ["bugbug-analyze-training-metrics", metric_directory, output_directory],
        check=True,
    )
50 |
51 |
def get_model_name(queue, task_id: str):
    """Return the model name encoded in a training task's index route.

    Training tasks carry a route matching QUEUE_ROUTE_PATTERN, e.g.
    "index.project.bugbug.train_component.per_date.2019...". The fourth
    dot-separated segment (index 3) is "train_<model>", from which the
    model name is extracted. Returns None (with a warning) when no route
    matches.
    """
    dependency_task = queue.task(task_id)

    # Check the route to detect training tasks
    for route in dependency_task["routes"]:
        if fnmatch(route, QUEUE_ROUTE_PATTERN):
            # Bug fix: index 4 selected the "per_date" segment; index 3 is
            # the "train_<model>" segment per the route pattern
            # (0="index", 1="project", 2="bugbug", 3="train_<model>").
            model_name = route.split(".")[3]  # model_name = "train_component"
            return model_name[6:]  # strip the "train_" prefix

    # Show a warning if no matching route was found, this can happen when the
    # current task has a dependency to a non-training task or if the route
    # pattern changes.
    LOGGER.warning(f"No matching route found for task id {task_id}")
65 |
66 |
def get_model_names(task_id: str) -> list[str]:
    """Collect model names from the training tasks that `task_id` depends on."""
    queue = taskcluster.Queue(get_taskcluster_options())
    dependencies = queue.task(task_id)["dependencies"]

    model_names = []

    # Renamed loop variable to avoid shadowing the `task_id` parameter.
    for i, dependency_id in enumerate(dependencies, start=1):
        LOGGER.info(
            "Loading task dependencies {}/{} {}".format(
                i, len(dependencies), dependency_id
            )
        )

        model_name = get_model_name(queue, dependency_id)
        if model_name:
            LOGGER.info("Adding model %r to download list", model_name)
            model_names.append(model_name)

    return model_names
88 |
89 |
def main():
    """Download the metrics for every model trained by this task's
    dependencies, then run the metrics checker over them."""
    parser = argparse.ArgumentParser(
        description="Get all the metrics name from taskcluster dependency, download them and check them"
    )
    parser.add_argument(
        "metric_directory",
        metavar="metric-directory",
        help="Which directory to download metrics to",
    )
    parser.add_argument(
        "output_directory",
        metavar="output-directory",
        help="Which directory to output graphs to",
    )
    parser.add_argument(
        "--task-id",
        type=str,
        default=os.environ.get("TASK_ID"),
        help="Taskcluster task id to analyse",
    )
    args = parser.parse_args()

    for model in get_model_names(args.task_id):
        download_metric(model, args.metric_directory)

    check_metrics(args.metric_directory, args.output_directory)
120 |
121 |
# Script entry point.
if __name__ == "__main__":
    main()
124 |
--------------------------------------------------------------------------------
/scripts/code_review_tool_evaluator_report.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 | import pandas as pd
4 |
5 | from scripts.code_review_tool_evaluator import get_latest_evaluation_results_file
6 |
# Load the most recent evaluation results CSV produced by the evaluator
# script (path is relative to this scripts/ directory).
evaluation_results = pd.read_csv(
    get_latest_evaluation_results_file("../evaluation_results")
)

# %%

# Summarize a single variant: pick the first variant name present in the
# results.
variant_names = evaluation_results["variant_name"].unique()
variant_name = variant_names[0]

df = evaluation_results[evaluation_results["variant_name"] == variant_name]


# %%
# Counts for comments produced by the new tool version, split by the
# evaluation verdict (VALID / INVALID / not yet evaluated).
new_comments_count = df["new_comment"].count()
new_valid_comments = len(df[~df["new_comment"].isna() & (df["evaluation"] == "VALID")])
new_invalid_comments = len(
    df[~df["new_comment"].isna() & (df["evaluation"] == "INVALID")]
)
new_unevaluated_comments = len(df[~df["new_comment"].isna() & df["evaluation"].isna()])

# Counts for comments from the old tool version (aggregated per row via
# the old_comments_count column).
old_comments_count = df["old_comments_count"].sum()
old_valid_comments = df[df["evaluation"] == "VALID"]["old_comments_count"].sum()
old_invalid_comments = df[df["evaluation"] == "INVALID"]["old_comments_count"].sum()

# Old comments that were matched by a new comment, split by verdict.
matched_valid_comments = df[
    ~df["new_comment"].isna()
    & ~df["old_comment"].isna()
    & (df["evaluation"] == "VALID")
]["old_comments_count"].sum()
matched_invalid_comments = df[
    ~df["new_comment"].isna()
    & ~df["old_comment"].isna()
    & (df["evaluation"] == "INVALID")
]["old_comments_count"].sum()

print("--------------------")
print("Variant Name:", variant_name)
print("--------------------")
print("New Comments:", new_comments_count)
print("New Valid Comments:", new_valid_comments)
print("New Invalid Comments:", new_invalid_comments)
print("New Unevaluated Comments:", new_unevaluated_comments)
print("--------------------")
print("Old Comments:", old_comments_count)
print("Old Valid Comments:", old_valid_comments)
print("Old Invalid Comments:", old_invalid_comments)
print("--------------------")
# Recall percentages relative to the old tool's comments.
# NOTE(review): assumes the old counts are non-zero — these divisions fail
# on an empty or unevaluated results file.
print(
    "Recalled comments:",
    (matched_valid_comments + matched_invalid_comments) / old_comments_count * 100,
)
print("Recalled valid comments:", matched_valid_comments / old_valid_comments * 100)
print(
    "Recalled invalid comments:", matched_invalid_comments / old_invalid_comments * 100
)
print("--------------------")
print(
    "Missed valid comments:",
    (old_valid_comments - matched_valid_comments) / old_valid_comments * 100,
)
print(
    "Missed invalid comments:",
    (old_invalid_comments - matched_invalid_comments) / old_invalid_comments * 100,
)


# %%


# Compare the first two variants: find evaluated new comments that appear
# in one variant's run but not the other.
df = evaluation_results[
    evaluation_results["evaluation"].isin(["VALID", "INVALID"])
    & ~evaluation_results["new_comment"].isna()
].sort_values(by=["evaluation", "revision_id", "new_comment"])


# Synthetic key identifying a comment by its diff and text.
df["id"] = df["diff_id"].astype(str) + " | " + df["new_comment"].astype(str)
df_old = df[df["variant_name"] == variant_names[0]]
df_new = df[df["variant_name"] == variant_names[1]]

in_new_but_not_in_old = df_new[~df_new["id"].isin(df_old["id"])]

print(
    "Examples of comments that were filtered by the old version but were not filtered by the new version:\n"
)
print(
    in_new_but_not_in_old[["revision_id", "new_comment", "evaluation"]].to_markdown(
        index=False
    )
)


in_old_but_not_in_new = df_old[~df_old["id"].isin(df_new["id"])]
print(
    "\n\nExamples of comments that were filtered by the new version but were not filtered by the old version:\n"
)
print(
    in_old_but_not_in_new[["revision_id", "new_comment", "evaluation"]].to_markdown(
        index=False
    )
)

# %%
109 |
--------------------------------------------------------------------------------
/scripts/code_review_tool_runner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import argparse
7 | import sys
8 |
9 | from bugbug import generative_model_tool
10 | from bugbug.code_search.function_search import function_search_classes
11 | from bugbug.tools import code_review
12 | from bugbug.vectordb import QdrantVectorDB
13 |
14 |
def run(args) -> None:
    """Run the code review tool once against a single review request and
    print the patch followed by the generated review."""
    llm = generative_model_tool.create_llm_from_args(args)

    # Only instantiate a function-search backend when one was requested.
    if args.function_search_type is None:
        function_search = None
    else:
        function_search = function_search_classes[args.function_search_type]()

    review_comments_db = code_review.ReviewCommentsDB(
        QdrantVectorDB("diff_comments")
    )
    code_review_tool = code_review.CodeReviewTool(
        [llm],
        llm,
        function_search=function_search,
        review_comments_db=review_comments_db,
        show_patch_example=False,
    )

    review_data = code_review.review_data_classes[args.review_platform]()

    revision = review_data.get_review_request_by_id(args.review_request_id)
    patch = review_data.get_patch_by_id(revision.patch_id)

    print(patch)
    print(code_review_tool.run(patch))
    input()
41 |
42 |
def parse_args(args):
    """Build the CLI parser (review platform, request id, LLM options,
    function-search backend) and parse `args`."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--review_platform",
        choices=list(code_review.review_data_classes.keys()),
        help="Review platform",
    )
    parser.add_argument(
        "--review_request_id",
        help="Review request ID",
    )
    # Registers the LLM-selection arguments shared across tools.
    generative_model_tool.create_llm_to_args(parser)
    parser.add_argument(
        "--function_search_type",
        choices=list(function_search_classes.keys()),
        help="Function search tool",
    )
    return parser.parse_args(args)
63 |
64 |
# Script entry point.
if __name__ == "__main__":
    args = parse_args(sys.argv[1:])
    run(args)
68 |
--------------------------------------------------------------------------------
/scripts/comment_level_labeler.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import argparse
7 | import csv
8 | import os
9 | import random
10 |
11 | from bugbug import bugzilla
12 | from bugbug.models.bug import BugModel
13 | from bugbug.models.regression import RegressionModel
14 |
# CLI: choose which labeling goal (steps-to-reproduce or regression range)
# to collect comment-level labels for.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--goal",
    help="Goal of the labeler",
    choices=["str", "regressionrange"],
    default="str",
)
args = parser.parse_args()

# Pick the classifier used to pre-filter bugs: only bugs the model deems
# real bugs/regressions are shown for labeling.
if args.goal == "str":
    model = BugModel.load("bugmodel")
elif args.goal == "regressionrange":
    model = RegressionModel.load("regressionmodel")

# Labels CSV for this goal, e.g. bugbug/labels/str.csv.
file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv")

# Load the existing labels (bug_id, comment_num, label), skipping the
# header row.
with open(file_path, "r") as f:
    reader = csv.reader(f)
    next(reader)
    labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader]

# (bug_id, comment_num) pairs that have already been labeled.
already_done = set((c[0], c[1]) for c in labeled_comments)

bugs = []
for bug in bugzilla.get_bugs():
    # For the str and regressionrange problems, we don't care about test failures,
    if (
        "intermittent-failure" in bug["keywords"]
        or "stockwell" in bug["whiteboard"]
        or "permafail" in bug["summary"].lower()
    ):
        continue

    # bugs filed from Socorro,
    if (
        "this bug was filed from the socorro interface"
        in bug["comments"][0]["text"].lower()
    ):
        continue

    # and fuzzing bugs.
    if "fuzzing" in bug["comments"][0]["text"].lower():
        continue

    bugs.append(bug)

# Present bugs in random order.
random.shuffle(bugs)

for bug in bugs:
    # Only show bugs that are really bugs/regressions for labeling.
    c = model.classify(bug)
    if c != 1:
        continue

    v = None

    for i, comment in enumerate(bug["comments"]):
        # Skip comments that were labeled in a previous session.
        if (bug["id"], i) in already_done:
            continue

        os.system("clear")
        print(f"Bug {bug['id']} - {bug['summary']}")
        print(f"Comment {i}")
        print(comment["text"])

        if args.goal == "str":
            print(
                "\nY for comment containing STR, N for comment not containing STR, K to skip, E to exit"
            )
        elif args.goal == "regressionrange":
            print(
                "\nY for comment containing regression range, N for comment not containing regression range, K to skip, E to exit"
            )
        v = input()

        # "e"/"k" abort the rest of this bug's comments without recording
        # a label for the current comment.
        if v in ["e", "k"]:
            break

        if v in ["y", "n"]:
            labeled_comments.append((bug["id"], i, v))

    # Persist the accumulated labels unless the bug was exited/skipped
    # mid-way, then ask whether to continue with the next bug.
    if v not in ["e", "k"]:
        with open(file_path, "w") as f:
            writer = csv.writer(f)
            writer.writerow(["bug_id", "comment_num", f"has_{args.goal}"])
            writer.writerows(sorted(labeled_comments))

        print("\nE to exit, anything else to continue")
        v = input()

        if v == "e":
            break
107 |
--------------------------------------------------------------------------------
/scripts/comment_resolver_runner.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import sys
4 |
5 | from dotenv import load_dotenv
6 |
7 | import bugbug.db as db
8 | import bugbug.phabricator as phabricator
9 | from bugbug.generative_model_tool import create_llm_from_args
10 | from bugbug.tools.comment_resolver import (
11 | CodeGeneratorTool,
12 | FixCommentDB,
13 | LocalQdrantVectorDB,
14 | generate_fixes,
15 | generate_individual_fix,
16 | )
17 |
18 |
def run(args) -> None:
    """Optionally (re)build the fix-comments vector DB, then generate fixes.

    Args:
        args: Parsed command-line namespace from parse_args().
    """
    load_dotenv()

    logging.basicConfig(level=logging.INFO)

    # Use a distinct name so the `bugbug.db` module import is not shadowed
    # (the original local was also called `db`).
    fix_comment_db = FixCommentDB(LocalQdrantVectorDB(collection_name="fix_comments"))

    if args.create_db:
        # Rebuild the collection from scratch before uploading the dataset.
        fix_comment_db.db.delete_collection()
        fix_comment_db.db.setup()
        fix_comment_db.upload_dataset(args.dataset_file)

    llm = create_llm_from_args(args)
    llm_tool = CodeGeneratorTool(llm=llm, db=fix_comment_db)

    if args.revision_id and args.diff_id and args.comment_id:
        # The stray `pass` statement that preceded this call (dead scaffolding
        # code) has been removed.
        # TODO: Create this function
        generate_individual_fix(
            llm_tool=llm_tool,
            db=fix_comment_db,
            revision_id=args.revision_id,
            diff_id=args.diff_id,
            comment_id=args.comment_id,
        )
    else:
        generate_fixes(
            llm_tool=llm_tool,
            db=fix_comment_db,
            generation_limit=args.generation_limit,
            prompt_types=args.prompt_types,
            hunk_sizes=args.hunk_sizes,
            diff_length_limits=args.diff_length_limits,
            output_csv=args.output_csv,
        )
54 |
55 |
def parse_args(args):
    """Build the comment-resolver CLI parser and parse the given argument list."""
    parser = argparse.ArgumentParser()

    # (flags, keyword arguments) pairs, registered in order so the --help
    # output is identical to the original hand-written sequence.
    options = [
        (("--llm",), {"help": "LLM", "choices": ["openai"], "default": "openai"}),
        (
            ("--create-db",),
            {
                "action": "store_true",
                "help": "If set, the local Qdrant database will be created and populated.",
            },
        ),
        (
            ("--dataset-file",),
            {
                "type": str,
                "default": "data/fixed_comments.json",
                "help": "Dataset file to upload as Qdrant database.",
            },
        ),
        (
            ("--output-csv",),
            {
                "type": str,
                "default": "metrics_results.csv",
                "help": "Output CSV file for results.",
            },
        ),
        (
            ("--prompt-types",),
            {
                "nargs": "+",
                "default": ["zero-shot"],
                "help": "Types of prompts to use.",
            },
        ),
        (
            ("--diff-length-limits",),
            {
                "nargs": "+",
                "type": int,
                "default": [1000],
                "help": "Diff length limits to enforce when searching for examples.",
            },
        ),
        (
            ("--hunk-sizes",),
            {
                "nargs": "+",
                "type": int,
                "default": [20],
                "help": "Hunk sizes to enforce when searching for examples.",
            },
        ),
        (
            ("--generation-limit",),
            {
                "type": int,
                "default": 100,
                "help": "Maximum number of generations.",
            },
        ),
        (
            ("--revision-id",),
            {"type": int, "help": "Revision ID for individual fix generation."},
        ),
        (
            ("--diff-id",),
            {"type": int, "help": "Diff ID for individual fix generation."},
        ),
        (
            ("--comment-id",),
            {"type": int, "help": "Comment ID for individual fix generation."},
        ),
    ]
    for flags, kwargs in options:
        parser.add_argument(*flags, **kwargs)

    return parser.parse_args(args)
124 |
125 |
if __name__ == "__main__":
    # Make sure the fixed-comments dataset is available locally before running.
    db.download(phabricator.FIXED_COMMENTS_DB)
    args = parse_args(sys.argv[1:])
    run(args)
130 |
--------------------------------------------------------------------------------
/scripts/commit_retriever.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | import os
5 | from logging import INFO, basicConfig, getLogger
6 |
7 | import hglib
8 |
9 | from bugbug import db, repository
10 | from bugbug.utils import create_tar_zst, zstd_compress
11 |
# Configure logging once at import time so module loggers emit INFO and above.
basicConfig(level=INFO)
logger = getLogger(__name__)
14 |
15 |
class Retriever(object):
    """Clones mozilla-central and extracts commit data into the commits DB."""

    def __init__(self, cache_root):
        assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
        self.repo_dir = os.path.join(cache_root, "mozilla-central")

    def retrieve_commits(self, limit):
        """Download commit data, resuming from the existing DB unless `limit` is set."""
        repository.clone(self.repo_dir)

        if limit:
            # Mercurial revset supports negative integers starting from tip
            rev_start = -limit
        else:
            db.download(repository.COMMITS_DB, support_files_too=True)

            # Resume right after the newest commit already present in the DB;
            # if the DB is empty, start from revision 0.
            rev_start = 0
            for commit in repository.get_commits():
                rev_start = f"children({commit['node']})"

        with hglib.open(self.repo_dir) as hg:
            revs = repository.get_revs(hg, rev_start)

        # Download in bounded batches to keep memory usage in check.
        batch_size = 70000
        for offset in range(0, len(revs), batch_size):
            batch = revs[offset : offset + batch_size]
            repository.download_commits(self.repo_dir, revs=batch)

        logger.info("commit data extracted from repository")

        # Some commits that were already in the DB from the previous run might need
        # to be updated (e.g. coverage information).
        repository.update_commits()

        zstd_compress(repository.COMMITS_DB)
        create_tar_zst(os.path.join("data", repository.COMMIT_EXPERIENCES_DB))
50 |
51 |
def main():
    """CLI entry point: retrieve commit data from mozilla-central."""
    description = "Retrieve and extract the information from Mozilla-Central repository"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "--limit",
        type=int,
        help="Only download the N oldest commits, used mainly for integration tests",
    )
    # Use a valid Python identifier for the positional so it can be accessed
    # as a normal attribute instead of via getattr(args, "cache-root");
    # metavar keeps the original name in the usage/help output.
    parser.add_argument(
        "cache_root", metavar="cache-root", help="Cache for repository clones."
    )

    args = parser.parse_args()

    retriever = Retriever(args.cache_root)

    retriever.retrieve_commits(args.limit)


if __name__ == "__main__":
    main()
72 |
--------------------------------------------------------------------------------
/scripts/compatibility_report_classifier.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | import os
5 | from logging import INFO, basicConfig, getLogger
6 |
7 | import numpy as np
8 | import requests
9 |
10 | from bugbug.models import get_model_class
11 | from bugbug.utils import download_model
12 |
# Configure logging once at import time so module loggers emit INFO and above.
basicConfig(level=INFO)
logger = getLogger(__name__)
15 |
16 |
def classify_reports(model_name: str, report_text: str) -> None:
    """Classify a single user report with the given model and log the result.

    Args:
        model_name: Name of the model to use; expects "<model_name>model" on
            disk and downloads it if missing.
        report_text: The report body to classify.

    Raises:
        SystemExit: If no pre-trained model is available for download.
    """
    model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info("%s does not exist. Downloading the model....", model_file_name)
        try:
            download_model(model_name)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    logger.info("%s", report_text)

    # Wrap the raw text in the dict shape passed to the model ("body" plus an
    # empty "title").
    report = {"body": report_text, "title": ""}

    if model.calculate_importance:
        # Also compute and print per-feature importances when supported.
        probas, importance = model.classify(
            report, probabilities=True, importances=True
        )

        model.print_feature_importances(
            importance["importances"], class_probabilities=probas
        )
    else:
        probas = model.classify(report, probabilities=True, importances=False)

    probability = probas[0]
    pred_index = np.argmax(probability)
    if len(probability) > 2:
        # Multi-class model: map the predicted index back to its label.
        pred_class = model.le.inverse_transform([pred_index])[0]
    else:
        pred_class = "Positive" if pred_index == 1 else "Negative"
    logger.info("%s %s", pred_class, probability)
    # NOTE(review): this input() blocks until the user presses Enter; it looks
    # like a leftover from a loop-based classifier — confirm it is intended.
    input()
56 |
57 |
def main() -> None:
    """CLI entry point: classify a single report passed on the command line."""
    parser = argparse.ArgumentParser(
        description="Perform evaluation of user report using the specified model"
    )

    parser.add_argument("model", type=str, help="Which model to use for evaluation")
    parser.add_argument("--report-text", help="Report text to classify", type=str)

    parsed = parser.parse_args()

    classify_reports(parsed.model, parsed.report_text)


if __name__ == "__main__":
    main()
72 |
--------------------------------------------------------------------------------
/scripts/generate_sheet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | import csv
5 | import os
6 | from datetime import datetime, timedelta
7 | from logging import INFO, basicConfig, getLogger
8 |
9 | import numpy as np
10 |
11 | from bugbug import bugzilla
12 | from bugbug.models import get_model_class
13 |
# Configure logging once at import time so module loggers emit INFO and above.
basicConfig(level=INFO)
logger = getLogger(__name__)
16 |
17 |
def generate_sheet(model_name: str, token: str, days: int, threshold: float) -> None:
    """Classify bugs filed in the last `days` days and write them to a CSV sheet.

    Args:
        model_name: Model to load; expects "<model_name>model" on disk.
        token: Bugzilla API token.
        days: How many days back to fetch bugs for.
        threshold: Probability cutoff for the positive class (binary models only).
    """
    model_file_name = f"{model_name}model"

    assert os.path.exists(model_file_name), (
        f"{model_file_name} does not exist. Train the model with trainer.py first."
    )

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    bugzilla.set_token(token)
    # NOTE(review): timedelta(days) relies on `days` being timedelta's first
    # positional parameter; timedelta(days=days) would be clearer.
    bug_ids = bugzilla.get_ids_between(datetime.utcnow() - timedelta(days))
    bugs = bugzilla.get(bug_ids)

    logger.info("Classifying %d bugs...", len(bugs))

    # Header row; data rows leave the third column empty.
    rows = [["Bug", f"{model_name}(model)", model_name, "Title"]]

    for bug in bugs.values():
        p = model.classify(bug, probabilities=True)
        probability = p[0]
        if len(probability) > 2:
            # Multi-class model: report the most probable class name.
            index = np.argmax(probability)
            prediction = model.class_names[index]
        else:
            # Binary model: apply the confidence threshold to the positive class.
            prediction = "y" if probability[1] >= threshold else "n"

        rows.append(
            [
                f"https://bugzilla.mozilla.org/show_bug.cgi?id={bug['id']}",
                prediction,
                "",
                bug["summary"],
            ]
        )

    # Write the sheet as sheets/<model>-<date>-labels.csv.
    # NOTE(review): the csv module docs recommend opening with newline="".
    os.makedirs("sheets", exist_ok=True)
    with open(
        os.path.join(
            "sheets",
            f"{model_name}-{datetime.utcnow().strftime('%Y-%m-%d')}-labels.csv",
        ),
        "w",
    ) as f:
        writer = csv.writer(f)
        writer.writerows(rows)
64 |
65 |
def main() -> None:
    """Parse CLI arguments and generate the classification sheet."""
    description = "Perform evaluation on bugs from specified days back on the specified model and generate a csv file "
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument("model", help="Which model to generate a csv for.")
    parser.add_argument("token", help="Bugzilla token")
    # `default` on a required positional is dead code in argparse: the default
    # is only applied when nargs="?" makes the argument optional. Making these
    # optional is backward compatible (existing invocations still work).
    parser.add_argument(
        "days",
        type=int,
        nargs="?",
        default=7,
        help="No. of days back from which bugs will be evaluated",
    )
    parser.add_argument(
        "threshold",
        type=float,
        nargs="?",
        default=0.7,
        help="Confidence threshold for the model",
    )

    args = parser.parse_args()

    generate_sheet(args.model, args.token, args.days, args.threshold)


if __name__ == "__main__":
    main()
89 |
--------------------------------------------------------------------------------
/scripts/get_type_labels.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import argparse
7 | import csv
8 | import sys
9 |
10 | import requests
11 |
12 |
def parse_args(args):
    """Parse the command-line arguments for the label retriever."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--types",
        nargs="*",
        default=["defect", "enhancement", "task"],
        help="Types to retrieve",
    )
    return parser.parse_args(args)
22 |
23 |
def main(args):
    """Refresh the defect/enhancement/task labels CSV with type changes from Bugzilla.

    Queries Bugzilla for bugs whose type ever changed (or matches the given
    types) and merges them into bugbug/labels/defect_enhancement_task_h.csv.
    """
    params = {
        "columnlist": "bug_type",
        "order": "bug_id",
        "j_top": "OR",
        "f1": "bug_type",
        "o1": "everchanged",
        "f2": "OP",
        "f3": "bug_type",
        "o3": "anyexact",
        "v3": "task,enhancement",
        "f4": "bug_id",
        "o4": "greaterthan",
        "v4": 1540807,
        "f5": "CP",
        "ctype": "csv",
    }

    r = requests.get("https://bugzilla.mozilla.org/buglist.cgi", params=params)
    r.raise_for_status()

    # newline="" is the csv-module-documented way to open CSV files; it avoids
    # platform-dependent newline translation corrupting the output.
    with open("bugbug/labels/defect_enhancement_task_h.csv", "r", newline="") as f:
        reader = csv.reader(f)
        headers = next(reader)
        bug_type_map = {int(row[0]): row[1] for row in reader}

    # We add to our csv both labels that were changed, and labels that are in
    # the list of requested types.
    reader = csv.reader(r.text.splitlines())
    next(reader)
    for row in reader:
        if int(row[0]) in bug_type_map or row[1] in args.types:
            bug_type_map[int(row[0])] = row[1]

    with open("bugbug/labels/defect_enhancement_task_h.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        writer.writerows(sorted(bug_type_map.items()))


if __name__ == "__main__":
    main(parse_args(sys.argv[1:]))
66 |
--------------------------------------------------------------------------------
/scripts/github_issue_classifier.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | import os
5 | from logging import INFO, basicConfig, getLogger
6 |
7 | import numpy as np
8 | import requests
9 |
10 | from bugbug import db
11 | from bugbug.github import Github
12 | from bugbug.models import get_model_class
13 | from bugbug.utils import download_model
14 |
# Configure logging once at import time so module loggers emit INFO and above.
basicConfig(level=INFO)
logger = getLogger(__name__)
17 |
18 |
def classify_issues(
    owner: str, repo: str, retrieve_events: bool, model_name: str, issue_number: int
) -> None:
    """Classify GitHub issues with the given model and log the predictions.

    Args:
        owner: GitHub repository owner.
        repo: GitHub repository name.
        retrieve_events: Whether to also retrieve events for each issue.
        model_name: Model to use; expects "<model_name>model" on disk and
            downloads it if missing.
        issue_number: If truthy, classify only this issue; otherwise classify
            all issues from the local DB.

    Raises:
        SystemExit: If no pre-trained model is available for download.
    """
    model_file_name = f"{model_name}model"

    if not os.path.exists(model_file_name):
        logger.info("%s does not exist. Downloading the model....", model_file_name)
        try:
            download_model(model_name)
        except requests.HTTPError:
            logger.error(
                "A pre-trained model is not available, you will need to train it yourself using the trainer script"
            )
            raise SystemExit(1)

    model_class = get_model_class(model_name)
    model = model_class.load(model_file_name)

    github = Github(
        owner=owner, repo=repo, state="all", retrieve_events=retrieve_events
    )

    if issue_number:
        issue = github.fetch_issue_by_number(
            owner, repo, issue_number, retrieve_events
        )
        # Bug fix: the previous code asserted on the iterator object, which is
        # always truthy, so a missing issue was never detected. Assert on the
        # fetched issue itself instead.
        assert issue, f"An issue with a number of {issue_number} was not found"
        issues = iter([issue])
    else:
        assert db.download(github.db_path)
        issues = github.get_issues()

    for issue in issues:
        logger.info("%s - %s ", issue["url"], issue["title"])

        if model.calculate_importance:
            # Also compute and print per-feature importances when supported.
            probas, importance = model.classify(
                issue, probabilities=True, importances=True
            )

            model.print_feature_importances(
                importance["importances"], class_probabilities=probas
            )
        else:
            probas = model.classify(issue, probabilities=True, importances=False)

        probability = probas[0]
        pred_index = np.argmax(probability)
        if len(probability) > 2:
            # Multi-class model: map the predicted index back to its label.
            pred_class = model.le.inverse_transform([pred_index])[0]
        else:
            pred_class = "Positive" if pred_index == 1 else "Negative"
        logger.info("%s %s", pred_class, probability)
        # Pause so the user can read each prediction before the next issue.
        input()
72 |
73 |
def main() -> None:
    """CLI entry point: classify GitHub issues with the chosen model."""
    parser = argparse.ArgumentParser(
        description="Perform evaluation on github issues using the specified model"
    )

    parser.add_argument("model", type=str, help="Which model to use for evaluation")
    parser.add_argument(
        "--owner", help="GitHub repository owner.", type=str, required=True
    )
    parser.add_argument(
        "--repo", help="GitHub repository name.", type=str, required=True
    )
    parser.add_argument(
        "--retrieve-events",
        action="store_true",
        help="Whether to retrieve events for each issue.",
    )
    parser.add_argument(
        "--issue-number", help="Classify the given github issue by number", type=int
    )

    parsed = parser.parse_args()

    classify_issues(
        parsed.owner,
        parsed.repo,
        parsed.retrieve_events,
        parsed.model,
        parsed.issue_number,
    )


if __name__ == "__main__":
    main()
110 |
--------------------------------------------------------------------------------
/scripts/integration_test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Integration test: exercises the full bugbug data pipeline end-to-end
# (data retrieval -> training -> HTTP service) using small limits for speed.
set -euox pipefail

# Script that runs the whole data pipeline as fast as possible to validate
# that every part is working with the others

# Supposed to be run from the repository root directory

# Remove the models and any old data
rm defectenhancementtaskmodel* || true;
rm backout* || true;
rm -Rf data || true;

ls -lh

# First retrieve a subset of bug data
bugbug-data-bugzilla --limit 500
ls -lh
ls -lh data

# The bug data force download the commit DB
# Removes it to ensure the commit retrieval work as expected
rm data/commit*

# Then generate a test dataset of fixed inline comments
bugbug-fixed-comments --limit 150
ls -lh
ls -lh data

# Remove DB to ensure it works as expected
rm data/fixed_comments.json

# Then retrieve a subset of commit data
bugbug-data-commits --limit 500 "${CACHE_DIR:-cache}"
test -d ${CACHE_DIR:-cache}/mozilla-central
ls -lh
ls -lh data


# Then train a bug model
bugbug-train defectenhancementtask --limit 500 --no-download

# Then train a commit model
# FIXME: Disabled temporary due to a problem in identifying backout comments
# See: https://github.com/mozilla/bugbug/issues/5020#issuecomment-2884394426
# bugbug-train backout --limit 30000 --no-download

# Then spin the http service up
# This part duplicates the http service Dockerfiles because we cannot easily spin Docker containers
# up on Taskcluster
cp VERSION http_service/VERSION
pip install --disable-pip-version-check --quiet --no-cache-dir ./http_service

# Redis database 4 is reserved for this integration run.
export REDIS_URL=redis://localhost:6379/4

# Start Redis
redis-server >/dev/null 2>&1 &
redis_pid=$!

sleep 1

# Uncomment following line to clean up the redis-server
redis-cli -n 4 FLUSHDB

# Start the http server
gunicorn -b 127.0.0.1:8000 bugbug_http.app --preload --timeout 30 -w 3 &
gunicorn_pid=$!

# Start the background worker
env BUGBUG_ALLOW_MISSING_MODELS=1 BUGBUG_REPO_DIR=${CACHE_DIR:-cache}/mozilla-central bugbug-http-worker high default low &
worker_pid=$!

# Ensure we take down the containers at the end
trap 'kill $gunicorn_pid && kill $worker_pid && kill $redis_pid' EXIT

# Then check that we can correctly classify a bug
sleep 10 && python http_service/tests/test_integration.py
--------------------------------------------------------------------------------
/scripts/maintenance_effectiveness_indicator.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import argparse
7 | import math
8 | from logging import INFO, basicConfig, getLogger
9 |
10 | import dateutil.parser
11 |
12 | from bugbug import bugzilla
13 | from bugbug.utils import get_secret
14 |
# Configure logging once at import time so module loggers emit INFO and above.
basicConfig(level=INFO)
logger = getLogger(__name__)
17 |
18 |
19 | def main() -> None:
20 | parser = argparse.ArgumentParser()
21 | parser.add_argument("teams", help="Bugzilla team", type=str, nargs="+")
22 | parser.add_argument(
23 | "start_date",
24 | help="Start date of the period (YYYY-MM-DD)",
25 | type=str,
26 | )
27 | parser.add_argument(
28 | "end_date",
29 | help="End date of the period (YYYY-MM-DD)",
30 | type=str,
31 | )
32 | parser.add_argument(
33 | "--components",
34 | help="Bugzilla components",
35 | type=str,
36 | nargs="*",
37 | )
38 |
39 | args = parser.parse_args()
40 |
41 | # Try to use a Bugzilla API key if available.
42 | try:
43 | bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))
44 | except ValueError:
45 | logger.info(
46 | "If you want to include security bugs too, please set the BUGBUG_BUGZILLA_TOKEN environment variable to your Bugzilla API key."
47 | )
48 |
49 | result = bugzilla.calculate_maintenance_effectiveness_indicator(
50 | args.teams,
51 | dateutil.parser.parse(args.start_date),
52 | dateutil.parser.parse(args.end_date),
53 | args.components,
54 | )
55 |
56 | for factor, value in result["stats"].items():
57 | print("%s: %d" % (factor, round(value, 2) if value != math.inf else value))
58 |
59 | for query, link in result["queries"].items():
60 | print(f"{query}: {link}")
61 |
62 |
63 | if __name__ == "__main__":
64 | main()
65 |
--------------------------------------------------------------------------------
/scripts/review_comments_retriever.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
7 | from bugbug.tools.code_review import PhabricatorReviewData, ReviewCommentsDB
8 | from bugbug.vectordb import QdrantVectorDB
9 |
10 |
def main():
    # Ingest Phabricator review comments (with their diff hunks) into the
    # "diff_comments" Qdrant collection.
    review_data = PhabricatorReviewData()
    vector_db = QdrantVectorDB("diff_comments")
    vector_db.setup()
    comments_db = ReviewCommentsDB(vector_db)
    # TODO: support resuming from where last run left off. We should run it from
    # scratch only once. Following runs should add only new comments.
    comments_db.add_comments_by_hunk(review_data.retrieve_comments_with_hunks())


if __name__ == "__main__":
    main()
23 |
--------------------------------------------------------------------------------
/scripts/revision_retriever.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | from datetime import datetime, timezone
5 | from logging import getLogger
6 |
7 | import dateutil.parser
8 | from dateutil.relativedelta import relativedelta
9 |
10 | from bugbug import bugzilla, db, phabricator, repository
11 | from bugbug.utils import get_secret, zstd_compress
12 |
# Module-level logger for this script.
logger = getLogger(__name__)
14 |
15 |
class Retriever(object):
    """Downloads Phabricator revisions linked to recent commits and bugs."""

    def retrieve_revisions(
        self,
        limit_months: int = 2,
        limit_count: int | None = None,
    ) -> None:
        """Retrieve revisions from Phabricator.

        Args:
            limit_months: The number of months to go back in time to retrieve
                revisions. The limit is based on bugs last activity date and
                commits push date.
            limit_count: Only download the N oldest revisions, used mainly for
                integration tests.
        """
        phabricator.set_api_key(
            get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN")
        )

        db.download(phabricator.REVISIONS_DB)

        # Get the commits DB, as we need it to get the revision IDs linked to recent commits.
        assert db.download(repository.COMMITS_DB)

        # Get the bugs DB, as we need it to get the revision IDs linked to bugs.
        assert db.download(bugzilla.BUGS_DB)

        phabricator.download_modified_revisions()

        cutoff = datetime.now(timezone.utc) - relativedelta(months=limit_months)

        # Revision IDs referenced by commits pushed after the cutoff
        # (get_revision_id may return a falsy value, which is skipped).
        revision_ids = [
            rev_id
            for commit in repository.get_commits()
            if dateutil.parser.parse(commit["pushdate"]).replace(
                tzinfo=timezone.utc
            )
            >= cutoff
            if (rev_id := repository.get_revision_id(commit))
        ]

        # Add revision IDs referenced by bugs changed after the cutoff.
        for bug in bugzilla.get_bugs():
            if dateutil.parser.parse(bug["last_change_time"]) < cutoff:
                continue

            revision_ids += bugzilla.get_revision_ids(bug)

        if limit_count is not None:
            revision_ids = revision_ids[-limit_count:]

        phabricator.download_revisions(revision_ids)

        zstd_compress(phabricator.REVISIONS_DB)
76 |
77 |
def main() -> None:
    """CLI entry point for the Phabricator revision retriever."""
    parser = argparse.ArgumentParser(
        description="Retrieve revisions from Phabricator"
    )
    parser.add_argument(
        "--limit-months",
        type=int,
        default=24,
        help="The number of months to go back in time to retrieve revisions.",
    )
    parser.add_argument(
        "--limit",
        type=int,
        help="Only download the N oldest revisions, used mainly for integration tests",
    )

    # Parse args to show the help if `--help` is passed
    args = parser.parse_args()

    Retriever().retrieve_revisions(args.limit_months, args.limit)


if __name__ == "__main__":
    main()
102 |
--------------------------------------------------------------------------------
/scripts/trainer_extract_args.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import logging
7 | import os
8 | import re
9 |
10 | logging.basicConfig(level=logging.INFO)
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | def get_model_name() -> str | None:
15 | pr_description = os.environ.get("PR_DESCRIPTION")
16 | if not pr_description:
17 | logger.error("The PR_DESCRIPTION environment variable does not exist")
18 | return None
19 |
20 | match = re.search(r"Train on Taskcluster:\s+([a-z_1-9]+)", pr_description)
21 | if not match:
22 | logger.error(
23 | "Could not identify the model name using the 'Train on Taskcluster' keyword from the Pull Request description"
24 | )
25 | return None
26 |
27 | model_name = match.group(1)
28 |
29 | return model_name
30 |
31 |
def main():
    """Print the model name extracted from the PR description, if any."""
    model_name = get_model_name()
    if model_name is not None:
        print(model_name)


if __name__ == "__main__":
    main()
40 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import os
7 |
8 | from setuptools import find_packages, setup
9 |
here = os.path.dirname(__file__)


def read_requirements(file_):
    """Read a requirements file, strip comments, and return sorted unique entries."""
    with open(os.path.join(here, file_)) as f:
        requirements = {line.split("#")[0].strip() for line in f}
    return sorted(requirements)
16 |
17 |
# Core dependencies come from requirements.txt (comments stripped, deduped).
install_requires = read_requirements("requirements.txt")


# The package version is kept in the top-level VERSION file.
with open(os.path.join(here, "VERSION")) as f:
    version = f.read().strip()

# Read the extra requirements
extras = ["nlp", "nn"]

extras_require = {}

for extra in extras:
    # Each extra has its own requirements file: extra-<name>-requirements.txt.
    extras_require[extra] = read_requirements("extra-%s-requirements.txt" % extra)


setup(
    name="bugbug",
    version=version,
    description="ML tools for Mozilla projects",
    author="Marco Castelluccio",
    author_email="mcastelluccio@mozilla.com",
    install_requires=install_requires,
    extras_require=extras_require,
    packages=find_packages(exclude=["contrib", "docs", "tests"]),
    include_package_data=True,
    license="MPL2",
    entry_points={
        # Expose the data-retrieval, training and analysis pipelines as
        # `bugbug-*` console commands.
        "console_scripts": [
            "bugbug-data-commits = scripts.commit_retriever:main",
            "bugbug-data-bugzilla = scripts.bug_retriever:main",
            "bugbug-data-test-scheduling-history = scripts.test_scheduling_history_retriever:main",
            "bugbug-data-revisions = scripts.revision_retriever:main",
            "bugbug-train = scripts.trainer:main",
            "bugbug-check = scripts.check:main",
            "bugbug-maintenance-effectiveness-indicator = scripts.maintenance_effectiveness_indicator:main",
            "bugbug-microannotate-generate = scripts.microannotate_generator:main",
            "bugbug-classify-commit = scripts.commit_classifier:main",
            "bugbug-classify-bug = scripts.bug_classifier:main",
            "bugbug-regressor-finder = scripts.regressor_finder:main",
            "bugbug-retrieve-training-metrics = scripts.retrieve_training_metrics:main",
            "bugbug-analyze-training-metrics = scripts.analyze_training_metrics:main",
            "bugbug-check-all-metrics = scripts.check_all_metrics:main",
            "bugbug-past-bugs-by-unit = scripts.past_bugs_by_unit:main",
            "bugbug-testing-policy-stats = scripts.testing_policy_stats:main",
            "bugbug-generate-landings-risk-report = scripts.generate_landings_risk_report:main",
            "bugbug-shadow-scheduler-stats = scripts.shadow_scheduler_stats:main",
            "bugbug-data-github = scripts.github_issue_retriever:main",
            "bugbug-fixed-comments = scripts.inline_comments_data_collection:main",
        ]
    },
    classifiers=[
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3 :: Only",
        "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
    ],
)
75 |
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | coverage==7.8.2
2 | hypothesis==6.135.0
3 | igraph==0.11.8
4 | jsonschema==4.24.0
5 | pre-commit==4.2.0
6 | pytest==8.3.5
7 | pytest-cov==6.1.1
8 | pytest-responses==0.5.1
9 | responses==0.21.0
10 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import os
7 | import shutil
8 |
9 | import pytest
10 | import zstandard
11 |
12 | from bugbug import bugzilla, repository
13 |
# Directory holding the static test fixtures shipped with the test suite.
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")


@pytest.fixture(autouse=True)
def mock_data(tmp_path):
    """Copy the DB fixtures into a fresh data/ dir and chdir into it for each test."""
    os.mkdir(tmp_path / "data")

    DBs = [
        os.path.basename(bugzilla.BUGS_DB),
        os.path.basename(repository.COMMITS_DB),
        os.path.basename("data/github_webcompat_web-bugs_issues.json"),
    ]

    # Fix: the original reused `f` for both the DB name (loop variable) and
    # the etag file handle, shadowing the loop variable inside the `with`.
    for db_name in DBs:
        shutil.copyfile(
            os.path.join(FIXTURES_DIR, db_name), tmp_path / "data" / db_name
        )
        # Write a fake etag support file so the DB looks already downloaded.
        with open(tmp_path / "data" / f"{db_name}.zst.etag", "w") as etag_file:
            etag_file.write("etag")

    os.chdir(tmp_path)
33 |
34 |
@pytest.fixture
def get_fixture_path():
    """Provide a helper that resolves a path relative to the fixtures dir.

    The helper asserts that the resolved path actually exists, so tests fail
    early with a clear location when a fixture file is missing.
    """

    def _resolve(relative_path):
        full_path = os.path.join(FIXTURES_DIR, relative_path)
        assert os.path.exists(full_path)
        return full_path

    return _resolve
43 |
44 |
@pytest.fixture
def mock_zst():
    """Provide a factory that writes a zstandard-compressed file.

    The returned callable writes ``content`` (default: a tiny JSON payload)
    compressed with zstd to ``db_path``.
    """

    def _write_compressed(db_path, content=b'{"Hello": "World"}'):
        compressor = zstandard.ZstdCompressor()
        with open(db_path, "wb") as out_file:
            with compressor.stream_writer(out_file) as writer:
                writer.write(content)

    return _write_compressed
54 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/blocked_bugs_number.json:
--------------------------------------------------------------------------------
1 | {"blocks": [548311, 1354004]}
2 | {"blocks": []}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/bug_reporter.json:
--------------------------------------------------------------------------------
1 | {"creator_detail": {"email": "bill.mccloskey@gmail.com", "real_name": "Bill McCloskey [inactive unless it's an emergency] (:billm)", "name": "bill.mccloskey@gmail.com", "nick": "billm", "id": 389993}}
2 | {"creator_detail": {"email": "rhelmer@mozilla.com", "real_name": "Robert Helmer [:rhelmer]", "name": "rhelmer@mozilla.com", "nick": "rhelmer", "id": 17036}}
3 | {"creator_detail": {"email": "intermittent-bug-filer@mozilla.bugs", "real_name": "Treeherder Bug Filer", "name": "intermittent-bug-filer@mozilla.bugs", "nick": "intermittent-bug-filer", "id": 573381}}
4 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/bug_types.json:
--------------------------------------------------------------------------------
1 | {"keywords": ["meta", "perf"], "whiteboard": "", "cf_crash_signature": ""}
2 | {"keywords": ["memory-leak", "regression"], "whiteboard": "[MemShrink:P1]", "cf_crash_signature": ""}
3 | {"whiteboard": "", "keywords": ["power"]}
4 | {"keywords": ["sec-want"], "whiteboard": "[sg:want][psm-padlock]"}
5 | {"keywords": ["crash", "regression"], "whiteboard": "", "cf_crash_signature": "[@ audiounit_property_listener_callback]"}
6 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/comment_count.json:
--------------------------------------------------------------------------------
1 | {"comment_count": 4}
2 | {"comment_count": 28}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/component.json:
--------------------------------------------------------------------------------
1 | {"component": "Graphics"}
2 | {"component": "CSS Parsing and Computation"}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_crash_signature.json:
--------------------------------------------------------------------------------
1 | {"cf_crash_signature": ""}
2 | {"cf_crash_signature": "[@ RtlpScanEnvironment + 0x1dc | mozilla::detail::MutexImpl::lock()]"}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_cve_in_alias.json:
--------------------------------------------------------------------------------
1 | {"alias": "CVE-2017-7813"}
2 | {"alias": null}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_github_url.json:
--------------------------------------------------------------------------------
1 | {"url": "https://github.com/w3c/webcomponents/issues/635"}
2 | {"url": ""}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_regression_range.json:
--------------------------------------------------------------------------------
1 | {"cf_has_regression_range": "yes"}
2 | {"cf_has_regression_range": "---"}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_str.json:
--------------------------------------------------------------------------------
1 | {"cf_has_str": "yes"}
2 | {"cf_has_str": "---"}
3 | {"cf_has_str": "no"}
4 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_url.json:
--------------------------------------------------------------------------------
1 | {"url": "data:text/html;charset=UTF-8,"}
2 | {"url": ""}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/has_w3c_url.json:
--------------------------------------------------------------------------------
1 | {"url": "https://github.com/w3c/webcomponents/issues/635"}
2 | {"url": ""}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/is_coverity_issue.json:
--------------------------------------------------------------------------------
1 | {"summary": "Firefox Nightly 56 shows no buttons and no page content.", "whiteboard": ""}
2 | {"whiteboard": "", "summary": "[CID 1419486] signed/unsigned conversion error in pk11 signature test"}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/is_mozillian.json:
--------------------------------------------------------------------------------
1 | {"creator_detail": {"email": "johngraciliano@gmail.com", "real_name": "", "name": "johngraciliano@gmail.com", "nick": "johngraciliano", "id": 532161}}
2 | {"creator_detail": {"email": "bdahl@mozilla.com", "real_name": "Brendan Dahl [:bdahl]", "name": "bdahl@mozilla.com", "nick": "bdahl", "id": 425126}}
3 | {"creator_detail": {"email": "asa@mozilla.org", "real_name": "Asa Dotzler [:asa]", "name": "asa@mozilla.org", "nick": "asa", "id": 5003}}
4 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/keywords.json:
--------------------------------------------------------------------------------
1 | {"keywords": ["crash", "intermittent-failure", "stale-bug"]}
2 | {"keywords": ["bulk-close-intermittents", "crash", "intermittent-failure"]}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/patches.json:
--------------------------------------------------------------------------------
1 | {"attachments": [{"content_type": "text/plain", "creator": "ehsan@mozilla.com", "flags": [{"modification_date": "2017-07-04T08:24:38Z", "creation_date": "2017-07-04T02:16:27Z", "type_id": 4, "status": "+", "name": "review", "id": 1606172, "setter": "mzehe@mozilla.com"}], "is_patch": 1, "creation_time": "2017-07-04T02:16:27Z", "id": 8883151, "is_obsolete": 0}]}
2 | {"attachments": []}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/product.json:
--------------------------------------------------------------------------------
1 | {"product": "Core"}
2 | {"product": "Firefox for Android"}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/severity.json:
--------------------------------------------------------------------------------
1 | {"severity": "major"}
2 | {"severity": "normal"}
3 |
--------------------------------------------------------------------------------
/tests/fixtures/bug_features/whiteboard.json:
--------------------------------------------------------------------------------
1 | {"whiteboard": "[MemShrink][platform-rel-Facebook]"}
2 | {"whiteboard": ""}
3 | {"whiteboard": "inj+ [AV:Quick Heal] "}
4 | {"whiteboard": "[AV:Quick Heal][regressed sept 6th][dll version is 3.0.1.*]"}
5 | {"whiteboard": "[AV:Quick Heal]inj+"}
6 | {"whiteboard": "[AV:Quick Heal] inj+"}
7 | {"whiteboard": "inj+ [AV:Quick Heal]"}
8 | {"whiteboard": "inj+[AV:Quick Heal]"}
9 | {"whiteboard": "inj+ ux [AV:Quick Heal] qf"}
10 |
--------------------------------------------------------------------------------
/tests/test_assignee.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models import assignee
7 | from bugbug.models.assignee import AssigneeModel
8 |
9 |
def test_get_assignee_labels():
    """Spot-check the assignee labels produced from the fixture bugs."""
    # The fixture data set is tiny, so accept a single assignment as enough.
    assignee.MINIMUM_ASSIGNMENTS = 1
    classes, _ = AssigneeModel().get_labels()

    assert len(classes) != 0

    expected = {
        1320039: "gijskruitbosch+bugs@gmail.com",
        1045018: "padenot@mozilla.com",
    }
    for bug_id, email in expected.items():
        assert classes[bug_id] == email

    # This bug has no usable assignee and must be excluded.
    assert 1319973 not in classes
18 |
--------------------------------------------------------------------------------
/tests/test_backout.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.backout import BackoutModel
7 |
8 |
def test_get_backout_labels():
    """A backed-out commit is labeled 1, a surviving commit is not."""
    classes, _ = BackoutModel().get_labels()

    backed_out = "c2b5cf7bde83db072fc206c24d1cab72354be727"
    not_backed_out = "9d576871fd33bed006dcdccfba880a4ed591f870"
    assert classes[backed_out] == 1
    assert classes[not_backed_out] != 1
14 |
--------------------------------------------------------------------------------
/tests/test_bug.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.defect import DefectModel
7 |
8 |
def test_get_bug_labels():
    """Spot-check DefectModel labels against each labeling CSV."""
    classes, _ = DefectModel().get_labels()

    # bug_nobug.csv: 1087488 is a bug, 1101825 is not.
    # regression_bug_nobug.csv: 518272 (regression), 528988
    # (bug_unknown_regression) and 1037762 (bug_no_regression) are bugs;
    # 1586096 (nobug) is not.
    # defect_enhancement_task.csv: 1488310 is a defect; 1488307 (task) and
    # 1531080 (enhancement) are not.
    for bug_id in (1087488, 518272, 528988, 1037762, 1488310):
        assert classes[bug_id]
    for bug_id in (1101825, 1586096, 1488307, 1531080):
        assert not classes[bug_id]
24 |
--------------------------------------------------------------------------------
/tests/test_bug_snapshot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
7 | from bugbug import bugzilla
8 | from bugbug.bug_snapshot import rollback
9 |
10 |
def test_bug_snapshot():
    """Rolling back every fixture bug's history must not trip an assertion."""
    for index, bug in enumerate(bugzilla.get_bugs()):
        # Log progress so a rollback failure can be traced to a bug id.
        print(bug["id"])
        print(index)

        rollback(bug, do_assert=True)
17 |
--------------------------------------------------------------------------------
/tests/test_bugtype.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import numpy as np
7 |
8 | from bugbug.models.bugtype import BugTypeModel
9 |
10 |
def test_get_bugtype_labels():
    """Each sample bug gets a one-hot vector for its bug type.

    Bug 1319957 matches none of the tracked types and gets an all-zero
    vector; each of the other bugs matches exactly one type.
    """
    model = BugTypeModel()
    classes, keyword_list = model.get_labels()

    # No tracked type: all-zero vector.
    assert np.array_equal(classes[1319957], np.zeros(5))

    # One tracked type each: one-hot vector at the keyword's index.
    # (The four original copy-pasted checks collapse into one loop.)
    expected_keywords = {
        1319973: "crash",
        1325215: "memory",
        1320195: "performance",
        1320039: "security",
    }
    for bug_id, keyword in expected_keywords.items():
        target = np.zeros(5)
        target[keyword_list.index(keyword)] = 1
        assert np.array_equal(classes[bug_id], target)
32 |
--------------------------------------------------------------------------------
/tests/test_bugzilla.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from typing import Any
7 |
8 | import pytest
9 |
10 | from bugbug import bugzilla
11 |
12 |
def test_get_bugs():
    """get_bugs() filters invalid bugs unless include_invalid is set."""
    all_bugs = {int(bug["id"]) for bug in bugzilla.get_bugs(include_invalid=True)}
    legitimate_bugs = {int(bug["id"]) for bug in bugzilla.get_bugs()}

    # Invalid bugs: present only when include_invalid=True.
    for bug_id in (1541482, 1559674, 1549207):
        assert bug_id in all_bugs
        assert bug_id not in legitimate_bugs

    # A legitimate bug must show up in both sets.
    assert 1572747 in all_bugs
    assert 1572747 in legitimate_bugs
28 |
29 |
def test_get_fixed_versions():
    """Fixed versions are derived from status flags and the milestone."""
    # "fixed" status flags contribute their version; tracking flags and
    # non-"fixed" statuses do not. Results are sorted ascending.
    bug = {
        "target_milestone": "mozilla81",
        "cf_tracking_firefox83": "blocking",
        "cf_status_firefox82": "fixed",
        "cf_status_firefox81": "unaffected",
    }
    assert bugzilla.get_fixed_versions(bug) == [81, 82]

    bug = {
        "target_milestone": "mozilla82",
        "cf_tracking_firefox82": "---",
        "cf_status_firefox82": "fixed",
        "cf_status_firefox83": "fixed",
    }
    assert bugzilla.get_fixed_versions(bug) == [82, 83]

    # The milestone alone is enough, in any of its common spellings.
    for milestone in ("mozilla82", "82 Branch", "Firefox 82"):
        assert bugzilla.get_fixed_versions({"target_milestone": milestone}) == [82]
66 |
67 |
@pytest.fixture
def component_team_mapping():
    """A trimmed-down payload of Bugzilla's product/component REST API."""

    def component(name, team_name):
        return {"name": name, "team_name": team_name}

    return {
        "products": [
            {
                "name": "JSS",
                "components": [
                    component("Library", "Crypto"),
                    component("Tests", "Crypto"),
                ],
            },
            {
                "name": "Core",
                "components": [component("Graphics", "GFX")],
            },
        ]
    }
96 |
97 |
def test_get_component_team_mapping(
    responses: Any, component_team_mapping: dict
) -> None:
    """The REST payload is flattened into {product: {component: team}}."""
    # Serve the canned payload for the product query.
    responses.add(
        responses.GET,
        "https://bugzilla.mozilla.org/rest/product?type=accessible&include_fields=name&include_fields=components.name&include_fields=components.team_name",
        status=200,
        json=component_team_mapping,
    )

    mapping = bugzilla.get_component_team_mapping()
    assert mapping == {
        "Core": {"Graphics": "GFX"},
        "JSS": {"Library": "Crypto", "Tests": "Crypto"},
    }
112 |
--------------------------------------------------------------------------------
/tests/test_code_review.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import responses
3 | from unidiff import PatchSet
4 |
5 | from bugbug.tools.code_review import find_comment_scope
6 |
7 |
def test_find_comment_scope():
    """find_comment_scope returns the hunk boundaries around a comment line.

    Downloads real diffs from Phabricator and checks the computed scope for
    a handful of known comment positions.
    """
    # Allow live network access to Phabricator and its CDN.
    responses.add_passthru("https://phabricator.services.mozilla.com/")
    responses.add_passthru(
        "https://mozphab-phabhost-cdn.devsvcprod.mozaws.net/file/data/"
    )

    # revision url -> file path -> comment line -> expected scope.
    test_data = {
        "https://phabricator.services.mozilla.com/D233024?id=964198": {
            "browser/components/newtab/test/browser/browser.toml": {
                79: {
                    "line_start": 78,
                    "line_end": 79,
                    "has_added_lines": False,
                }
            },
            "browser/components/asrouter/tests/browser/browser.toml": {
                63: {
                    "line_start": 60,
                    "line_end": 74,
                    "has_added_lines": True,
                },
            },
        },
        "https://phabricator.services.mozilla.com/D240754?id=995999": {
            "dom/canvas/WebGLShaderValidator.cpp": {
                39: {
                    "line_start": 37,
                    "line_end": 42,
                    "has_added_lines": True,
                },
                46: {
                    "line_start": 37,
                    "line_end": 42,
                    "has_added_lines": True,
                },
            }
        },
    }

    for revision_url, patch_files in test_data.items():
        raw_diff = requests.get(revision_url + "&download=true", timeout=5).text
        patch_set = PatchSet.from_string(raw_diff)

        for file_name, target_hunks in patch_files.items():
            # Pick the patched file for this path out of the patch set.
            matches = (pf for pf in patch_set if pf.path == file_name)
            patched_file = next(matches)

            for line_number, expected_scope in target_hunks.items():
                assert find_comment_scope(patched_file, line_number) == expected_scope
60 |
--------------------------------------------------------------------------------
/tests/test_commit_features.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import pytest
7 |
8 | from bugbug.commit_features import AuthorExperience, CommitExtractor, ReviewersNum
9 | from bugbug.feature_cleanup import fileref, url
10 |
11 |
def test_CommitExtractor():
    """CommitExtractor rejects duplicate feature or cleanup classes."""
    # Unique feature extractors and cleanup functions: accepted.
    CommitExtractor([ReviewersNum(), AuthorExperience()], [fileref(), url()])

    # Any duplicated class in either argument list: rejected.
    duplicated_args = [
        ([ReviewersNum(), AuthorExperience()], [fileref(), fileref()]),
        ([AuthorExperience(), AuthorExperience()], [fileref(), url()]),
    ]
    for feature_extractors, cleanup_functions in duplicated_args:
        with pytest.raises(AssertionError):
            CommitExtractor(feature_extractors, cleanup_functions)
18 |
--------------------------------------------------------------------------------
/tests/test_defect.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.defect import DefectModel
7 |
8 |
def test_get_defect_labels():
    """Bug 1042414 is a defect; bug 1049816 is not."""
    classes, _ = DefectModel().get_labels()

    defect_bug, non_defect_bug = 1042414, 1049816
    assert classes[defect_bug] == 1
    assert classes[non_defect_bug] != 1
14 |
--------------------------------------------------------------------------------
/tests/test_defect_enhancement_task.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.defect_enhancement_task import DefectEnhancementTaskModel
7 |
8 |
def test_get_defect_enhancement_task_labels():
    """Each sample bug maps to its defect/task/enhancement category."""
    classes, _ = DefectEnhancementTaskModel().get_labels()

    expected = {
        1042414: "defect",
        1531080: "task",
        1348788: "enhancement",
    }
    for bug_id, category in expected.items():
        assert classes[bug_id] == category
15 |
--------------------------------------------------------------------------------
/tests/test_devdocneeded.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.devdocneeded import DevDocNeededModel
7 |
8 |
def test_get_devdocneeded_labels():
    """Bugs needing developer docs are labeled 1, others 0."""
    classes, _ = DevDocNeededModel().get_labels()

    expected = {528988: 0, 1053944: 1, 1531080: 1}
    for bug_id, label in expected.items():
        assert classes[bug_id] == label
15 |
--------------------------------------------------------------------------------
/tests/test_hooks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import json
7 | import os
8 | import shutil
9 |
10 | import jsone
11 | import jsonschema
12 | import pytest
13 |
14 | from infra.set_hook_version import set_hook
15 |
# Read the version the hooks should be pinned to.
# (The original used os.path.join("VERSION"), a no-op single-argument join.)
with open("VERSION") as version_file:
    version = version_file.read().strip()

# Every Taskcluster hook definition paired with an example trigger payload.
parameters = [
    (os.path.realpath("infra/taskcluster-hook-data-pipeline.json"), {}),
    (os.path.realpath("infra/taskcluster-hook-check-models-start.json"), {}),
    (
        os.path.realpath("infra/taskcluster-hook-classify-patch.json"),
        {"PHABRICATOR_DEPLOYMENT": "prod", "DIFF_ID": 123},
    ),
    (
        os.path.realpath("infra/taskcluster-hook-test-select.json"),
        {"PHABRICATOR_DEPLOYMENT": "dev", "DIFF_ID": 123},
    ),
    (
        os.path.realpath("infra/taskcluster-hook-test-select.json"),
        {
            "PHABRICATOR_DEPLOYMENT": "prod",
            "DIFF_ID": 123,
            "RUNNABLE_JOBS": "http://localhost",
        },
    ),
    (os.path.realpath("infra/taskcluster-hook-landings-risk-report.json"), {}),
]

# Fail fast at import time if a hook file on disk is missing from
# `parameters` (set membership instead of a linear `any` scan per file).
known_hooks = {path for path, _ in parameters}
for infra_path in os.listdir("infra"):
    if not infra_path.startswith("taskcluster-hook-"):
        continue

    assert os.path.realpath(os.path.join("infra", infra_path)) in known_hooks, (
        f"{infra_path} not found"
    )
49 |
50 |
@pytest.mark.parametrize("hook_file,payload", parameters)
def test_jsone_validates(tmp_path, hook_file, payload):
    """Each hook's trigger schema accepts its payload and renders via JSON-e."""
    tmp_hook_file = tmp_path / "hook.json"
    shutil.copyfile(hook_file, tmp_hook_file)

    # Pin the hook to the current version, as a deployment would.
    set_hook(tmp_hook_file, version)

    with open(tmp_hook_file, "r") as hook_fd:
        hook_content = json.load(hook_fd)

    # The example payload must validate against the hook's trigger schema...
    jsonschema.validate(instance=payload, schema=hook_content["triggerSchema"])

    # ...and the hook must render cleanly with that payload.
    jsone.render(hook_content, context={"payload": payload})
65 |
--------------------------------------------------------------------------------
/tests/test_invalid_compatibility_report.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.invalid_compatibility_report import InvalidCompatibilityReportModel
7 |
8 |
def test_get_invalid_labels():
    """Reports 70960/70978 are invalid; 71052/71011 are valid."""
    classes, _ = InvalidCompatibilityReportModel().get_labels()

    for report_id in (70960, 70978):
        assert classes[report_id]
    for report_id in (71052, 71011):
        assert not classes[report_id]
16 |
--------------------------------------------------------------------------------
/tests/test_labels.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import os
7 |
8 | from bugbug import labels
9 |
10 |
def test_get_labels_dir():
    """The labels directory is an absolute path ending in "labels"."""
    labels_dir = labels.get_labels_dir()
    assert os.path.isabs(labels_dir)
    assert labels_dir.endswith("labels")
15 |
16 |
def test_get_all_bug_ids():
    """The labeling CSVs yield a non-empty collection of integer bug ids."""
    bug_ids = labels.get_all_bug_ids()
    assert len(bug_ids) > 0
    for bug_id in bug_ids:
        assert isinstance(bug_id, int)
21 |
--------------------------------------------------------------------------------
/tests/test_models.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from logging import INFO, basicConfig, getLogger
7 |
8 | from bugbug import model
9 | from bugbug.models import MODELS, get_model_class
10 |
# Configure root logging once at import so per-model loading progress
# (see test_import_all_models) is visible in the test output.
basicConfig(level=INFO)
logger = getLogger(__name__)
13 |
14 |
def test_import_all_models():
    """Every name registered in MODELS must resolve to an importable class.

    This catches stale fully-qualified names after a model is moved or
    renamed.
    """
    for model_name in MODELS:
        logger.info("Try loading model %s", model_name)
        get_model_class(model_name)
23 |
24 |
def test_component_is_bugmodel():
    """Bug-oriented models must subclass model.BugModel."""
    for model_name in ("component", "regression"):
        assert issubclass(get_model_class(model_name), model.BugModel)
30 |
31 |
def test_backout_is_commitmodel():
    """The backout model operates on commits, not bugs."""
    backout_class = get_model_class("backout")
    assert issubclass(backout_class, model.CommitModel)
35 |
--------------------------------------------------------------------------------
/tests/test_needsdiagnosis.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.needsdiagnosis import NeedsDiagnosisModel
7 |
8 |
def test_get_needsdiagnosis_labels():
    """Issues 71012/70962 need diagnosis; 71052/71011 do not."""
    classes, _ = NeedsDiagnosisModel().get_labels()

    for issue_id in (71052, 71011):
        assert not classes[issue_id]
    for issue_id in (71012, 70962):
        assert classes[issue_id]
16 |
--------------------------------------------------------------------------------
/tests/test_performancebug.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.performancebug import PerformanceBugModel
7 |
8 |
def test_get_performancebug_labels():
    """Performance bugs are labeled 1, non-performance bugs 0."""
    classes, _ = PerformanceBugModel().get_labels()

    expected = {1461247: 1, 1457988: 1, 446261: 0, 452258: 0}
    for bug_id, label in expected.items():
        assert classes[bug_id] == label
16 |
--------------------------------------------------------------------------------
/tests/test_pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import os
7 |
8 | import jsone
9 | import jsonschema
10 | import pytest
11 | import requests
12 | import responses
13 | import yaml
14 |
15 |
@pytest.fixture(scope="session")
def task_schema():
    """Fetch the Taskcluster create-task request schema (once per session).

    Returns the parsed JSON schema used to validate rendered tasks.
    """
    responses.add_passthru("https://community-tc.services.mozilla.com/")
    # A timeout keeps a hung server from blocking the whole test session
    # (requests.get has no timeout by default); matches test_code_review.py.
    r = requests.get(
        "https://community-tc.services.mozilla.com/schemas/queue/v1/create-task-request.json",
        timeout=30,
    )
    r.raise_for_status()
    return r.json()
24 |
25 |
@pytest.fixture(scope="session")
def payload_schema():
    """Fetch the docker-worker payload schema (once per session).

    Returns the parsed JSON schema used to validate task payloads.
    """
    responses.add_passthru("https://community-tc.services.mozilla.com/")
    # A timeout keeps a hung server from blocking the whole test session
    # (requests.get has no timeout by default); matches test_code_review.py.
    r = requests.get(
        "https://community-tc.services.mozilla.com/schemas/docker-worker/v1/payload.json",
        timeout=30,
    )
    r.raise_for_status()
    return r.json()
34 |
35 |
@pytest.mark.parametrize(
    "pipeline_file",
    (
        os.path.realpath(os.path.join("infra", f))
        for f in os.listdir("infra")
        if f.endswith(".yml")
    ),
)
def test_jsone_validates(pipeline_file, task_schema, payload_schema):
    """Render each infra pipeline with JSON-e and validate every task."""
    responses.add_passthru("https://community-tc.services.mozilla.com/")

    with open(pipeline_file, "r") as pipeline_fd:
        pipeline = yaml.safe_load(pipeline_fd.read())

    rendered = jsone.render(pipeline, context={"version": "42.0"})
    tasks = rendered["tasks"]

    # Task IDs must be unique across the pipeline.
    all_ids = [task["ID"] for task in tasks]
    assert len(all_ids) == len(set(all_ids))

    # Every declared dependency must reference a task in this pipeline.
    for task in tasks:
        for dependency in task.get("dependencies", []):
            assert dependency in all_ids

    for task in tasks:
        # "ID" and "dependencies" are our own bookkeeping fields, not part
        # of the Taskcluster schema; strip them before validating.
        task.pop("ID", None)
        task.pop("dependencies", None)

        jsonschema.validate(instance=task, schema=task_schema)
        jsonschema.validate(instance=task["payload"], schema=payload_schema)
74 |
--------------------------------------------------------------------------------
/tests/test_qaneeded.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug import bugzilla
7 | from bugbug.models.qaneeded import QANeededModel
8 |
9 |
def test_get_qaneeded_labels():
    """QANeededModel labels bugs carrying qawanted/qe-verify markers."""
    classes, _ = QANeededModel().get_labels()

    assert not classes[1389220]
    assert classes[1389223], "Bug should contain qawanted in a field"
    assert classes[1390433], "Bug should contain qe-verify in a field"
16 |
17 |
def test_rollback():
    """model.rollback must fire at the expected point in each bug's history."""
    model = QANeededModel()

    histories = {int(bug["id"]): bug["history"] for bug in bugzilla.get_bugs()}

    def rollback_point(bug_id):
        # Count history changes until the first one the model rolls back on;
        # if none triggers, return the total number of changes.
        count = 0
        for history in histories[bug_id]:
            for change in history["changes"]:
                if model.rollback(change):
                    return count
                count += 1
        return count

    assert rollback_point(1390433) == 35, (
        "A bug field should start with qawanted or qe-verify"
    )
    assert rollback_point(1389136) == 9, (
        "A bug field should start with qawanted or qe-verify"
    )

    assert rollback_point(1388990) == 29
    assert rollback_point(1389223) == 8
43 |
--------------------------------------------------------------------------------
/tests/test_rcatype.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.rcatype import RCATypeModel
7 |
8 |
def test_get_rca_from_whiteboard():
    """Whiteboard tags like [rca - x] / [rca: x] are parsed into RCA types."""
    model = RCATypeModel()

    cases = {
        # No RCA tag at all.
        "[Whiteboard1][Not RCA type]": [],
        # Single tag, with either ':' or '-' as the separator.
        "[RCA: cornercase]": ["cornercase"],
        "[rca - codingerror]": ["codingerror"],
        # Multiple tags, mixed separators, order preserved.
        "[rca - cornercase][rca - codingerror]": ["cornercase", "codingerror"],
        "[rca : systemerror][rca - codingerror]": ["systemerror", "codingerror"],
        "[rca - cornercase][rca : testingerror]": ["cornercase", "testingerror"],
        "[rca : cornercase][rca : codingerror]": ["cornercase", "codingerror"],
        # Subcategories are dropped when not enabled.
        "[RCA: codingerror - syntaxerror]": ["codingerror"],
    }
    for whiteboard, expected in cases.items():
        assert model.get_rca_from_whiteboard(whiteboard) == expected

    # With subcategories enabled, the "type-subtype" label is kept, whether
    # or not the subtype is already present in the predefined list.
    model = RCATypeModel(rca_subcategories_enabled=True)
    subcategory_cases = {
        "[RCA: codingerror - syntaxerror]": ["codingerror-syntaxerror"],
        "[RCA: codingerror - syntaxerror][rca: codingerror:logicalerror]": [
            "codingerror-syntaxerror",
            "codingerror-logicalerror",
        ],
        "[RCA: codingerror - semanticerror]": ["codingerror-semanticerror"],
    }
    for whiteboard, expected in subcategory_cases.items():
        assert model.get_rca_from_whiteboard(whiteboard) == expected
48 |
49 |
def test_get_labels():
    """Bug 1556846 should be labeled with exactly two of the 16 RCA classes
    (indices 0 and 7)."""
    model = RCATypeModel()
    classes, _ = model.get_labels()

    expected = [0.0] * 16
    expected[0] = 1.0
    expected[7] = 1.0
    assert classes[1556846].tolist() == expected
72 |
--------------------------------------------------------------------------------
/tests/test_regression.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
7 | from bugbug.models.regression import RegressionModel
8 |
9 |
def test_get_regression_labels():
    """RegressionModel labels: bug 1348788 is not a regression, 518272 is."""
    classes, _ = RegressionModel().get_labels()

    expected = {1348788: 0, 518272: 1}
    for bug_id, label in expected.items():
        assert classes[bug_id] == label
15 |
--------------------------------------------------------------------------------
/tests/test_stepstoreproduce.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 |
7 | from bugbug.models.stepstoreproduce import StepsToReproduceModel
8 |
9 |
def test_get_labels():
    """StepsToReproduceModel labels: 1488310 has STR, 1372243 does not, and
    1319973 is excluded from the labeled set entirely."""
    classes, _ = StepsToReproduceModel().get_labels()

    assert classes[1488310]
    assert not classes[1372243]
    assert 1319973 not in classes
16 |
--------------------------------------------------------------------------------
/tests/test_test_scheduling_features.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug import test_scheduling_features
7 |
8 |
def test_path_distance():
    """PathDistance should return the smallest directory distance between a
    test manifest and any of the files touched by a commit."""
    pd = test_scheduling_features.PathDistance()

    cases = [
        # A touched file in the same directory as the manifest: distance 0.
        (
            "dom/media/tests/mochitest.ini",
            ["dom/media/tests/test.js", "dom/media/anotherFile.cpp"],
            0,
        ),
        ("dom/media/tests/mochitest.ini", ["dom/media/anotherFile.cpp"], 1),
        ("dom/media/tests/mochitest.ini", ["dom/media/src/aFile.cpp"], 2),
        # When several files are touched, the closest one wins.
        (
            "dom/media/tests/mochitest.ini",
            ["dom/media/src/aFile.cpp", "dom/media/anotherFile.cpp"],
            1,
        ),
        ("dom/media/tests/mochitest.ini", ["layout/utils/bla.cpp"], 5),
        # The distance is symmetric between deep and shallow paths.
        (
            "testing/web-platform/tests/content-security-policy/worker-src",
            ["test"],
            4,
        ),
        (
            "test",
            ["testing/web-platform/tests/content-security-policy/worker-src"],
            4,
        ),
    ]
    for test_name, files, expected in cases:
        assert pd({"name": test_name}, {"files": files}) == expected
65 |
--------------------------------------------------------------------------------
/tests/test_tracking.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.tracking import TrackingModel
7 |
8 |
def test_get_tracking_labels():
    """TrackingModel labels: bug 1101825 is negative, 1042096 is positive."""
    classes, _ = TrackingModel().get_labels()

    assert not classes[1101825]
    assert classes[1042096]
14 |
--------------------------------------------------------------------------------
/tests/test_trainer.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | import responses
7 |
8 | from bugbug import bugzilla, db
9 | from scripts import trainer
10 |
11 |
def test_trainer():
    # Pretend the DB was already downloaded and no new DB is available.
    # NOTE(review): `responses` interception is assumed to be activated
    # elsewhere (e.g. an autouse fixture or a `responses.activate` wrapper
    # applied by conftest) — confirm, otherwise these registrations would
    # have no effect on real HTTP requests.

    url = "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_bugs.latest/artifacts/public/bugs.json"

    # The advertised remote version matches the local DB version, so the
    # trainer should decide it does not need to re-download the database.
    responses.add(
        responses.GET,
        f"{url}.version",
        status=200,
        body=str(db.DATABASES[bugzilla.BUGS_DB]["version"]),
    )

    # HEAD request used to check the compressed archive's ETag.
    responses.add(
        responses.HEAD,
        f"{url}.zst",
        status=200,
        headers={"ETag": "etag"},
    )

    # Run a full training pass for the "regression" model.
    trainer.Trainer().go(trainer.parse_args(["regression"]))
32 |
--------------------------------------------------------------------------------
/tests/test_uplift.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # This Source Code Form is subject to the terms of the Mozilla Public
3 | # License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 | # You can obtain one at http://mozilla.org/MPL/2.0/.
5 |
6 | from bugbug.models.uplift import UpliftModel
7 |
8 |
9 | def test_get_uplift_labels():
10 | model = UpliftModel()
11 | classes, _ = model.get_labels()
12 | assert classes[1364870] == 1
13 | assert classes[1350663] != 1
14 |
--------------------------------------------------------------------------------
/ui/changes/.eslintrc.yml:
--------------------------------------------------------------------------------
1 | env:
2 | browser: true
3 | es6: true
4 | plugins:
5 | - prettier
6 | - mozilla
7 | extends:
8 | - standard
9 | - prettier
10 | - plugin:mozilla/recommended
11 | parserOptions:
12 | ecmaVersion: 2018
13 | sourceType: module
14 | rules:
15 | max-len: off
16 | prettier/prettier: "error"
17 |
--------------------------------------------------------------------------------
/ui/changes/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "changes",
3 | "version": "1.0.0",
4 | "description": "To update Temporal polyfill:",
5 | "private": true,
6 | "scripts": {
7 | "dev": "npx snowpack dev",
8 | "release": "npx snowpack build",
9 | "test": "echo \"Error: no test specified\" && exit 1"
10 | },
11 | "keywords": [],
12 | "author": "",
13 | "license": "ISC",
14 | "devDependencies": {
15 | "prettier": "^3.5.3",
16 | "snowpack": "^3.8.8"
17 | },
18 | "dependencies": {
19 | "@js-temporal/polyfill": "^0.5.1",
20 | "apexcharts": "^4.7.0",
21 | "localforage": "^1.10.0"
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/ui/changes/snowpack.config.js:
--------------------------------------------------------------------------------
// Snowpack build/dev-server configuration for the changes UI.
module.exports = {
  plugins: [
    /* ... */
  ],
  packageOptions: {
    /* ... */
  },
  devOptions: {
    /* ... */
  },
  buildOptions: {
    // Emit the production build into "dist".
    out: "dist",
    /* ... */
  },
  mount: {
    // Serve and build the "src" directory at the web root ("/").
    src: "/",
    /* ... */
  },
  alias: {
    /* ... */
  },
};
23 |
--------------------------------------------------------------------------------
/ui/changes/src/bug.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | bugbug ui
6 |
7 |
11 |
12 |
13 |
14 | bugbug ui
15 |
20 |
21 |
22 |
36 |
37 |
38 |
39 | Bug List
40 |
41 |
42 |
43 |
44 |
45 | Bug |
46 | Date |
47 | Testing Tags |
48 | Coverage |
49 | Riskiness |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/ui/changes/src/bug.js:
--------------------------------------------------------------------------------
1 | import * as common from "./common.js";
2 |
async function renderUI() {
  // Render a one-row table for the bug selected via the "bugID" option.
  const data = await common.landingsData;
  const bugID = Number(common.getOption("bugID"));

  // Example bug id for manual testing: 1433500

  // Flatten the per-key lists of bug summaries into a single array.
  const allBugSummaries = [].concat(...Object.values(data));

  const match = allBugSummaries.find((bugSummary) => bugSummary["id"] == bugID);
  await common.renderTable(match ? [match] : []);
}
20 |
(async function init() {
  // Register renderUI as the change callback (presumably re-run when an
  // option changes — confirm against common.setupOptions), then render once.
  await common.setupOptions(renderUI);

  await renderUI();
})();
26 |
--------------------------------------------------------------------------------
/ui/changes/src/css/page.css:
--------------------------------------------------------------------------------
1 | @import url("./common.css");
2 |
3 | * {
4 | box-sizing: border-box;
5 | }
6 |
7 | th,
8 | td {
9 | vertical-align: top;
10 | }
11 |
12 | table {
13 | table-layout: fixed;
14 | width: 100%;
15 | white-space: nowrap;
16 | }
17 | table td,
18 | table th {
19 | white-space: nowrap;
20 | overflow: hidden;
21 | text-overflow: ellipsis;
22 | text-align: center;
23 | }
24 | table tr td:nth-child(1),
25 | table tr th:nth-child(1) {
26 | text-align: left;
27 | }
28 | table tr td:nth-child(2),
29 | table tr th:nth-child(2),
30 | table tr td:nth-child(3),
31 | table tr th:nth-child(3),
32 | table tr td:nth-child(4),
33 | table tr th:nth-child(4),
34 | table tr td:nth-child(5),
35 | table tr th:nth-child(5) {
36 | width: 100px;
37 | }
38 |
39 | table td ul {
40 | margin: 0;
41 | padding: 0;
42 | list-style: none;
43 | }
44 | tr {
45 | padding: 4px 0;
46 | border-bottom: solid 1px rgba(0, 0, 0, 0.2);
47 | }
48 |
49 | td .desc-box {
50 | width: auto;
51 | padding: 3px;
52 | }
53 | td .desc-box ul {
54 | margin: 0;
55 | padding-inline-start: 20px;
56 | }
57 |
58 | #links {
59 | position: absolute;
60 | right: 5px;
61 | top: 5px;
62 | }
63 |
64 | h3 {
65 | margin: 0;
66 | border-bottom: solid 1px rgba(0, 0, 0, 0.2);
67 | }
68 | details h3 {
69 | display: inline-block;
70 | border-bottom: none;
71 | }
72 | details summary {
73 | border-bottom: solid 1px rgba(0, 0, 0, 0.2);
74 | }
75 |
76 | #grid {
77 | display: grid;
78 | grid-template-columns: auto 1fr;
79 | padding: 0 5px;
80 | max-width: 100vw;
81 | overflow: hidden;
82 | }
83 | #grid aside {
84 | padding-right: 5px;
85 | }
86 | #grid main {
87 | overflow: auto;
88 | }
89 |
90 | #filter-container input:not([type="checkbox"]),
91 | #filter-container select {
92 | width: 180px;
93 | display: block;
94 | margin: 2px 0;
95 | padding: 0;
96 | }
97 | #filter-container label {
98 | font-style: italic;
99 | }
100 |
/* For the testing/graph page */
102 | aside ul {
103 | margin: 0;
104 | padding: 0;
105 | }
106 | aside ul li {
107 | list-style: none;
108 | }
109 | /*
110 | .chart-container {
111 | display: flex;
112 | justify-content: center;
113 | }
114 | */
115 |
116 | .loading-data #grid,
117 | .loader {
118 | display: none;
119 | }
120 | .loading-data .loader {
121 | display: block;
122 | }
123 |
124 | /* Spinner */
125 | .loader,
126 | .loader:before,
127 | .loader:after {
128 | background: var(--heading-color);
129 | -webkit-animation: load1 1s infinite ease-in-out;
130 | animation: load1 1s infinite ease-in-out;
131 | width: 1em;
132 | height: 4em;
133 | }
134 | .loader {
135 | color: var(--heading-color);
136 | text-indent: -9999em;
137 | margin: 88px auto;
138 | position: relative;
139 | font-size: 11px;
140 | -webkit-transform: translateZ(0);
141 | -ms-transform: translateZ(0);
142 | transform: translateZ(0);
143 | -webkit-animation-delay: -0.16s;
144 | animation-delay: -0.16s;
145 | }
146 | .loader:before,
147 | .loader:after {
148 | position: absolute;
149 | top: 0;
150 | content: "";
151 | }
152 | .loader:before {
153 | left: -1.5em;
154 | -webkit-animation-delay: -0.32s;
155 | animation-delay: -0.32s;
156 | }
157 | .loader:after {
158 | left: 1.5em;
159 | }
160 | @-webkit-keyframes load1 {
161 | 0%,
162 | 80%,
163 | 100% {
164 | box-shadow: 0 0;
165 | height: 4em;
166 | }
167 | 40% {
168 | box-shadow: 0 -2em;
169 | height: 5em;
170 | }
171 | }
172 | @keyframes load1 {
173 | 0%,
174 | 80%,
175 | 100% {
176 | box-shadow: 0 0;
177 | height: 4em;
178 | }
179 | 40% {
180 | box-shadow: 0 -2em;
181 | height: 5em;
182 | }
183 | }
184 |
--------------------------------------------------------------------------------
/ui/changes/src/feature.js:
--------------------------------------------------------------------------------
1 | import * as common from "./common.js";
2 |
3 | let resultSummary = document.getElementById("result-summary");
4 | let resultGraphs = document.getElementById("result-graphs");
5 |
async function renderFeatureChangesChart(chartEl, bugSummaries) {
  // Only show fixed bugs (those with a landing date).
  const fixedSummaries = bugSummaries.filter(
    (bugSummary) => bugSummary.date !== null
  );

  if (fixedSummaries.length == 0) {
    return;
  }

  // Map metabug id -> metabug summary text.
  const metabugs = {};
  for (const metabug of await common.featureMetabugs) {
    metabugs[metabug.id] = metabug.summary;
  }

  // Count how many fixed bugs belong to each feature metabug.
  const featureCounter = new common.Counter();
  for (const bugSummary of fixedSummaries) {
    for (const bugID of bugSummary["meta_ids"]) {
      featureCounter[metabugs[bugID]] += 1;
    }
  }

  // Inverse map (summary text -> metabug id) used by the click handler below.
  const metabug_summary_to_id = {};
  for (const [id, summary] of Object.entries(metabugs)) {
    metabug_summary_to_id[summary] = id;
  }

  common.renderTreemap(chartEl, `Feature metabug changes`, featureCounter, 0, {
    dataPointSelection: function (event, chartContext, config) {
      // Clicking a treemap tile filters the page on that metabug.
      const summary = Object.keys(featureCounter)[config.dataPointIndex];

      const metaBugID = document.getElementById("metaBugID");
      metaBugID.value = metabug_summary_to_id[summary];
      metaBugID.dispatchEvent(new Event("change"));
    },
  });
}
45 |
async function renderSummary(bugSummaries) {
  let metaBugID = common.getOption("metaBugID");

  // Total number of changesets across all bugs. Previously this was
  // initialized to an empty array, which rendered as an empty string
  // (not "0") in the summary text when no bugs matched the filters.
  const changesets = bugSummaries.reduce(
    (total, summary) => total + summary.commits.length,
    0
  );

  let bugText = metaBugID ? `For bug ${metaBugID}: ` : ``;
  let summaryText = `${bugText}There are ${bugSummaries.length} bugs with ${changesets} changesets.`;
  resultSummary.textContent = summaryText;

  resultGraphs.textContent = "";

  // Render each chart sequentially into its own container.
  let featureChangesChartEl = document.createElement("div");
  resultGraphs.append(featureChangesChartEl);
  await renderFeatureChangesChart(featureChangesChartEl, bugSummaries);

  let riskChartEl = document.createElement("div");
  resultGraphs.append(riskChartEl);
  await common.renderRiskChart(riskChartEl, bugSummaries);

  let regressionsChartEl = document.createElement("div");
  resultGraphs.append(regressionsChartEl);
  await common.renderRegressionsChart(regressionsChartEl, bugSummaries);

  let timeToBugChartEl = document.createElement("div");
  resultGraphs.append(timeToBugChartEl);
  await common.renderTimeToBugChart(timeToBugChartEl, bugSummaries);

  let timeToConfirmChartEl = document.createElement("div");
  resultGraphs.append(timeToConfirmChartEl);
  await common.renderTimeToConfirmChart(timeToConfirmChartEl, bugSummaries);
}
82 |
async function renderUI(rerenderSummary = true) {
  // Fetch the summaries matching the current filter options, then render.
  const filteredSummaries = await common.getFilteredBugSummaries();

  if (rerenderSummary) {
    await renderSummary(filteredSummaries);
  }
  await common.renderTable(filteredSummaries);
}
92 |
(async function init() {
  // Register renderUI as the change callback (presumably re-run when an
  // option changes — confirm against common.setupOptions), then render once.
  await common.setupOptions(renderUI);

  await renderUI();
})();
98 |
--------------------------------------------------------------------------------
/ui/changes/src/index.js:
--------------------------------------------------------------------------------
1 | import * as common from "./common.js";
2 |
3 | let resultSummary = document.getElementById("result-summary");
4 | let resultGraphs = document.getElementById("result-graphs");
5 |
async function renderSummary(bugSummaries) {
  let metaBugID = common.getOption("metaBugID");

  // Total number of changesets across all bugs. Previously this was
  // initialized to an empty array, which rendered as an empty string
  // (not "0") in the summary text when no bugs matched the filters.
  const changesets = bugSummaries.reduce(
    (total, summary) => total + summary.commits.length,
    0
  );

  let bugText = metaBugID ? `For bug ${metaBugID}: ` : ``;
  let summaryText = `${bugText}There are ${bugSummaries.length} bugs with ${changesets} changesets.`;
  resultSummary.textContent = summaryText;

  resultGraphs.textContent = "";
  let testingChartEl = document.createElement("div");
  resultGraphs.append(testingChartEl);
  // NOTE(review): unlike the other charts this call is not awaited — confirm
  // renderTestingChart is synchronous (or that fire-and-forget is intended).
  common.renderTestingChart(testingChartEl, bugSummaries);

  let riskChartEl = document.createElement("div");
  resultGraphs.append(riskChartEl);
  await common.renderRiskChart(riskChartEl, bugSummaries);

  let regressionsChartEl = document.createElement("div");
  resultGraphs.append(regressionsChartEl);
  await common.renderRegressionsChart(regressionsChartEl, bugSummaries);

  let severityChartEl = document.createElement("div");
  resultGraphs.append(severityChartEl);
  await common.renderSeverityChart(severityChartEl, bugSummaries);

  let fixTimesChartEl = document.createElement("div");
  resultGraphs.append(fixTimesChartEl);
  await common.renderFixTimesChart(fixTimesChartEl, bugSummaries);

  let timeToBugChartEl = document.createElement("div");
  resultGraphs.append(timeToBugChartEl);
  await common.renderTimeToBugChart(timeToBugChartEl, bugSummaries);

  let timeToConfirmChartEl = document.createElement("div");
  resultGraphs.append(timeToConfirmChartEl);
  await common.renderTimeToConfirmChart(timeToConfirmChartEl, bugSummaries);
}
49 |
async function renderUI(rerenderSummary = true) {
  // Fetch the summaries matching the current filter options, then render.
  const filteredSummaries = await common.getFilteredBugSummaries();

  if (rerenderSummary) {
    await renderSummary(filteredSummaries);
  }
  await common.renderTable(filteredSummaries);
}
59 |
(async function init() {
  // Register renderUI as the change callback (presumably re-run when an
  // option changes — confirm against common.setupOptions), then render once.
  await common.setupOptions(renderUI);

  await renderUI();
})();
65 |
--------------------------------------------------------------------------------
/ui/changes/src/release.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | bugbug ui - Release
6 |
7 |
11 |
12 |
13 |
14 | bugbug ui
15 |
20 |
21 |
22 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/ui/changes/src/release.js:
--------------------------------------------------------------------------------
1 | import * as common from "./common.js";
2 |
3 | let resultGraphs = document.getElementById("result-graphs");
4 |
async function renderComponentChangesChart(chartEl, bugSummaries) {
  // Only show fixed bugs (those with a landing date).
  const fixedSummaries = bugSummaries.filter(
    (bugSummary) => bugSummary.date !== null
  );

  if (fixedSummaries.length == 0) {
    return;
  }

  // The grouping dimension selected via the "changeGrouping" option.
  const dimension = common.getOption("changeGrouping")[0];

  // Count fixed bugs per value of the chosen dimension.
  const counter = new common.Counter();
  for (const bugSummary of fixedSummaries) {
    counter[bugSummary[dimension]] += 1;
  }

  // Capitalize the dimension name for the chart title.
  const title = `${dimension.charAt(0).toUpperCase()}${dimension.slice(1)} changes`;
  common.renderTreemap(chartEl, title, counter);
}
26 |
async function renderAffectedComponentChangesChart(chartEl, bugSummaries) {
  // Only consider fixed bugs (those with a landing date).
  const fixedSummaries = bugSummaries.filter(
    (bugSummary) => bugSummary.date !== null
  );

  if (fixedSummaries.length == 0) {
    return;
  }

  // Count fixed bugs per source component.
  const componentCounter = new common.Counter();
  for (const bugSummary of fixedSummaries) {
    componentCounter[bugSummary["component"]] += 1;
  }

  const componentConnectionMap =
    await common.getComponentDependencyMap("regressions");

  // Spread each component's change count onto connected components,
  // weighted by the percentage from the "regressions" dependency map.
  const affectedComponentCounter = new common.Counter();
  for (const [sourceComponent, count] of Object.entries(componentCounter)) {
    if (!componentConnectionMap.hasOwnProperty(sourceComponent)) {
      continue;
    }

    const connections = componentConnectionMap[sourceComponent];
    for (const [targetComponent, percentage] of Object.entries(connections)) {
      affectedComponentCounter[targetComponent] += count * percentage;
    }
  }

  common.renderTreemap(
    chartEl,
    "Most affected components",
    affectedComponentCounter
  );
}
62 |
async function renderUI() {
  // Clear previous charts before rebuilding.
  resultGraphs.textContent = "";

  const bugSummaries = await common.getFilteredBugSummaries();

  // Each chart gets its own freshly appended container.
  const componentChartEl = document.createElement("div");
  resultGraphs.append(componentChartEl);
  await renderComponentChangesChart(componentChartEl, bugSummaries);

  const affectedChartEl = document.createElement("div");
  resultGraphs.append(affectedChartEl);
  await renderAffectedComponentChangesChart(affectedChartEl, bugSummaries);
}
79 |
(async function init() {
  // Register renderUI as the change callback (presumably re-run when an
  // option changes — confirm against common.setupOptions), then render once.
  await common.setupOptions(renderUI);

  await renderUI();
})();
85 |
--------------------------------------------------------------------------------
/ui/changes/src/team.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | bugbug ui - Team
6 |
7 |
11 |
12 |
13 |
14 | bugbug ui
15 |
20 |
21 |
102 |
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/ui/changes/src/team.js:
--------------------------------------------------------------------------------
1 | import { Temporal } from "@js-temporal/polyfill";
2 | import * as common from "./common.js";
3 |
4 | let resultGraphs = document.getElementById("result-graphs");
5 | const dependencySection = document.getElementById("dependency-section");
6 |
async function renderUI() {
  // Clear both output sections before rebuilding everything.
  resultGraphs.textContent = "";
  dependencySection.textContent = "";

  const bugSummaries = await common.getFilteredBugSummaries();

  // Risk: chart plus a per-bug list.
  let riskChartEl = document.createElement("div");
  resultGraphs.append(riskChartEl);
  await common.renderRiskChart(riskChartEl, bugSummaries);

  const riskListEl = await common.renderRiskList(bugSummaries);
  resultGraphs.append(riskListEl);
  resultGraphs.append(document.createElement("br"));

  let regressionsChartEl = document.createElement("div");
  resultGraphs.append(regressionsChartEl);
  await common.renderRegressionsChart(regressionsChartEl, bugSummaries, true);

  let severityChartEl = document.createElement("div");
  resultGraphs.append(severityChartEl);
  await common.renderSeverityChart(severityChartEl, bugSummaries, true);

  // Fix times: chart plus list.
  let fixTimesChartEl = document.createElement("div");
  resultGraphs.append(fixTimesChartEl);
  await common.renderFixTimesChart(fixTimesChartEl, bugSummaries);

  const fixTimesListEl = await common.renderFixTimesList(bugSummaries);
  resultGraphs.append(fixTimesListEl);
  resultGraphs.append(document.createElement("br"));

  // Patch coverage: chart plus list.
  let patchCoverageChartEl = document.createElement("div");
  resultGraphs.append(patchCoverageChartEl);
  await common.renderPatchCoverageChart(patchCoverageChartEl, bugSummaries);

  const patchCoverageListEl =
    await common.renderPatchCoverageList(bugSummaries);
  resultGraphs.append(patchCoverageListEl);
  resultGraphs.append(document.createElement("br"));

  // Review times: chart plus list.
  let reviewTimeChartEl = document.createElement("div");
  resultGraphs.append(reviewTimeChartEl);
  await common.renderReviewTimeChart(reviewTimeChartEl, bugSummaries);

  const reviewTimeListEl = await common.renderReviewTimeList(bugSummaries);
  resultGraphs.append(reviewTimeListEl);
  resultGraphs.append(document.createElement("br"));

  let assignTimeChartEl = document.createElement("div");
  resultGraphs.append(assignTimeChartEl);
  await common.renderTimeToAssignChart(assignTimeChartEl, bugSummaries);

  // Test failure/skip stats take no bugSummaries — presumably global stats,
  // not filtered by the current selection; confirm against common.js.
  let testFailureStatsChartEl = document.createElement("div");
  resultGraphs.append(testFailureStatsChartEl);
  await common.renderTestFailureStatsChart(testFailureStatsChartEl);

  const testFailureListEl = await common.renderTestFailureList();
  resultGraphs.append(testFailureListEl);
  resultGraphs.append(document.createElement("br"));

  let testSkipStatsChartEl = document.createElement("div");
  resultGraphs.append(testSkipStatsChartEl);
  await common.renderTestSkipStatsChart(testSkipStatsChartEl);

  // Components NOT currently selected via the "components" option.
  const external_components = common.allComponents.filter(
    (component) => !common.getOption("components").includes(component)
  );

  const dependencyHeatmapChartEl = document.createElement("div");
  dependencySection.append(dependencyHeatmapChartEl);
  await common.renderDependencyHeatmap(
    dependencyHeatmapChartEl,
    "Dependencies from external components (columns) to selected components (rows)",
    external_components,
    common.getOption("components")
  );
}
83 |
(async function init() {
  // Default both start-date filters to one year before today (ISO date).
  let startDate = Temporal.Now.plainDateISO().subtract({ years: 1 }).toString();
  document.getElementById("createStartDate").value = document.getElementById(
    "fixStartDate"
  ).value = startDate;

  // Register renderUI as the change callback (presumably re-run when an
  // option changes — confirm against common.setupOptions), then render once.
  await common.setupOptions(renderUI);

  await renderUI();
})();
94 |
--------------------------------------------------------------------------------