├── ql
    ├── .gitignore
    ├── tests
    │   └── SnippetMining
    │   │   ├── TestSnippetMining.expected
    │   │   └── TestSnippetMining.ql
    ├── qlpack.yml
    ├── codeql-pack.lock.yml
    ├── queries
    │   ├── TrivialTest.ql
    │   ├── MissingDone.ql
    │   ├── TestFixedByRetry.ql
    │   ├── FailureClassification.ql
    │   ├── UnhelpfulRefinement.ql
    │   ├── RefinerContributions.ql
    │   └── AssertionQuality.qll
    └── README.md
├── benchmark
    ├── .gitignore
    ├── tsconfig.json
    ├── testCollectorHelper.ts
    ├── package_stats.ts
    ├── generate_diversity_report.ts
    ├── performanceMeasurer.ts
    ├── editDistance.ts
    ├── generate_report.ts
    ├── run.ts
    └── testResultCollector.ts
├── .gitignore
├── .npmignore
├── test
    ├── tsconfig.json
    ├── input
    │   ├── js-fencing-1.md
    │   ├── non-lang-fencing.md
    │   ├── ts-fencing-1.md
    │   └── coffee-fencing.md
    ├── mockModel.ts
    ├── editDistance.ts
    ├── exploreAPIs.ts
    ├── APIFunction.ts
    ├── test-generation.ts
    ├── syntax.ts
    └── docSnippets.ts
├── src
    ├── tsconfig.json
    ├── completionModel.ts
    ├── testValidator.ts
    ├── index.ts
    ├── extensionPoints.ts
    ├── coverage.ts
    ├── mockModel.ts
    ├── syntax.ts
    ├── testResultCollector.ts
    ├── report.ts
    ├── generateTests.ts
    ├── snippetHelper.ts
    ├── codex.ts
    ├── mineSnippets.ts
    ├── mochaValidator.ts
    └── promptCrafting.ts
├── CODEOWNERS
├── typings
    └── espree.d.ts
├── tsconfig-base.json
├── .github
    ├── workflows
    │   ├── ci.yml
    │   ├── release.yml
    │   ├── measure-coverage.yml
    │   └── run-experiment.yml
    ├── parse_packages.js
    ├── benchmarks.txt
    └── non_trivial_coverage.sh
├── SUPPORT.md
├── LICENSE
├── SECURITY.md
├── package.json
├── CONTRIBUTING.md
├── examples
    ├── testGenerationScript.ts
    └── momentjs_test_generation.md
├── CODE_OF_CONDUCT.md
└── README.md


/ql/.gitignore:
--------------------------------------------------------------------------------
1 | .cache
2 | 


--------------------------------------------------------------------------------
/benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | *.js
2 | *.js.map
3 | 


--------------------------------------------------------------------------------
/ql/tests/SnippetMining/TestSnippetMining.expected:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | dist/
3 | *.tgz
4 | .DS_Store


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | .github/
2 | benchmark/
3 | ql/
4 | test/
5 | 


--------------------------------------------------------------------------------
/benchmark/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "../tsconfig-base.json",
3 |   "include": ["*.ts"]
4 | }
5 | 


--------------------------------------------------------------------------------
/test/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "../tsconfig-base.json",
3 |   "include": ["../typings/*.d.ts", "./**/*.ts"]
4 | }
5 | 


--------------------------------------------------------------------------------
/ql/qlpack.yml:
--------------------------------------------------------------------------------
1 | name: testpilot-experiments
2 | version: 0.1.0
3 | dependencies:
4 |   codeql/javascript-all: "*"
5 | extractor: javascript
6 | 


--------------------------------------------------------------------------------
/test/input/js-fencing-1.md:
--------------------------------------------------------------------------------
1 | ```js
2 | const vol = Volume.fromJSON({
3 |   "/app/index.js": "...",
4 |   "/app/package.json": "...",
5 | });
6 | ```
7 | 


--------------------------------------------------------------------------------
/test/input/non-lang-fencing.md:
--------------------------------------------------------------------------------
1 | ```
2 | const vol = Volume.fromJSON({
3 |   '/app/index.js': '...',
4 |   '/app/package.json': '...',
5 | });
6 | ```
7 | 


--------------------------------------------------------------------------------
/test/input/ts-fencing-1.md:
--------------------------------------------------------------------------------
1 | ```ts
2 | const vol = Volume.fromJSON({
3 |   "/app/index.js": "...",
4 |   "/app/package.json": "...",
5 | });
6 | ```
7 | 


--------------------------------------------------------------------------------
/test/input/coffee-fencing.md:
--------------------------------------------------------------------------------
1 | ```coffee
2 | const vol = Volume.fromJSON({
3 |   '/app/index.js': '...',
4 |   '/app/package.json': '...',
5 | });
6 | ```
7 | 


--------------------------------------------------------------------------------
/src/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": "../tsconfig-base.json",
3 |   "compilerOptions": {
4 |     "outDir": "../dist",
5 |     "declaration": true
6 |   },
7 |   "include": ["../typings/*.d.ts", "./**/*.ts"]
8 | }
9 | 


--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # For more information, see [docs](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-syntax)
2 | 
3 | # This repository is maintained by: 
4 | * @max-schaefer @snadi @franktip
5 | 


--------------------------------------------------------------------------------
/typings/espree.d.ts:
--------------------------------------------------------------------------------
 1 | // a minimal type definition file covering only what we need
 2 | declare module "espree" {
 3 |   export interface Options {
 4 |     ecmaVersion?: number | "latest";
 5 |     loc?: boolean;
 6 |     comment?: boolean;
 7 |   }
 8 |   export function parse(code: string, options?: Options): any;
 9 | }
10 | 


--------------------------------------------------------------------------------
/ql/codeql-pack.lock.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | lockVersion: 1.0.0
 3 | dependencies:
 4 |   codeql/javascript-all:
 5 |     version: 0.6.4
 6 |   codeql/regex:
 7 |     version: 0.0.15
 8 |   codeql/tutorial:
 9 |     version: 0.0.12
10 |   codeql/util:
11 |     version: 0.0.12
12 |   codeql/yaml:
13 |     version: 0.0.4
14 | compiled: false
15 | 


--------------------------------------------------------------------------------
/tsconfig-base.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "target": "es2018",
 4 |         "module": "NodeNext",
 5 |         "sourceMap": true,
 6 |         "experimentalDecorators": true,
 7 |         "skipLibCheck": true,
 8 |         "strict": true,
 9 |         "lib": [
10 |             "es2018"
11 |         ]
12 |     }
13 | }


--------------------------------------------------------------------------------
/src/completionModel.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * An abstract representation of a model such as Codex that can provide
 3 |  * completions for a prompt.
 4 |  */
 5 | export interface ICompletionModel {
 6 |   /**
 7 |    * Get a set of completions for the given prompt with the given sampling temperature.
 8 |    */
 9 |   completions(prompt: string, temperature: number): Promise<Set<string>>;
10 | }
11 | 


--------------------------------------------------------------------------------
/ql/queries/TrivialTest.ql:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @name Trivial test
 3 |  * @description Highlight tests that do not contain non-trivial assertions.
 4 |  */
 5 | 
 6 | import AssertionQuality
 7 | 
 8 | from GeneratedTest t, string reason
 9 | where
10 |   not exists(AssertionInGeneratedTest a | a.getFile() = t and a.isNonTrivial()) and
11 |   (
12 |     if exists(AssertionInGeneratedTest a | a.getFile() = t)
13 |     then reason = "only trivial assertions"
14 |     else reason = "no assertions"
15 |   )
16 | select t, "Test contains " + reason + "."
17 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Run tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [main]
 6 | 
 7 |   workflow_dispatch:
 8 | 
 9 | jobs:
10 |   run-tests:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - name: Check out this repo
15 |         uses: actions/checkout@v3
16 | 
17 |       - name: Set up Node.js
18 |         uses: actions/setup-node@v3
19 | 
20 |       - name: Check formatting and run tests
21 |         run: |
22 |           npm run build
23 |           npm run autoformat:check
24 |           npm run test
25 | 


--------------------------------------------------------------------------------
/src/testValidator.ts:
--------------------------------------------------------------------------------
 1 | import { ICoverageSummary } from "./coverage";
 2 | import { TestOutcome } from "./report";
 3 | 
 4 | export abstract class TestValidator {
 5 |   /** Validate the given test, determining whether it passes or not. */
 6 |   public abstract validateTest(
 7 |     testName: string,
 8 |     testSource: string
 9 |   ): TestOutcome;
10 | 
11 |   /** Compute a coverage summary for all passing tests this validator has seen. */
12 |   public abstract computeCoverageSummary(): ICoverageSummary;
13 | 
14 |   /** Clean up any temporary data this validator has accumulated. */
15 |   public cleanup(): void {}
16 | }
17 | 


--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
 1 | # Support
 2 | 
 3 | ## How to file issues and get help
 4 | 
 5 | This project uses GitHub issues to track bugs and feature requests. Please search the existing issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new issue.
 6 | 
 7 | For help or questions about using this project, please file an issue.
 8 | 
 9 | TestPilot is not actively developed but is maintained by GitHub staff and the community. We will do our best to respond to support and community questions in a timely manner.
10 | 
11 | ## GitHub Support Policy
12 | 
13 | Support for this project is limited to the resources listed above.
14 | 


--------------------------------------------------------------------------------
/test/mockModel.ts:
--------------------------------------------------------------------------------
 1 | import { expect } from "chai";
 2 | import { MockCompletionModel } from "../src/mockModel";
 3 | 
 4 | describe("test MockCompletionModel", () => {
 5 |   it("should be able to add and get completions", async () => {
 6 |     const model = new MockCompletionModel(true);
 7 |     model.addCompletions("foo", 0.5, ["bar", "baz"]);
 8 |     expect(await model.completions("foo", 0.5)).to.deep.equal(
 9 |       new Set(["bar", "baz"])
10 |     );
11 |   });
12 | 
13 |   it("should throw an error if completions are not found", async () => {
14 |     const model = new MockCompletionModel(true);
15 |     try {
16 |       await model.completions("foo", 0.5);
17 |       expect.fail();
18 |     } catch (e: any) {
19 |       expect(e.message).to.equal("Prompt not found at temperature 0.5: foo");
20 |     }
21 |   });
22 | 
23 |   it("should not throw an error if completions are not found and strictResponses is false", async () => {
24 |     const model = new MockCompletionModel(false);
25 |     expect(await model.completions("foo", 0.5)).to.deep.equal(new Set());
26 |   });
27 | });
28 | 


--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
 1 | export { Codex, PostOptions as CodexPostOptions } from "./codex";
 2 | export { ICompletionModel } from "./completionModel";
 3 | export { emptyCoverageSummary, ICoverageSummary } from "./coverage";
 4 | export { getDocSnippets } from "./docSnippets";
 5 | export {
 6 |   API,
 7 |   ApiElementDescriptor,
 8 |   APIFunction,
 9 |   exploreAPI,
10 |   findDocComments,
11 |   FunctionDescriptor,
12 | } from "./exploreAPI";
13 | export { TestGenerator } from "./generateTests";
14 | export { getSnippets } from "./mineSnippets";
15 | export { MochaValidator } from "./mochaValidator";
16 | export { MockCompletionModel } from "./mockModel";
17 | export { Prompt, RetryPrompt } from "./promptCrafting";
18 | export {
19 |   IMetaData,
20 |   ITestFailureInfo,
21 |   ITestInfo,
22 |   ITestReport,
23 |   ReportForTest,
24 |   TestOutcome,
25 |   TestStatus,
26 | } from "./report";
27 | export { trimCompletion } from "./syntax";
28 | export {
29 |   BaseTestResultCollector,
30 |   IPromptInfo,
31 |   ITestResultCollector,
32 | } from "./testResultCollector";
33 | export { TestValidator } from "./testValidator";
34 | 


--------------------------------------------------------------------------------
/ql/queries/MissingDone.ql:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @name Test failure due to missing `done` callback
 3 |  * @description Find tests that fail because they do not call the `done`
 4 |  *              callback.
 5 |  * @kind problem
 6 |  */
 7 | 
 8 | import AssertionQuality
 9 | 
10 | class TimedOutTest extends GeneratedTest {
11 |   TimedOutTest() { this.failsDueTo("TimeoutError") }
12 | 
13 |   predicate isMissingDone() {
14 |     exists(DataFlow::ParameterNode done |
15 |       done = DataFlow::globalVarRef("it").getACall().getABoundCallbackParameter(1, 0) and
16 |       done.getFile() = this and
17 |       not exists(done.getACall())
18 |     )
19 |   }
20 | }
21 | 
22 | query predicate stats(ReportJson report, int totalFailed, int totalTimeout, int totalMissingDone) {
23 |   totalFailed = count(GeneratedTest t | t = report.getATest() and t.fails()) and
24 |   totalTimeout = count(TimedOutTest t | t = report.getATest()) and
25 |   totalMissingDone = count(TimedOutTest t | t = report.getATest() and t.isMissingDone())
26 | }
27 | 
28 | from TimedOutTest t
29 | where t.isMissingDone()
30 | select t, "Test failure due to missing call to `done`."
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 GitHub, Inc
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/ql/queries/TestFixedByRetry.ql:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @name Test fixed after retrying
 3 |  * @description Find failing tests that pass after having been refined
 4 |  *              with the `RetryWithError` refiner.
 5 |  * @kind problem
 6 |  */
 7 | 
 8 | import AssertionQuality
 9 | 
10 | predicate testFixedByRetry(
11 |   ReportJson report, Prompt orig, GeneratedTest failing, Prompt refined, GeneratedTest passing
12 | ) {
13 |   orig = report.getAPrompt() and
14 |   failing = orig.getATest(false, _) and
15 |   refined.isRefinedFrom(orig, failing, "RetryWithError") and
16 |   passing = refined.getATest(true, _)
17 | }
18 | 
19 | query predicate stats(
20 |   ReportJson report, ErrorCategory errorCategory, int failed, int fixed
21 | ) {
22 |   failed = count(GeneratedTest t | t = report.getATest() and t.failsDueTo(errorCategory)) and
23 |   fixed =
24 |     count(GeneratedTest t | testFixedByRetry(report, _, t, _, _) and t.failsDueTo(errorCategory))
25 | }
26 | 
27 | from Prompt orig, GeneratedTest failing, Prompt refined, GeneratedTest passing
28 | where testFixedByRetry(_, orig, failing, refined, passing)
29 | select failing, "This test was $@ by retrying.", passing, "fixed"
30 | 


--------------------------------------------------------------------------------
/ql/README.md:
--------------------------------------------------------------------------------
 1 | # Setting up and using CodeQL
 2 | 
 3 | ## Installation
 4 | 
 5 | Install the CodeQL CLI as described in the [documentation](https://docs.github.com/en/code-security/codeql-cli/getting-started-with-the-codeql-cli/setting-up-the-codeql-cli).
 6 | 
 7 | In this directory, run `codeql pack install` to install the CodeQL libraries for JavaScript.
 8 | 
 9 | ## Analyzing the results of a benchmark run
10 | 
11 | To analyze the results of a benchmark run, download the artifacts to some directory `$artifact_dir`, and then run the following command to build a database from the results in `$dbdir`:
12 | 
13 | ```sh
14 | LGTM_INDEX_FILTERS='include:**/*.json
15 |     exclude:**/coverageData/**/*.json' codeql database create --overwrite -l javascript --source-root $artifact_dir -- $dbdir
16 | ```
17 | 
18 | (Note that the environment variable `LGTM_INDEX_FILTERS` has to be set exactly as shown, with a _newline_ in between the `include:` and `exclude:` lines. Otherwise database creation will fail or result in an empty database.)
19 | 
20 | If the artifacts contain very large (>10MB) JSON files, those files will be skipped by default. To include them in the database, set the environment variable `LGTM_MAX_FILE_SIZE` to a larger value, such as `100MB`.
21 | 
22 | You can use either the CodeQL CLI or the CodeQL extension for VSCode to analyze the resulting database, using the queries in this repository.
23 | 


--------------------------------------------------------------------------------
/src/extensionPoints.ts:
--------------------------------------------------------------------------------
 1 | // function getExtensionPoints(prompt: Prompt, ast: any): Prompt[] {
 2 | //     let extensionPoints: Prompt[] = [];
 3 | //     let fixedPart = prompt.prefix + prompt.snippets + prompt.sign;
 4 | //     estraverse.traverse(ast, {
 5 | //         enter: function (node: any, parent) {
 6 | //             if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') {
 7 | //                 let cutoff = node.end - 1;
 8 | //                 if (cutoff >= 0) {
 9 | //                     extensionPoints.push({
10 | //                         prefix: prompt.prefix,
11 | //                         sign: prompt.sign,
12 | //                         snippets: prompt.snippets,
13 | //                         code: (fixedPart + prompt.code).slice(fixedPart.length, cutoff),
14 | //                         suffix: (fixedPart + prompt.code).slice(cutoff) + prompt.suffix,
15 | //                         id: prompt.id
16 | //                     });
17 | //                 }
18 | //             }
19 | //         },
20 | //         leave: function (node, parent) {
21 | //             //nothing for now
22 | //         }
23 | //     });
24 | //     // The first element in extensionPoints is for the describe function,
25 | //     // as it is the topmost function definition in the AST.
26 | //     // So we remove it to not add multiple tests.
27 | //     return extensionPoints.slice(1);
28 | // }
29 | 


--------------------------------------------------------------------------------
/ql/queries/FailureClassification.ql:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @name Test failure classification
 3 |  * @description Classify the cause of test failures.
 4 |  * @kind problem
 5 |  */
 6 | 
 7 | import AssertionQuality
 8 | 
 9 | /** Classify reasons for test failure. */
10 | predicate testFailsDueTo(ReportJson report, GeneratedTest failing, ErrorCategory errorCategory) {
11 |   failing = report.getATest() and
12 |   failing.failsDueTo(errorCategory)
13 | }
14 | 
15 | /** Compute statistics about reasons for test failure. */
16 | query predicate stats(ReportJson report, ErrorCategory category, int numFailed) {
17 |   numFailed = count(GeneratedTest t | testFailsDueTo(report, t, category))
18 | }
19 | 
20 | /**
21 |  * Consistency check: a test should be assigned a single error category iff it
22 |  * fails.
23 |  *
24 |  * This predicate should be empty.
25 |  */
26 | query predicate check(GeneratedTest t, string problem) {
27 |   t.fails() and
28 |   exists(int n | n = count(ErrorCategory err | t.failsDueTo(err)) |
29 |     n != 1 and
30 |     problem = "Test fails, but is assigned " + n + " error categories instead of one."
31 |   )
32 |   or
33 |   not t.fails() and
34 |   exists(ErrorCategory err | t.failsDueTo(err) |
35 |     problem = "Test does not fail, but is assigned error category " + err.toString() + "."
36 |   )
37 | }
38 | 
39 | from GeneratedTest failing, ErrorCategory errorCategory
40 | where testFailsDueTo(_, failing, errorCategory)
41 | select failing, "This test fails due to " + errorCategory + "."
42 | 


--------------------------------------------------------------------------------
/ql/tests/SnippetMining/TestSnippetMining.ql:
--------------------------------------------------------------------------------
 1 | import javascript
 2 | import queries.SnippetMining
 3 | import queries.NameBasedCallGraph
 4 | 
 5 | /** For this test, we want to mine calls to functions named `target`. */
 6 | class FunctionToMine extends TargetFunction {
 7 |   FunctionToMine() { this = "target" }
 8 | }
 9 | 
10 | /**
11 |  * Looks for a comment of the form `// call #n` in the same file (`path`)
12 |  * and on the same `line` as `invk`, and gets the identifier `#n`.
13 |  */
14 | string getId(InvokeExpr invk, string path, int line) {
15 |   exists(Comment c |
16 |     invk.getLocation().hasLocationInfo(path, _, _, line, _) and
17 |     c.getLocation().hasLocationInfo(path, line, _, _, _) and
18 |     result = c.getText().regexpFind("(?<=call )#\\d+", _, _)
19 |   )
20 | }
21 | 
22 | /**
23 |  * Hold if there is a comment `// relevant to call #n` on the given `line`
24 |  * in the file with the given `path`, and the method call `invk` has identifier
25 |  * `#n`.
26 |  */
27 | predicate expectedRelevantLine(InvokeExpr invk, string path, int line) {
28 |   exists(getId(invk, path, line))
29 |   or
30 |   exists(Comment c |
31 |     c.getLocation().hasLocationInfo(path, line, _, _, _) and
32 |     c.getText().regexpMatch(".*relevant to call .*" + getId(invk, _, _) + ".*")
33 |   )
34 | }
35 | 
36 | from InvokeExpr invk, string path, int line, string msg
37 | where
38 |   relevantLine(invk, path, line) and
39 |   not expectedRelevantLine(invk, path, line) and
40 |   msg = "unexpected relevant line"
41 |   or
42 |   not relevantLine(invk, path, line) and
43 |   expectedRelevantLine(invk, path, line) and
44 |   msg = "missing relevant line"
45 | select invk, msg, path, line
46 | 


--------------------------------------------------------------------------------
/src/coverage.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Represents the coverage information associated with each "category" of
 3 |  * coverage (e.g., "statements", "branches", "functions", "lines")
 4 |  */
 5 | interface ICoverageCategoryStats {
 6 |   total: number;
 7 |   covered: number;
 8 |   skipped: number;
 9 |   pct: number;
10 |   nonTrivialPct?: number;
11 | }
12 | 
13 | /**
14 |  * Represents the coverage information associated with a generated test suite,
15 |  * consisting of the coverage information for each "category" of coverage
16 |  * (e.g., "statements", "branches", "functions", "lines")
17 |  */
18 | export interface ICoverageStats {
19 |   lines: ICoverageCategoryStats;
20 |   statements: ICoverageCategoryStats;
21 |   functions: ICoverageCategoryStats;
22 |   branches: ICoverageCategoryStats;
23 |   branchesTrue: ICoverageCategoryStats;
24 | }
25 | 
26 | /**
27 |  * Represents a summary of the coverage information associated with a generated test suite,
28 |  * consisting of both the total coverage information, and similar information on a per-file basis
29 |  */
30 | export interface ICoverageSummary {
31 |   total: ICoverageStats;
32 |   [file: string]: ICoverageStats;
33 | }
34 | 
35 | /**
36 |  * An empty coverage summary object
37 |  */
38 | export function emptyCoverageSummary(): ICoverageSummary {
39 |   return {
40 |     total: {
41 |       lines: { total: 0, covered: 0, skipped: 0, pct: 0 },
42 |       statements: { total: 0, covered: 0, skipped: 0, pct: 0 },
43 |       functions: { total: 0, covered: 0, skipped: 0, pct: 0 },
44 |       branches: { total: 0, covered: 0, skipped: 0, pct: 0 },
45 |       branchesTrue: { total: 0, covered: 0, skipped: 0, pct: 0 },
46 |     },
47 |   };
48 | }
49 | 


--------------------------------------------------------------------------------
/src/mockModel.ts:
--------------------------------------------------------------------------------
 1 | import path from "path";
 2 | import { ICompletionModel } from "./completionModel";
 3 | import { readFileSync } from "fs";
 4 | 
 5 | export class MockCompletionModel implements ICompletionModel {
 6 |   private completionMap: Map<string, string[]> = new Map();
 7 | 
 8 |   constructor(private strictResponses: boolean) {}
 9 | 
10 |   static fromFile(file: string, strictResponses: boolean) {
11 |     const data = JSON.parse(readFileSync(file, "utf8"));
12 |     console.log("Loading completions from file");
13 |     const model = new MockCompletionModel(strictResponses);
14 |     for (const { file: promptFile, temperature, completions } of data.prompts) {
15 |       const prompt = readFileSync(
16 |         path.join(path.dirname(file), "prompts", promptFile),
17 |         "utf8"
18 |       );
19 |       model.addCompletions(prompt, temperature, completions);
20 |     }
21 |     return model;
22 |   }
23 | 
24 |   private key(prompt: string, temperature: number) {
25 |     return JSON.stringify([prompt, temperature]);
26 |   }
27 | 
28 |   public addCompletions(
29 |     prompt: string,
30 |     temperature: number,
31 |     completions: string[]
32 |   ) {
33 |     this.completionMap.set(this.key(prompt, temperature), completions);
34 |   }
35 | 
36 |   public async completions(
37 |     prompt: string,
38 |     temperature: number
39 |   ): Promise<Set<string>> {
40 |     const completions = this.completionMap.get(this.key(prompt, temperature));
41 |     if (!completions) {
42 |       const err = `Prompt not found at temperature ${temperature}: ${prompt}`;
43 |       if (this.strictResponses) {
44 |         throw new Error(err);
45 |       } else {
46 |         console.warn(err);
47 |       }
48 |     }
49 |     return new Set(completions);
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | Thanks for helping make GitHub safe for everyone.
 2 | 
 3 | # Security
 4 | 
 5 | GitHub takes the security of our software products and services seriously, including all of the open source code repositories managed through our GitHub organizations, such as [GitHub](https://github.com/GitHub).
 6 | 
 7 | Even though [open source repositories are outside of the scope of our bug bounty program](https://bounty.github.com/index.html#scope) and therefore not eligible for bounty rewards, we will ensure that your finding gets passed along to the appropriate maintainers for remediation. 
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | If you believe you have found a security vulnerability in any GitHub-owned repository, please report it to us through coordinated disclosure.
12 | 
13 | **Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.**
14 | 
15 | Instead, please send an email to opensource-security[@]github.com.
16 | 
17 | Please include as much of the information listed below as you can to help us better understand and resolve the issue:
18 | 
19 |   * The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
20 |   * Full paths of source file(s) related to the manifestation of the issue
21 |   * The location of the affected source code (tag/branch/commit or direct URL)
22 |   * Any special configuration required to reproduce the issue
23 |   * Step-by-step instructions to reproduce the issue
24 |   * Proof-of-concept or exploit code (if possible)
25 |   * Impact of the issue, including how an attacker might exploit the issue
26 | 
27 | This information will help us triage your report more quickly.
28 | 
29 | ## Policy
30 | 
31 | See [GitHub's Safe Harbor Policy](https://docs.github.com/en/github/site-policy/github-bug-bounty-program-legal-safe-harbor#1-safe-harbor-terms)
32 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "testpilot",
 3 |   "version": "0.0.1",
 4 |   "description": "Test generation using large language models",
 5 |   "main": "dist/index.js",
 6 |   "author": "Max Schaefer <max-schaefer@github.com>",
 7 |   "contributors": [
 8 |     "Frank Tip <f.tip@northeastern.edu>",
 9 |     "Sarah Nadi <nadi@ualberta.ca>",
10 |     "Aryaz Eghbali <aryaz.egh@gmail.com>"
11 |   ],
12 |   "license": "MIT",
13 |   "scripts": {
14 |     "prebuild": "npm i",
15 |     "build": "tsc -p src && tsc -p benchmark",
16 |     "build:watch": "tsc --watch -p src && tsc --watch -p benchmark",
17 |     "pretest": "npm run build",
18 |     "test": "ts-mocha --forbid-only -p test/tsconfig.json test/*.ts",
19 |     "autoformat": "prettier --write src test typings benchmark examples",
20 |     "autoformat:check": "prettier --check src test/*.ts typings benchmark/*.ts examples/*.ts",
21 |     "prepack": "npm run build"
22 |   },
23 |   "devDependencies": {
24 |     "@types/adm-zip": "^0.5.0",
25 |     "@types/chai": "^4.3.1",
26 |     "@types/dedent": "^0.7.0",
27 |     "@types/deep-equal-in-any-order": "^1.0.1",
28 |     "@types/estraverse": "^5.1.1",
29 |     "@types/levenshtein": "^1.0.1",
30 |     "@types/mocha": "^9.1.1",
31 |     "@types/node": "^12.20.55",
32 |     "@types/yargs": "^17.0.10",
33 |     "chai": "^4.3.6",
34 |     "deep-equal-in-any-order": "^2.0.0",
35 |     "prettier": "^2.7.1",
36 |     "source-map-support": "^0.5.21",
37 |     "ts-mocha": "^10.0.0",
38 |     "typescript": "^4.9.3"
39 |   },
40 |   "dependencies": {
41 |     "adm-zip": "^0.5.9",
42 |     "axios": "^1.7.4",
43 |     "common-js-file-extensions": "^1.0.4",
44 |     "console-stamp": "^3.1.0",
45 |     "dedent": "^0.7.0",
46 |     "espree": "^9.3.2",
47 |     "estraverse": "^5.3.0",
48 |     "fast-glob": "^3.2.12",
49 |     "levenshtein": "^1.0.5",
50 |     "mocha": "^10.0.0",
51 |     "nyc": "^15.1.0",
52 |     "pirates": "^4.0.5",
53 |     "simple-git": "^3.16.0",
54 |     "yargs": "^17.6.2"
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------
/ql/queries/UnhelpfulRefinement.ql:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @name Unhelpful refinement
 3 |  * @description Find a prompt refinement where the original prompt produced
 4 |  *              a test that passed, but the refined prompt does not.
 5 |  * @kind problem
 6 |  */
 7 | 
 8 | import AssertionQuality
 9 | 
10 | predicate unhelpfulRefinement(
11 |   ReportJson report, Refiner refiner, Prompt orig, GeneratedTest passing, Prompt refined
12 | ) {
13 |   orig = report.getAPrompt() and
14 |   passing = orig.getATest(true, true) and
15 |   refined.isRefinedFrom(orig, refiner) and
16 |   not exists(refined.getATest(true, _))
17 | }
18 | 
19 | query predicate stats(string package, string refiner, int totalRefinements, int totalUnhelpful, float ratio) {
20 |   exists(ReportJson report | package = report.getPackageName() |
21 |     totalRefinements = strictcount(Prompt p | p = report.getAPrompt() and p.isRefinedFrom(_, refiner)) and
22 |     totalUnhelpful = count(Prompt p | unhelpfulRefinement(report, refiner, p, _, _)) and
23 |     ratio = totalUnhelpful.(float) / totalRefinements
24 |   )
25 |   or
26 |   package = "all" and
27 |   totalRefinements = strictcount(Prompt p | p.isRefinedFrom(_, refiner)) and
28 |   totalUnhelpful = count(Prompt p | unhelpfulRefinement(_, refiner, p, _, _)) and
29 |   ratio = totalUnhelpful.(float) / totalRefinements
30 |   or
31 |   refiner = "any" and
32 |   package = "all" and
33 |   totalRefinements = strictcount(Prompt p | p.isRefinedFrom(_, _)) and
34 |   totalUnhelpful = count(Prompt p | unhelpfulRefinement(_, _, p, _, _)) and
35 |   ratio = totalUnhelpful.(float) / totalRefinements
36 | }
37 | 
38 | from Prompt orig, GeneratedTest passing, Refiner refiner, Prompt refined
39 | where unhelpfulRefinement(_, refiner, orig, passing, refined)
40 | select orig,
41 |   "This prompt produced a $@, but after $@ with " + refiner +
42 |     " only failing tests were produced, for example $@.", passing, "passing test", refined,
43 |   "refining", refined.getATest(false, _), "this one"
44 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Contributing
 2 | 
 3 | [fork]: https://github.com/githubnext/testpilot/fork
 4 | [pr]: https://github.com/githubnext/testpilot/compare
 5 | [code-of-conduct]: CODE_OF_CONDUCT.md
 6 | 
 7 | Hi there! We're thrilled that you'd like to contribute to this project. Your help is essential for keeping it great.
 8 | 
 9 | Contributions to this project are [released](https://help.github.com/articles/github-terms-of-service/#6-contributions-under-repository-license) to the public under the [project's open source license](LICENSE).
10 | 
11 | Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms.
12 | 
13 | ## Submitting a pull request
14 | 
15 | 1. [Fork][fork] and clone the repository
16 | 2. Configure and install the dependencies: `npm install`
17 | 3. Build the project: `npm run build`
18 | 4. Make sure the tests pass on your machine: `npm run test`
19 | 5. Make sure the code is formatted correctly: `npm run autoformat:check`; if it is not, format it: `npm run autoformat`
20 | 6. Create a new branch: `git checkout -b my-branch-name`
21 | 7. Make your change, add tests, and make sure the tests and format checks still pass
22 | 8. Push to your fork and [submit a pull request][pr]
23 | 9. Pat yourself on the back and wait for your pull request to be reviewed and merged.
24 | 
25 | Here are a few things you can do that will increase the likelihood of your pull request being accepted:
26 | 
27 | - Write tests.
28 | - Keep your change as focused as possible. If there are multiple changes you would like to make that are not dependent upon each other, consider submitting them as separate pull requests.
29 | - Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
30 | 
31 | ## Resources
32 | 
33 | - [How to Contribute to Open Source](https://opensource.guide/how-to-contribute/)
34 | - [Using Pull Requests](https://help.github.com/articles/about-pull-requests/)
35 | - [GitHub Help](https://help.github.com)
36 | 


--------------------------------------------------------------------------------
/.github/parse_packages.js:
--------------------------------------------------------------------------------
 1 | const { readFileSync } = require("fs");
 2 | const { join } = require("path");
 3 | 
 4 | function parsePackage(packageURL) {
 5 |   let pkg = new URL(packageURL);
 6 | 
 7 |   // pathname is /<owner>/<repo>/tree/<sha>/<path>
 8 |   // gitlab urls sometimes have an extra entry e.g., https://gitlab.com/nerd-vision/opensource/gitlab-js/tree/c2c9ef54b1ea0fc82b284bc72dc2ff0935983f4c
 9 |   const components = pkg.pathname.split("/");
10 |   if (
11 |     components.length < 5 ||
12 |     (components[3] !== "tree" && components[4] !== "tree")
13 |   ) {
14 |     throw new Error(`Invalid package URL: ${packageURL}`);
15 |   }
16 | 
17 |   var sliceIndex = 5;
18 |   var owner = components[1];
19 |   var repo = components[2];
20 |   var sha = components[4];
21 | 
22 |   if (pkg.host === "gitlab.com") {
23 |     if (components.length > 5) {
24 |       owner = components[1].concat("/", components[2]);
25 |       repo = components[3];
26 |       sha = components[5];
27 |       sliceIndex = 6;
28 |     }
29 |   }
30 | 
31 |   return {
32 |     host: pkg.host,
33 |     owner: owner,
34 |     repo: repo,
35 |     sha: sha,
36 |     path: components.slice(sliceIndex).join("/"),
37 |   };
38 | }
39 | 
40 | const packages = [];
41 | const args = process.argv.slice(2);
42 | const skipSlowBenchmarks =
43 |   args[0] === "--skip-slow-benchmarks" ? (args.shift(), true) : false;
44 | const pkg = args[0].trim();
45 | const lines = [];
46 | if (pkg.startsWith("+")) {
47 |   const file = join(__dirname, pkg.slice(1));
48 |   lines.push(...readFileSync(file, "utf8").split("\n"));
49 | } else {
50 |   lines.push(pkg);
51 | }
52 | for (const line of lines) {
53 |   if (line.startsWith("#") || line.trim() === "") continue;
54 |   if (skipSlowBenchmarks && line.includes("# slow")) continue;
55 |   const parsedPackage = parsePackage(line.split(/\s/)[0]);
56 |   // look for `dependencies: ...` in the comment
57 |   const m = line.match(/#.*dependencies:\s*(.*)/);
58 |   const deps = m ? m[1].trim() : "";
59 |   parsedPackage.dependencies = deps;
60 |   packages.push(parsedPackage);
61 | }
62 | console.log(JSON.stringify(packages));
63 | 


--------------------------------------------------------------------------------
/benchmark/testCollectorHelper.ts:
--------------------------------------------------------------------------------
 1 | /***
 2 |  * Create a unique statement id from path and start/end location for a given statement
 3 |  */
 4 | export function createUniqueStmtId(
 5 |   relpath: string,
 6 |   startLine: number,
 7 |   startColumn: number,
 8 |   endLine: number,
 9 |   endColumn: number
10 | ) {
11 |   return `${relpath}@${startLine}:${startColumn}-${endLine}:${endColumn}`;
12 | }
13 | 
14 | /**
15 |  * Get a map from statement index to unique statement id for a given file in the coverage report
16 |  * @param recordedStmtMap: the statement map recorded in the coverage report
17 |  * @param fileRelPath: the relative path of the file in the coverage report
18 |  * @returns a map from statement index to unique statement id (in same format as createUniqueStmtId)
19 |  */
20 | export function getFileStmts(recordedStmtMap: any, fileRelPath: string) {
21 |   const statementMap = new Map<string, string>();
22 |   for (const key of Object.keys(recordedStmtMap)) {
23 |     const {
24 |       start: { line: startLine, column: startColumn },
25 |       end: { line: endLine, column: endColumn },
26 |     } = recordedStmtMap[key];
27 |     const statementId = createUniqueStmtId(
28 |       fileRelPath,
29 |       startLine,
30 |       startColumn,
31 |       endLine,
32 |       endColumn
33 |     );
34 |     statementMap.set(key, statementId);
35 |   }
36 |   return statementMap;
37 | }
38 | 
39 | /**
40 |  * Get the list of statements covered from a given file in the coverage report
41 |  * @param fileCoverage: the coverage report for a given file
42 |  * @param relpath: the relative path of the file in the coverage report
43 |  * @returns a list of covered statements (in same format as createUniqueStmtId)
44 |  */
45 | export function getCoveredStmtsForFile(fileCoverage: any, relpath: string) {
46 |   const statementMap = getFileStmts(fileCoverage.statementMap, relpath);
47 |   const coveredStmtIds = [];
48 |   for (const stmtIndx of Object.keys(fileCoverage.s)) {
49 |     const isCovered = fileCoverage.s[stmtIndx];
50 |     if (isCovered) {
51 |       coveredStmtIds.push(statementMap.get(stmtIndx)!);
52 |     }
53 |   }
54 |   return coveredStmtIds;
55 | }
56 | 


--------------------------------------------------------------------------------
/examples/testGenerationScript.ts:
--------------------------------------------------------------------------------
 1 | import path from "path";
 2 | import {
 3 |   APIFunction,
 4 |   FunctionDescriptor,
 5 |   Codex,
 6 |   TestGenerator,
 7 |   MochaValidator,
 8 |   BaseTestResultCollector,
 9 | } from "./";
10 | 
11 | (async () => {
12 |   // FunctionDescriptor
13 |   const functionDescriptor: FunctionDescriptor = {
14 |     type: "function",
15 |     signature: "(amount: number, unit: string)",
16 |     isAsync: false,
17 |     implementation: `
18 |     // Pseudo-implementation for moment().add
19 |   `,
20 |     isConstructor: false,
21 |     docComment:
22 |       "Adds the specified amount of time to the moment object. The unit can be years, months, weeks, days, hours, minutes, seconds, or milliseconds. This function modifies the original moment object and returns it for chaining.",
23 |   };
24 | 
25 |   const apiFunction = new APIFunction(
26 |     "moment().add",
27 |     functionDescriptor,
28 |     "moment"
29 |   );
30 | 
31 |   // LLM
32 |   const model = new Codex(false, {
33 |     n: 5,
34 |     max_tokens: 150,
35 |     temperature: 0.7,
36 |   });
37 | 
38 |   // Validator + Collector
39 |   const momentPath = path.join(require.resolve("moment"), "../");
40 |   const validator = new MochaValidator("moment", momentPath);
41 |   const collector = new BaseTestResultCollector();
42 | 
43 |   const temperatures = [0.7];
44 |   const snippetMap = new Map([
45 |     [
46 |       apiFunction.functionName,
47 |       ["moment().add(10, 'days')", "moment().add(1, 'year').format('YYYY')"],
48 |     ],
49 |   ]);
50 | 
51 |   // TestGenerator
52 |   const generator = new TestGenerator(
53 |     temperatures,
54 |     (fn) => snippetMap.get(fn),
55 |     model,
56 |     validator,
57 |     collector
58 |   );
59 | 
60 |   // Generate the test
61 |   console.log("Generating test for moment().format()");
62 |   await generator.generateAndValidateTests(apiFunction);
63 | 
64 |   // Collect Results
65 |   const testInfos = collector.getTestInfos();
66 | 
67 |   console.log("Test generation complete. Test Details:");
68 |   testInfos.forEach((test) => {
69 |     console.log(
70 |       `Test ID: ${test.id}, Test Name: ${test.testName}, Outcome: ${test.outcome.status}`
71 |     );
72 |   });
73 | })();
74 | 


--------------------------------------------------------------------------------
/benchmark/package_stats.ts:
--------------------------------------------------------------------------------
 1 | import axios from "axios";
 2 | import * as child_process from "child_process";
 3 | import * as fs from "fs";
 4 | import * as os from "os";
 5 | import * as path from "path";
 6 | import simpleGit from "simple-git";
 7 | 
 8 | if (process.argv.length !== 3) {
 9 |   console.error("Usage: package_stats.js <package_dir>");
10 |   console.error("  package_dir: Directory containing package.json");
11 |   console.error();
12 |   console.error("This script computes statistics for a package.");
13 |   process.exit(1);
14 | }
15 | const pkgDir = process.argv[2];
16 | const packageName = JSON.parse(
17 |   fs.readFileSync(path.join(pkgDir, "package.json"), "utf8")
18 | ).name;
19 | 
20 | (async () => {
21 |   const git = simpleGit(pkgDir);
22 |   const weeklyDownloadsUrl = `https://api.npmjs.org/downloads/point/last-week/${packageName}`;
23 |   let weeklyDownloads = 0;
24 |   try {
25 |     weeklyDownloads = (await axios.get(weeklyDownloadsUrl)).data.downloads;
26 |   } catch (e) {
27 |     console.warn(`Failed to get weekly downloads for ${packageName}: ${e}`);
28 |     console.warn("Weekly downloads will be set to 0.");
29 |   }
30 |   const nyc = path.join(__dirname, "..", "node_modules", ".bin", "nyc");
31 |   const tmpdir = fs.mkdtempSync(path.join(os.tmpdir(), "package_stats"));
32 |   child_process.execFileSync(
33 |     nyc,
34 |     [
35 |       "--reporter=json-summary",
36 |       `--report-dir=${tmpdir}`,
37 |       `--temp-dir=${tmpdir}`,
38 |       "node",
39 |       "-e",
40 |       'require(".")',
41 |     ],
42 |     { cwd: pkgDir }
43 |   );
44 |   const coverageFromLoading = JSON.parse(
45 |     fs.readFileSync(path.join(tmpdir, "coverage-summary.json"), "utf8")
46 |   ).total;
47 |   const loc = coverageFromLoading.lines.total;
48 |   const repository = (await git.listRemote(["--get-url"])).trim();
49 |   const sha = (await git.revparse(["HEAD"])).trim();
50 |   console.log(
51 |     JSON.stringify(
52 |       {
53 |         packageName,
54 |         repository,
55 |         sha,
56 |         loc,
57 |         weeklyDownloads,
58 |         coverageFromLoading,
59 |       },
60 |       null,
61 |       2
62 |     )
63 |   );
64 | })().catch((e) => {
65 |   console.error(e);
66 |   process.exit(1);
67 | });
68 | 


--------------------------------------------------------------------------------
/.github/benchmarks.txt:
--------------------------------------------------------------------------------
 1 | https://github.com/jprichardson/node-fs-extra/tree/6bffcd81881ae474d3d1765be7dd389b5edfd0e0
 2 | https://github.com/jprichardson/node-jsonfile/tree/9c6478a85899a9318547a6e9514b0403166d8c5c
 3 | https://github.com/fshost/node-dir/tree/a57c3b1b571dd91f464ae398090ba40f64ba38a2
 4 | https://github.com/petkaantonov/bluebird/tree/6c8c069c34829557abfaca66d7f22383b389a4b5
 5 | https://github.com/kriskowal/q/tree/6bc7f524eb104aca8bffde95f180b5210eb8dd4b
 6 | https://github.com/isaacs/node-graceful-fs/tree/c1b377782112ae0f25b2abe561fbbea6cfb6f876
 7 | https://github.com/tildeio/rsvp.js/tree/21e0c9720e08ffa53d597c54fed17119899a9a83
 8 | https://github.com/isaacs/node-glob/tree/8315c2d576f9f3092cdc2f2cc41a398bc656035a
 9 | https://github.com/maugenst/zip-a-folder/tree/5089113647753d5086ea20f052f9d29840866ee1
10 | https://github.com/streamich/memfs/tree/ec83e6fe1f57432eac2ab61c5367ba9ec3a775a1 # slow; dependencies: typescript@4.9.5
11 | https://github.com/chakrit/node-uneval/tree/7578dc67090f650a171610a08ea529eba9d27438
12 | https://github.com/felixge/node-dirty/tree/d7fb4d4ecf0cce144efa21b674965631a7955e61
13 | https://github.com/pull-stream/pull-stream/tree/29b4868bb3864c427c3988855c5d65ad5cb2cb1c
14 | https://github.com/simple-statistics/simple-statistics/tree/31f037dd5550d554c4a96c3ee35b12e10a1c9cb7
15 | https://github.com/swang/plural/tree/f0027d66ecb37ce0108c8bcb4a6a448d1bf64047
16 | https://github.com/js-sdsl/js-sdsl/tree/055866ad5515037c724a529fecb2d3c2b35b2075
17 | https://github.com/infusion/Complex.js/tree/d995ca105e8adef4c38d0ace50643daf84e0dd1c
18 | https://github.com/quilljs/delta/tree/5ffb853d645aa5b4c93e42aa52697e2824afc869
19 | https://github.com/manuelmhtr/countries-and-timezones/tree/e34cb4b6832795cbac8d44f6f9c97eb1038b831b
20 | https://github.com/rainder/node-geo-point/tree/c839d477ff7a48d1fc6574495cbbc6196161f494
21 | https://gitlab.com/nerd-vision/opensource/gitlab-js/tree/c2c9ef54b1ea0fc82b284bc72dc2ff0935983f4c
22 | https://gitlab.com/comfort-stereo/omnitool/tree/0edf7d148337051c7c2307738423f0ff3db494c7 # slow
23 | https://gitlab.com/demsking/image-downloader/tree/19a53f652824bd0c612cc5bcd3a2eb173a16f938
24 | https://gitlab.com/autokent/crawler-url-parser/tree/202c5b25ad693d284804261e2b3815fe66e0723e
25 | https://gitlab.com/cptpackrat/spacl-core/tree/fcb8511a0d01bdc206582cfacb3e2b01a0288f6a
26 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Release a new version of TestPilot
 2 | on:
 3 |   workflow_dispatch:
 4 |     inputs:
 5 |       prerelease:
 6 |         description: >
 7 |           Create a pre-release instead of a full release.
 8 |         type: boolean
 9 |         default: true
10 |       name:
11 |         description: >
12 |           Name of the release to create. If not specified, the name of the
13 |           release will be the version number specified in the package.json file,
14 |           plus the HEAD commit SHA for pre-releases.
15 |         default: ""
16 |       description:
17 |         description: >
18 |           Description of this release.
19 |         default: ""
20 | jobs:
21 |   release:
22 |     runs-on: ubuntu-latest
23 |     steps:
24 |       - name: Checkout
25 |         uses: actions/checkout@v2
26 | 
27 |       - name: Setup Node.js
28 |         uses: actions/setup-node@v2
29 | 
30 |       - name: Build
31 |         run: npm pack
32 | 
33 |       - name: Determine release name
34 |         run: |
35 |           if ! [ -z "${{ github.event.inputs.name }}" ]; then
36 |             release_name="${{ github.event.inputs.name }}"
37 |           else
38 |             release_name="v$(jq -r '.version' package.json)"
39 |             # if this is a pre-release, append the commit SHA
40 |             if [ "${{ github.event.inputs.prerelease }}" = "true" ]; then
41 |               release_name="${release_name}-$(git rev-parse --short HEAD)"
42 |             fi
43 |           fi
44 |           # check if a tag with this name already exists
45 |           if git rev-parse -q --verify "refs/tags/${release_name}"; then
46 |             echo "Tag ${release_name} already exists. Aborting."
47 |             exit 1
48 |           fi
49 |           echo "Release name: ${release_name}"
50 |           echo "release_name=${release_name}" >> $GITHUB_ENV
51 | 
52 |       - name: Release
53 |         uses: softprops/action-gh-release@v1
54 |         with:
55 |           name: "${{ env.release_name }}"
56 |           body: "${{ github.event.inputs.description }}"
57 |           tag_name: "${{ env.release_name }}"
58 |           prerelease: "${{ github.event.inputs.prerelease }}"
59 |           fail_on_unmatched_files: true
60 |           files: |
61 |             *.tgz
62 |         env:
63 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
64 | 


--------------------------------------------------------------------------------
/ql/queries/RefinerContributions.ql:
--------------------------------------------------------------------------------
 1 | import AssertionQuality
 2 | 
 3 | /**
 4 |  * A pseudo-refiner, that is, either a concrete refiner, "all" (meaning all
 5 |  * refiners), or "none" (meaning no refiners).
 6 |  */
 7 | class PseudoRefiner extends string {
 8 |   PseudoRefiner() {
 9 |     this instanceof Refiner or
10 |     this = "all" or
11 |     this = "none"
12 |   }
13 | }
14 | 
15 | /**
16 |  * Gets a prompt from `report` that does _not_ depend on the given `refiner`.
17 |  *
18 |  * If `refiner` is '"all"', all initial, unrefined prompts (which do not depend
19 |  * on any refiner) are returned.
20 |  * If `refiner` is '"none"', all prompts are returned.
21 |  */
22 | Prompt promptWithout(ReportJson report, PseudoRefiner refiner) {
23 |   result = report.getAPrompt() and
24 |   (
25 |     result.doesNotNeed(refiner)
26 |     or
27 |     refiner = "all" and not result.isRefinedFrom(_, _)
28 |     or
29 |     refiner = "none"
30 |   )
31 | }
32 | 
33 | GeneratedTest testWithout(
34 |   ReportJson report, PseudoRefiner refiner, boolean passes, boolean nontrivial
35 | ) {
36 |   result = promptWithout(report, refiner).getATest(passes, nontrivial)
37 | }
38 | 
39 | /**
40 |  * Gets the number of passing tests in `report` that do not depend on
41 |  * `refiner`.
42 |  */
43 | int getPassingTestsWithout(ReportJson report, PseudoRefiner refiner) {
44 |   result = count(testWithout(report, refiner, true, _))
45 | }
46 | 
47 | /**
48 |  * Gets the number of statements covered by passing tests in `report` that do
49 |  * not depend on `refiner`.
50 |  */
51 | int getStatementsCoveredWithout(ReportJson report, PseudoRefiner refiner) {
52 |   result = count(string stmtId | testWithout(report, refiner, true, _).coversStmt(stmtId))
53 | }
54 | 
55 | /**
56 |  * Gets the number of statements covered by non-trivial passing tests in
57 |  * `report` that do not depend on `refiner`.
58 |  */
59 | int getStatementsNonTriviallyCoveredWithout(ReportJson report, PseudoRefiner refiner) {
60 |   result = count(string stmtId | testWithout(report, refiner, true, true).coversStmt(stmtId))
61 | }
62 | 
63 | /**
64 |  * Computes a percentage value with two decimal places (using floor, not
65 |  * rounding, for consistency with nyc).
66 |  */
67 | bindingset[numerator, denominator]
68 | float perc(float numerator, float denominator) {
69 |   result = ((numerator / denominator * 100) * 100).floor() / 100.0
70 | }
71 | 
72 | from
73 |   ReportJson report, string refiner, int numTests, int numStatements, float passingTestPercWithout,
74 |   float coveragePercWithout, float nonTrivialCoveragePercWithout
75 | where
76 |   numTests = report.getNumberOfTests() and
77 |   numStatements = report.getNumberOfStatements() and
78 |   passingTestPercWithout = perc(getPassingTestsWithout(report, refiner), numTests) and
79 |   coveragePercWithout = perc(getStatementsCoveredWithout(report, refiner), numStatements) and
80 |   nonTrivialCoveragePercWithout =
81 |     perc(getStatementsNonTriviallyCoveredWithout(report, refiner), numStatements)
82 | select report, refiner, passingTestPercWithout, coveragePercWithout, nonTrivialCoveragePercWithout
83 | 


--------------------------------------------------------------------------------
/.github/non_trivial_coverage.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | set -e
 4 | MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 5 | 
 6 | # Usage: non_trivial_coverage.sh <report_dir>
 7 | if [ $# -ne 1 ] || [ ! -d "$1" ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
 8 |   echo "Usage: $0 <report_dir>"
 9 |   echo "  report_dir: Directory containing coverage reports"
10 |   echo
11 |   echo "This script identifies non-trivial tests and adds corresponding coverage information to the report."
12 |   exit 1
13 | fi
14 | report_dir=$1
15 | output=$1/report.json
16 | 
17 | if [ ! -f $output ]; then
18 |   echo "No coverage report found at $output"
19 |   exit 1
20 | fi
21 | 
22 | dbdir=`mktemp -d`
23 | trap "rm -rf $dbdir" EXIT
24 | 
25 | echo "Creating database in $dbdir..."
26 | # make sure there is at least one JavaScript file to avoid extractor error
27 | echo ';' >$report_dir/dummy.js
28 | LGTM_INDEX_FILTERS='include:**/*.json
29 | exclude:**/coverageData/**/*.json' codeql database create -l javascript -s $report_dir $dbdir
30 | 
31 | echo "Running query for identifying non-trivial tests..."
32 | codeql query run --output $dbdir/TrivialTest.bqrs -d $dbdir $MY_DIR/../ql/queries/TrivialTest.ql
33 | 
34 | echo "Marking non-trivial tests and computing coverage information..."
35 | codeql bqrs decode --format csv --no-titles $dbdir/TrivialTest.bqrs | sed 's/"//g' | cut -d, -f1 | xargs -r -n 1 basename >$dbdir/trivial_tests.txt
36 | node <<EOF
37 | const fs = require('fs');
38 | const path = require('path');
39 | 
40 | const trivialTests = fs.readFileSync('$dbdir/trivial_tests.txt', 'utf8').split('\n');
41 | const report = JSON.parse(fs.readFileSync('$output', 'utf8'));
42 | 
43 | let nonTrivialTests = 0, nonTrivialPassingTests = 0;
44 | const nonTriviallyCoveredStatements = new Set();
45 | for (const test of report.tests) {
46 |   test.isTrivial = trivialTests.includes(test.testFile);
47 |   if (!test.isTrivial) {
48 |     nonTrivialTests++;
49 |     if (test.status === 'PASSED') {
50 |       nonTrivialPassingTests++;
51 |       for (const statement of test.coveredStatements) {
52 |         nonTriviallyCoveredStatements.add(statement);
53 |       }
54 |     }
55 |   }
56 | }
57 | report.stats.nrNonTrivialTests = nonTrivialTests;
58 | report.stats.nrNonTrivialPasses = nonTrivialPassingTests;
59 | report.coverage.total.statements.nonTrivialCovered = nonTriviallyCoveredStatements.size;
60 | report.coverage.total.statements.nonTrivialPct = Math.floor(nonTriviallyCoveredStatements.size / report.coverage.total.statements.total * 10000) / 100;
61 | fs.writeFileSync('$output', JSON.stringify(report, null, 2));
62 | EOF
63 | 
64 | echo "Running query for computing per-refiner statistics..."
65 | codeql query run --output $dbdir/RefinerContributions.bqrs -d $dbdir $MY_DIR/../ql/queries/RefinerContributions.ql
66 | codeql bqrs decode --format json $dbdir/RefinerContributions.bqrs | \
67 |   jq '[
68 |     .["#select"].tuples[] |
69 |     {
70 |       "key": .[1],
71 |       "value": {
72 |         "passingTests": .[2],
73 |         "coverage": .[3],
74 |         "nonTrivialCoverage": .[4]
75 |       }
76 |     }
77 |   ] | from_entries' >$1/refiners.json


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, gender identity and expression, level of experience,
 9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at opensource@github.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 | 
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 | 


--------------------------------------------------------------------------------
/src/syntax.ts:
--------------------------------------------------------------------------------
  1 | import * as espree from "espree";
  2 | 
  3 | /** A map from opening brackets to their corresponding closing brackets. */
  4 | const closing = new Map([
  5 |   ["(", ")"],
  6 |   ["{", "}"],
  7 |   ["[", "]"],
  8 | ]);
  9 | 
 10 | /** All closing brackets. */
 11 | const closers = new Set(closing.values());
 12 | 
 13 | /**
 14 |  * Fix the given code by adding missing closing brackets.
 15 |  *
 16 |  * @param code The incomplete code.
 17 |  * @returns Fixed code with closing brackets, or undefined if it cannot be fixed
 18 |  *          with closing brackets.
 19 |  */
 20 | export function closeBrackets(
 21 |   code: string
 22 | ): { source: string; ast: any } | undefined {
 23 |   let brackets = ""; // all outstanding closing brackets, in order
 24 |   for (let i = 0; i < code.length; ++i) {
 25 |     if (code[i] === "/" && code[i + 1] === "/") {
 26 |       // skip line comment
 27 |       const nl = code.indexOf("\n", i);
 28 |       if (nl === -1) {
 29 |         break;
 30 |       }
 31 |       i = nl;
 32 |     } else if (closing.has(code[i])) {
 33 |       // when we see an opening bracket, add the corresponding closing bracket
 34 |       brackets = closing.get(code[i]) + brackets;
 35 |     } else if (closers.has(code[i])) {
 36 |       if (brackets[0] === code[i]) {
 37 |         // closing brackets matches, so remove it
 38 |         brackets = brackets.slice(1);
 39 |       } else {
 40 |         // closing brackets does not match, so we cannot fix this code
 41 |         return undefined;
 42 |       }
 43 |     }
 44 |   }
 45 |   try {
 46 |     const ast = espree.parse(code + brackets, { ecmaVersion: "latest" });
 47 |     return { source: code + brackets, ast };
 48 |   } catch (err) {}
 49 |   return undefined;
 50 | }
 51 | 
 52 | /**
 53 |  * Trim a completion to avoid incomplete lines and extra whitespace, and make
 54 |  * sure it does not break out of enclosing syntactic scopes by closing more
 55 |  * brackets than it opens.
 56 |  *
 57 |  * @param completion The completion.
 58 |  * @returns The trimmed completion.
 59 |  */
 60 | export function trimCompletion(completion: string): string {
 61 |   let endOfLastLine = completion.includes("\n")
 62 |     ? completion.lastIndexOf("\n")
 63 |     : 0;
 64 | 
 65 |   // Avoid incomplete lines
 66 |   if (!completion.match(/[;})]\s*$/)) {
 67 |     completion = completion.slice(0, endOfLastLine);
 68 |   }
 69 | 
 70 |   // check if more brackets are closed than opened
 71 |   let stack = [];
 72 |   for (let i = 0; i < completion.length; ++i) {
 73 |     if (completion[i] === "{" || completion[i] === "(") {
 74 |       stack.push(completion[i]);
 75 |     } else if (completion[i] === "}" || completion[i] === ")") {
 76 |       if (stack.length === 0) {
 77 |         completion = completion.slice(0, i);
 78 |         break;
 79 |       }
 80 |       stack.pop();
 81 |     }
 82 |   }
 83 | 
 84 |   return completion.trim();
 85 | }
 86 | 
 87 | /**
 88 |  * Combine a function's doc comment into a single trimmed commented string
 89 |  * @param docComment the original doc comment, as extracted by exploreAPI
 90 |  * @returns the doc comment with all non-empty lines starting with // (instead of '* ')
 91 |  */
 92 | export function trimAndCombineDocComment(docComment: string): string {
 93 |   return commentOut(
 94 |     docComment
 95 |       .split("\n")
 96 |       .map((line) => line.replace("*", "").trim())
 97 |       .filter((line) => line !== "")
 98 |       .join("\n")
 99 |   );
100 | }
101 | 
102 | /**
103 |  * Comment out the given code line by line.
104 |  */
105 | export function commentOut(code: string): string {
106 |   let lines = code.split("\n");
107 |   // remove trailing empty line
108 |   if (lines[lines.length - 1] === "") {
109 |     lines.pop();
110 |   }
111 |   return lines.map((line) => `// ${line}\n`).join("");
112 | }
113 | 


--------------------------------------------------------------------------------
/src/testResultCollector.ts:
--------------------------------------------------------------------------------
  1 | import { emptyCoverageSummary, ICoverageSummary } from "./coverage";
  2 | import { Prompt } from "./promptCrafting";
  3 | import { ITestInfo, TestOutcome } from "./report";
  4 | 
  5 | export interface IPromptInfo {
  6 |   /** The prompt. */
  7 |   prompt: Prompt;
  8 |   /** A unique ID for this prompt. */
  9 |   id: number;
 10 |   /** The file to store the prompt in. */
 11 |   file: string;
 12 |   /** The sampling temperature for this prompt. */
 13 |   temperature: number;
 14 |   /** The set of completions obtained for this prompt. */
 15 |   completions: Set<string>;
 16 | }
 17 | 
 18 | export interface ITestResultCollector {
 19 |   /**
 20 |    * Record information about a test for the given API function from the given
 21 |    * prompt. If the test was already recorded, the existing test info is returned,
 22 |    * with the new prompt added to the list of prompts.
 23 |    */
 24 |   recordTestInfo(testSource: string, prompt: Prompt, api: string): ITestInfo;
 25 | 
 26 |   /**
 27 |    * Record a test result.
 28 |    *
 29 |    * @param test the test that was run
 30 |    * @param temperature the sampling temperature used to generate the test
 31 |    * @param outcome the outcome of the test
 32 |    */
 33 |   recordTestResult(
 34 |     test: ITestInfo,
 35 |     temperature: number,
 36 |     outcome: TestOutcome
 37 |   ): void;
 38 | 
 39 |   /**
 40 |    * Record information about a prompt.
 41 |    *
 42 |    * @param prompt the prompt
 43 |    * @param temperature the sampling temperature
 44 |    * @param completions the set of completions for the prompt
 45 |    */
 46 |   recordPromptInfo(
 47 |     prompt: Prompt,
 48 |     temperature: number,
 49 |     completions: Set<string>
 50 |   ): void;
 51 | 
 52 |   /**
 53 |    * Record coverage information.
 54 |    *
 55 |    * @param coverageSummary the coverage information
 56 |    */
 57 |   recordCoverageInfo(coverageSummary: ICoverageSummary): void;
 58 | }
 59 | 
 60 | export /**
 61 |  * A simple result collector that keeps track of tests and prompts, but does not
 62 |  * do anything with them.
 63 |  */
 64 | class BaseTestResultCollector implements ITestResultCollector {
 65 |   protected readonly tests: Map<string, ITestInfo> = new Map();
 66 |   protected readonly prompts: Map<Prompt, IPromptInfo> = new Map();
 67 |   protected coverageSummary: ICoverageSummary = emptyCoverageSummary();
 68 | 
 69 |   public recordTestInfo(
 70 |     testSource: string,
 71 |     prompt: Prompt,
 72 |     api: string
 73 |   ): ITestInfo {
 74 |     let testInfo = this.tests.get(testSource);
 75 |     if (testInfo) {
 76 |       testInfo.prompts.push(prompt);
 77 |     } else {
 78 |       const id = this.tests.size;
 79 |       testInfo = {
 80 |         id,
 81 |         testName: `test_${id}.js`,
 82 |         outcome: TestOutcome.OTHER,
 83 |         testSource: testSource,
 84 |         prompts: [prompt],
 85 |         api,
 86 |       };
 87 |       this.tests.set(testSource, testInfo);
 88 |     }
 89 |     return testInfo;
 90 |   }
 91 | 
 92 |   public recordTestResult(
 93 |     test: ITestInfo,
 94 |     temperature: number,
 95 |     outcome: TestOutcome
 96 |   ) {
 97 |     test.outcome = outcome;
 98 |   }
 99 | 
100 |   public recordPromptInfo(
101 |     prompt: Prompt,
102 |     temperature: number,
103 |     completions: Set<string>
104 |   ) {
105 |     const id = this.prompts.size;
106 |     const file = `prompt_${id}.js`;
107 |     this.prompts.set(prompt, { prompt, id, file, temperature, completions });
108 |   }
109 | 
110 |   public recordCoverageInfo(coverageSummary: ICoverageSummary) {
111 |     this.coverageSummary = coverageSummary;
112 |   }
113 | 
114 |   public getPromptInfos(): IPromptInfo[] {
115 |     return Array.from(this.prompts.values());
116 |   }
117 | 
118 |   public getTestInfos(): ITestInfo[] {
119 |     return Array.from(this.tests.values());
120 |   }
121 | }
122 | 


--------------------------------------------------------------------------------
/test/editDistance.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import dedent from "dedent";
  3 | import deepEqualInAnyOrder from "deep-equal-in-any-order";
  4 | import { parseTests } from "../benchmark/editDistance";
  5 | 
  6 | const chai = require("chai");
  7 | chai.use(deepEqualInAnyOrder);
  8 | 
  9 | const testFileName = "testFileName.js";
 10 | /**
 11 |  * helper function to create expected tests from an array of input tests
 12 |  * @param tests
 13 |  * @param testFileName
 14 |  * @returns Set of Test objects
 15 |  */
 16 | function createExpectedTests(tests: string[], testFileName: string) {
 17 |   const expectedTests = new Set();
 18 | 
 19 |   //add tests to expectedTests with index and fileName
 20 |   tests.forEach(function (test, index) {
 21 |     expectedTests.add({
 22 |       fileName: testFileName,
 23 |       index: index,
 24 |       contents: dedent(test),
 25 |     });
 26 |   });
 27 |   return expectedTests;
 28 | }
 29 | 
 30 | function creatTestFileContent(tests: string[]) {
 31 |   return tests
 32 |     .map(function (test) {
 33 |       return dedent(test);
 34 |     })
 35 |     .join("\n\n");
 36 | }
 37 | 
 38 | function setupAndExecuteTest(tests: string[]) {
 39 |   const testFileContent = creatTestFileContent(tests);
 40 | 
 41 |   const expectedTests = createExpectedTests(tests, testFileName);
 42 |   const extractedTests = parseTests(testFileName, testFileContent);
 43 | 
 44 |   expect(expectedTests).to.deep.equal(extractedTests);
 45 | }
 46 | 
 47 | describe("editDistance parseTests", () => {
 48 |   it("should detect multiple tests", () => {
 49 |     const tests = [
 50 |       'it("should eat its own dog food", function () {\n\n    var a = Complex(1, -5).toString();\n}) ',
 51 |       "it('test case', function(done) {\n        let complex = complex_js.ZERO.asin();\n})",
 52 |     ];
 53 | 
 54 |     setupAndExecuteTest(tests);
 55 |   });
 56 | 
 57 |   it("should handle { or ) in it description", () => {
 58 |     const tests = [
 59 |       "it(\"sends { index, value } progress updates\", function () {\n var test = '';})",
 60 |       "it( 'sends ) index, value } progress updates', function () {\n var test = '';})",
 61 |       "it('sends ( index, value } progress updates', function () {\n var test = '';})",
 62 |       "it('sends } index, value } progress updates', function () {\n var test = '';})",
 63 |     ];
 64 | 
 65 |     setupAndExecuteTest(tests);
 66 |   });
 67 | 
 68 |   it("should detect arrow functions", () => {
 69 |     const tests = [
 70 |       dedent`
 71 |         it('my test', () => {
 72 |           // should set the timeout of this test to 1000 ms; instead will fail
 73 |           this.timeout(1000);
 74 |           assert.ok(true);
 75 |         })`,
 76 |     ];
 77 | 
 78 |     setupAndExecuteTest(tests);
 79 |   });
 80 | 
 81 |   it("should not match split", () => {
 82 |     const tests = [
 83 |       dedent`
 84 |         split('my test', () => {
 85 |           // should set the timeout of this test to 1000 ms; instead will fail
 86 |           this.timeout(1000);
 87 |           assert.ok(true);
 88 |         })`,
 89 |     ];
 90 | 
 91 |     const testFileContent = creatTestFileContent(tests);
 92 |     const extractedTests = parseTests(testFileName, testFileContent);
 93 |     expect(extractedTests.size).equal(0);
 94 |   });
 95 | 
 96 |   it("should handle malformed tests", () => {
 97 |     const tests = [
 98 |       dedent`
 99 |         it('my test', () => ()`,
100 |     ];
101 | 
102 |     const testFileContent = creatTestFileContent(tests);
103 |     const extractedTests = parseTests(testFileName, testFileContent);
104 |     expect(extractedTests.size).equal(0);
105 |   });
106 | 
107 |   it("should detect jtests", () => {
108 |     const tests = [
109 |       "test('HashSet hash function test', () => { new HashMap(arr.map(x => [Math.floor(Number(x)), 1]));)}",
110 |     ];
111 |     setupAndExecuteTest(tests);
112 |   });
113 | });
114 | 


--------------------------------------------------------------------------------
/test/exploreAPIs.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import * as exploreAPI from "../src/exploreAPI";
  3 | import dedent from "dedent";
  4 | import * as espree from "espree";
  5 | import { connect } from "http2";
  6 | 
  7 | describe("test source code normalization", () => {
  8 |   it("should normalize regular functions", () => {
  9 |     const code = dedent`
 10 |         function   someNumbers () {
 11 |             yield 0; 
 12 |             yield 1; 
 13 |             yield -1;
 14 |         }
 15 |         `;
 16 |     const expected = dedent`
 17 |         function someNumbers(){yield 0;yield 1;yield-1;}
 18 |         `;
 19 |     expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected);
 20 |   });
 21 | 
 22 |   it("should normalize generator functions", () => {
 23 |     const code = dedent`
 24 |         function   *someNumbers () {
 25 |             yield 0; 
 26 |             yield 1; yield -1;
 27 |         }
 28 |         `;
 29 |     const expected = dedent`
 30 |         function*someNumbers(){yield 0;yield 1;yield-1;}
 31 |         `;
 32 |     expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected);
 33 |   });
 34 | 
 35 |   it("should normalize class methods", () => {
 36 |     const code = dedent`
 37 |          simpleMethod () {const x = 1;}
 38 |         `;
 39 |     const expected = dedent`
 40 |         simpleMethod(){const x=1;}
 41 |         `;
 42 |     expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected);
 43 |   });
 44 | 
 45 |   it("should normalize async class methods", () => {
 46 |     const code = dedent`
 47 |          async simpleMethod (foo: string) {const x = 1;}
 48 |         `;
 49 |     const expected = dedent`
 50 |         async simpleMethod(foo:string){const x=1;}
 51 |         `;
 52 |     expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected);
 53 |   });
 54 | 
 55 |   it("should normalize async generator class methods", () => {
 56 |     const code = dedent`
 57 |          async *simpleMethod (foo: string) {const x = 1;}
 58 |         `;
 59 |     const expected = dedent`
 60 |         async*simpleMethod(foo:string){const x=1;}
 61 |         `;
 62 |     expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected);
 63 |   });
 64 | });
 65 | 
 66 | describe("test finding doc comments", () => {
 67 |   it("should correctly match doc comments", () => {
 68 |     const docComment = dedent`
 69 |             /**
 70 |             * Test Doc Comment
 71 |             * @param foo a parameter
 72 |             */
 73 |         `;
 74 | 
 75 |     const function1Def = dedent`
 76 |         function simpleMethod(foo) {
 77 |             const x = 1;
 78 |         }
 79 |         `;
 80 | 
 81 |     const function2Def = dedent`
 82 |         function otherMethod(param) {
 83 |             const x = 1;
 84 |         }
 85 |         `;
 86 | 
 87 |     const code = docComment.concat("\n", function1Def, "\n", function2Def);
 88 |     const docComments = new Map<string, string>();
 89 |     exploreAPI.findDocComments(code, docComments);
 90 | 
 91 |     expect(
 92 |       docComments.get(exploreAPI.normalizeFunctionSource(function1Def))
 93 |     ).to.equal(docComment.slice(2, -2));
 94 |     expect(
 95 |       docComments.get(exploreAPI.normalizeFunctionSource(function2Def))
 96 |     ).to.equal(undefined);
 97 |   });
 98 | 
 99 |   it("should be robust against failed parsing", () => {
100 |     const docComment = dedent`
101 |             /**
102 |             * Test Doc Comment
103 |             */
104 |         `;
105 | 
106 |     const functionDef = dedent`
107 |         functoin simpleMethod(param) {
108 |             const x = 1;
109 |         }
110 |         `;
111 | 
112 |     const code = docComment.concat("\n", functionDef);
113 |     const docComments = new Map<string, string>();
114 |     exploreAPI.findDocComments(code, docComments);
115 |     expect(
116 |       docComments.get(exploreAPI.normalizeFunctionSource(functionDef))
117 |     ).to.equal(undefined);
118 |   });
119 | });
120 | 


--------------------------------------------------------------------------------
/test/APIFunction.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import { APIFunction } from "../src/exploreAPI";
  3 | 
  4 | describe("test APIFunction", () => {
  5 |   it("should be possible to construct an API function from a short access path directly", () => {
  6 |     const apiFunction = new APIFunction(
  7 |       "zip-a-folder",
  8 |       {
  9 |         type: "function",
 10 |         signature: "(srcFolder, zipFilePath)",
 11 |         isAsync: false,
 12 |         isConstructor: true,
 13 |         implementation: "",
 14 |       },
 15 |       "zip-a-folder"
 16 |     );
 17 |     expect(apiFunction.packageName).to.equal("zip-a-folder");
 18 |     expect(apiFunction.accessPath).to.equal("zip-a-folder");
 19 |     expect(apiFunction.functionName).to.equal("zip-a-folder");
 20 | 
 21 |     const sig = "class zip-a-folder(srcFolder, zipFilePath)";
 22 |     expect(apiFunction).to.deep.equal(APIFunction.fromSignature(sig));
 23 |     expect(apiFunction.signature).to.equal(sig);
 24 |   });
 25 | 
 26 |   it("should be possible to construct an API function from a typical access path directly", () => {
 27 |     const apiFunction = new APIFunction(
 28 |       "plural.addRule",
 29 |       {
 30 |         type: "function",
 31 |         signature: "(match, result)",
 32 |         isAsync: false,
 33 |         isConstructor: false,
 34 |         implementation: "",
 35 |       },
 36 |       "plural"
 37 |     );
 38 |     expect(apiFunction.packageName).to.equal("plural");
 39 |     expect(apiFunction.accessPath).to.equal("plural.addRule");
 40 |     expect(apiFunction.functionName).to.equal("addRule");
 41 | 
 42 |     const sig = "plural.addRule(match, result)";
 43 |     expect(apiFunction).to.deep.equal(APIFunction.fromSignature(sig));
 44 |     expect(apiFunction.signature).to.equal(sig);
 45 |   });
 46 | 
 47 |   it("should be possible to construct an APIFunction from a longer access path directly", () => {
 48 |     const apiFunction = new APIFunction(
 49 |       "zip-a-folder.ZipAFolder.tar",
 50 |       {
 51 |         type: "function",
 52 |         signature: "(srcFolder, tarFilePath, zipAFolderOptions)",
 53 |         isAsync: true,
 54 |         isConstructor: false,
 55 |         implementation: "",
 56 |       },
 57 |       "zip-a-folder"
 58 |     );
 59 |     expect(apiFunction.packageName).to.equal("zip-a-folder");
 60 |     expect(apiFunction.accessPath).to.equal("zip-a-folder.ZipAFolder.tar");
 61 |     expect(apiFunction.functionName).to.equal("tar");
 62 | 
 63 |     const sig =
 64 |       "zip-a-folder.ZipAFolder.tar(srcFolder, tarFilePath, zipAFolderOptions) async";
 65 |     expect(apiFunction).to.deep.equal(APIFunction.fromSignature(sig));
 66 |     expect(apiFunction.signature).to.equal(sig);
 67 |   });
 68 | 
 69 |   it("should handle invalid signatures correctly", () => {
 70 |     expect(() => APIFunction.fromSignature("")).to.throw();
 71 |     expect(() => APIFunction.fromSignature("zip-a-folder")).to.throw();
 72 |     expect(() =>
 73 |       APIFunction.fromSignature("zip-a-folder(srcFolder, zipFilePath) asnyc")
 74 |     ).to.throw();
 75 |   });
 76 | 
 77 |   it("should handle package names containing a dot correctly", () => {
 78 |     const apiFunction = new APIFunction(
 79 |       "zip.a-folder.ZipAFolder.tar",
 80 |       {
 81 |         type: "function",
 82 |         signature: "(srcFolder, tarFilePath, zipAFolderOptions)",
 83 |         isAsync: true,
 84 |         isConstructor: false,
 85 |         implementation: "",
 86 |       },
 87 |       "zip.a-folder"
 88 |     );
 89 |     expect(apiFunction.packageName).to.equal("zip.a-folder");
 90 |     expect(apiFunction.accessPath).to.equal("zip.a-folder.ZipAFolder.tar");
 91 |     expect(apiFunction.functionName).to.equal("tar");
 92 | 
 93 |     const sig =
 94 |       "zip.a-folder.ZipAFolder.tar(srcFolder, tarFilePath, zipAFolderOptions) async";
 95 |     expect(apiFunction.signature).to.equal(sig);
 96 |   });
 97 | 
 98 |   it("should be possible to serialize and deserialize API functions", () => {
 99 |     const apiFunction = APIFunction.fromSignature(
100 |       "zip-a-folder(srcFolder, zipFilePath)"
101 |     );
102 |     const serialized = JSON.stringify(apiFunction);
103 |     const deserialized = APIFunction.fromJSON(JSON.parse(serialized));
104 |     expect(deserialized).to.deep.equal(apiFunction);
105 |   });
106 | });
107 | 


--------------------------------------------------------------------------------
/benchmark/generate_diversity_report.ts:
--------------------------------------------------------------------------------
  1 | import fs from "fs";
  2 | import path from "path";
  3 | import { ITestReport } from "..";
  4 | 
  5 | function formatNum(numerator: number, denominator: number) {
  6 |   if (denominator == 0) return "--";
  7 |   return `${numerator} (${((numerator / denominator) * 100).toFixed(0)} %)`;
  8 | }
  9 | 
 10 | type CoverageStats = {
 11 |   [packageName: string]: {
 12 |     proj: string;
 13 |     numPassing: number;
 14 |     coverage: number;
 15 |     numCoveredStmts: number;
 16 |     stmtCovMap: Map<number, string[]>;
 17 |   };
 18 | };
 19 | 
 20 | function parseReports(root: string) {
 21 |   const coverageStats: CoverageStats = {};
 22 | 
 23 |   for (const proj of fs.readdirSync(root)) {
 24 |     const projDir = path.join(root, proj);
 25 |     if (!fs.lstatSync(projDir).isDirectory()) continue;
 26 | 
 27 |     const stmtCovMap = new Map(); // map from statement to list of tests covering that statement
 28 |     const reportData = JSON.parse(
 29 |       fs.readFileSync(path.join(projDir, "report.json"), "utf8")
 30 |     ) as ITestReport;
 31 |     const packageName = reportData.metaData.packageName;
 32 |     const numCoveredStmts = reportData.coverage?.total.statements?.covered ?? 0;
 33 |     const coverage = reportData.coverage?.total.statements?.pct ?? 0;
 34 |     const numPassing = reportData.stats?.nrPasses ?? 0;
 35 | 
 36 |     for (const test of reportData.tests) {
 37 |       for (const coveredStmt of test.coveredStatements ?? []) {
 38 |         if (!stmtCovMap.has(coveredStmt)) {
 39 |           stmtCovMap.set(coveredStmt, []);
 40 |         }
 41 |         stmtCovMap.get(coveredStmt).push(test.testName);
 42 |       }
 43 |     }
 44 | 
 45 |     coverageStats[packageName] = {
 46 |       proj,
 47 |       numPassing,
 48 |       coverage,
 49 |       numCoveredStmts,
 50 |       stmtCovMap,
 51 |     };
 52 |   }
 53 |   return coverageStats;
 54 | }
 55 | 
 56 | function printTestDiversityReport(title: string, coverageStats: CoverageStats) {
 57 |   console.log(`
 58 | # ${title}
 59 | 
 60 | Project| # Passing Tests| Coverage | # Covered Stmts | Avg. num tests/stmt | # Uniquely Covered Stmts | # Uniquely Covering Tests
 61 | --- | ---: | ---: | ---: | ---: | ---: | ---:`);
 62 | 
 63 |   for (const {
 64 |     proj,
 65 |     numPassing,
 66 |     coverage,
 67 |     numCoveredStmts,
 68 |     stmtCovMap,
 69 |   } of Object.values(coverageStats)) {
 70 |     const coveringTestPerStmt = Array.from(stmtCovMap.values());
 71 |     const averageTestsPerStmt = (
 72 |       coveringTestPerStmt
 73 |         .map((coveringTests) => coveringTests.length)
 74 |         .reduce((a, b) => a + b, 0) / coveringTestPerStmt.length
 75 |     ).toFixed(2);
 76 | 
 77 |     let numUniquelyCoveredStmts = 0;
 78 |     const uniquelyCoveringTests = new Set();
 79 |     for (const coveringTests of stmtCovMap.values()) {
 80 |       if (coveringTests.length == 1) {
 81 |         numUniquelyCoveredStmts++;
 82 |         uniquelyCoveringTests.add(coveringTests[0]);
 83 |       }
 84 |     }
 85 |     const numUniquelyCoveringTests = formatNum(
 86 |       uniquelyCoveringTests.size,
 87 |       numPassing
 88 |     );
 89 | 
 90 |     console.log(
 91 |       `${proj}| ${numPassing} | ${coverage}% | ${numCoveredStmts} | ${averageTestsPerStmt} |  ${numUniquelyCoveredStmts} | ${numUniquelyCoveringTests}`
 92 |     );
 93 |   }
 94 | 
 95 |   console.log(`Interpreting table:
 96 |   - First three columns are the same as the typical table we output
 97 |   - \# Covered stmts: the number of statements covered by the passing tests, from the report.json file
 98 |   - Avg num tests/stmt: for each covered statement, we find the tests that cover this statement and then calculate the average num of tests/stmt
 99 |   - \# Uniquely Covered Stmts: these are statements covered by only one test
100 |   - \# Uniquely Covering Tests: number of tests that uniquely cover at least one statement (and percentage w.r.t number of passing tests; the higher the percentage the better although 100% is unlikely)
101 |    `);
102 | }
103 | 
104 | if (require.main === module) {
105 |   if (process.argv.length != 3) {
106 |     console.error("Usage: node generate_diversity_report.js <artifact_dir>");
107 |     process.exit(1);
108 |   }
109 |   const artifactDir = process.argv[2];
110 |   let coverageStats = parseReports(artifactDir);
111 |   printTestDiversityReport(
112 |     "Diversity of Tests w.r.t Stmt Coverage",
113 |     coverageStats
114 |   );
115 | }
116 | 


--------------------------------------------------------------------------------
/.github/workflows/measure-coverage.yml:
--------------------------------------------------------------------------------
  1 | name: Measure coverage of default test suite
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       packages:
  7 |         description: "Packages to generate tests for"
  8 |         default: "+benchmarks.txt"
  9 |       debug_enabled:
 10 |         type: boolean
 11 |         description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
 12 |         default: false
 13 | 
 14 | jobs:
 15 |   setup:
 16 |     runs-on: ubuntu-latest
 17 |     outputs:
 18 |       packages: "${{ steps.parse_packages.outputs.packages }}"
 19 |     steps:
 20 |       - uses: actions/checkout@v3
 21 | 
 22 |       - uses: actions/setup-node@v3
 23 |         with:
 24 |           node-version: 12
 25 | 
 26 |       - id: parse_packages
 27 |         run: |
 28 |           packages=$(node ${GITHUB_WORKSPACE}/.github/parse_packages.js \
 29 |             "${{ github.event.inputs.packages || '+benchmarks.txt' }}")
 30 |           echo "packages=$packages" >> $GITHUB_OUTPUT
 31 | 
 32 |   benchmark:
 33 |     needs:
 34 |       - setup
 35 |     runs-on: ubuntu-latest
 36 |     continue-on-error: true
 37 |     strategy:
 38 |       fail-fast: false
 39 |       matrix:
 40 |         package: ${{ fromJson(needs.setup.outputs.packages) }}
 41 |     steps:
 42 |       - name: Set up Node.js
 43 |         uses: actions/setup-node@v3
 44 |         with:
 45 |           node-version: 12
 46 | 
 47 |       - name: Checkout github package repo
 48 |         if: ${{ matrix.package.host == 'github.com' }}
 49 |         uses: actions/checkout@v3
 50 |         with:
 51 |           repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
 52 |           ref: ${{ matrix.package.sha }}
 53 |           path: "source"
 54 |       
 55 |       - name: Checkout gitlab package repo
 56 |         if: ${{ matrix.package.host == 'gitlab.com' }}
 57 |         run: |
 58 |           git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} source
 59 |           cd source
 60 |           git checkout ${{ matrix.package.sha }}
 61 | 
 62 |       - name: Determine package name
 63 |         id: pkg-name
 64 |         run: |
 65 |           # name of the package
 66 |           TESTPILOT_PACKAGE_NAME=$(cat source/${{ matrix.package.path }}/package.json | jq -r .name )
 67 | 
 68 |           # some packages have a / in their names (looking at you, gitlab-js!)
 69 |           if [[ "$TESTPILOT_PACKAGE_NAME" == *"/"* ]]; then
 70 |             TESTPILOT_PACKAGE_NAME=${TESTPILOT_PACKAGE_NAME##*/}
 71 |           fi
 72 | 
 73 |           # path to the package within the repo checkout
 74 |           TESTPILOT_PACKAGE_PATH="$GITHUB_WORKSPACE/$TESTPILOT_PACKAGE_NAME/${{ matrix.package.path }}"
 75 |           # make sure there isn't already a directory with the same name
 76 |           if [ -d "$TESTPILOT_PACKAGE_PATH" ]; then
 77 |             echo "ERROR: $TESTPILOT_PACKAGE_PATH already exists"
 78 |             exit 1
 79 |           fi
 80 |           # rename checkout, since some packages examine its name (looking at you, bluebird!)
 81 |           mv source $TESTPILOT_PACKAGE_NAME
 82 |           echo "Package name: $TESTPILOT_PACKAGE_NAME, path: $TESTPILOT_PACKAGE_PATH"
 83 |           # export environment variables
 84 |           echo "TESTPILOT_PACKAGE_NAME=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_ENV
 85 |           echo "TESTPILOT_PACKAGE_PATH=$TESTPILOT_PACKAGE_PATH" >> $GITHUB_ENV
 86 |           echo "pkgName=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_OUTPUT
 87 | 
 88 |       - name: Install package, its dependencies, and test packages
 89 |         run: |
 90 |           cd $TESTPILOT_PACKAGE_PATH
 91 |           npm i || npm i --legacy-peer-deps
 92 |           # if matrix.package.dependencies is not empty, install them
 93 |           if ! [ -z "${{ matrix.package.dependencies }}" ]; then
 94 |             npm i ${{ matrix.package.dependencies }}
 95 |           fi
 96 |           npm run build || npm run prepack || echo 'Error with npm run build and npm run prepack'
 97 |           npm i --no-save mocha nyc
 98 | 
 99 |       - name: Measure coverage of default test suite
100 |         run: |
101 |           cd $TESTPILOT_PACKAGE_PATH
102 |           ./node_modules/.bin/nyc npm test
103 | 
104 |       - name: Setup tmate session
105 |         uses: mxschmitt/action-tmate@v3
106 |         if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
107 | 


--------------------------------------------------------------------------------
/src/report.ts:
--------------------------------------------------------------------------------
  1 | import { ICoverageSummary } from "./coverage";
  2 | import { Prompt } from "./promptCrafting";
  3 | 
  4 | export enum TestStatus {
  5 |   PASSED = "PASSED",
  6 |   FAILED = "FAILED",
  7 |   PENDING = "PENDING",
  8 |   OTHER = "OTHER",
  9 | }
 10 | 
 11 | export type TestOutcome =
 12 |   | { status: "PASSED"; coverageReport?: string; coverageData?: string }
 13 |   | { status: "PENDING" | "OTHER" }
 14 |   | { status: "FAILED"; err: ITestFailureInfo };
 15 | 
 16 | export namespace TestOutcome {
 17 |   export function PASSED(
 18 |     coverageReport?: string,
 19 |     coverageData?: string
 20 |   ): TestOutcome {
 21 |     return { status: "PASSED", coverageReport, coverageData };
 22 |   }
 23 |   export const PENDING: TestOutcome = { status: "PENDING" };
 24 |   export const OTHER: TestOutcome = { status: "OTHER" };
 25 |   export function FAILED(err: ITestFailureInfo): TestOutcome {
 26 |     return { status: "FAILED", err };
 27 |   }
 28 | }
 29 | 
 30 | export interface ITestFailureInfo {
 31 |   message: string;
 32 |   code?: string;
 33 |   stack?: string;
 34 | }
 35 | 
 36 | /**
 37 |  * Represents a test and all associated information
 38 |  */
 39 | export interface ITestInfo {
 40 |   /** The numeric ID of the test. */
 41 |   id: number;
 42 |   /** The name of the test (constructed from the ID). */
 43 |   testName: string;
 44 |   /** The outcome of the test. */
 45 |   outcome: TestOutcome;
 46 |   /** The name of the file containing the test. */
 47 |   testSource: string;
 48 |   /** The prompts that gave rise to this test. */
 49 |   prompts: Prompt[];
 50 |   /** The API method for which this test was generated. */
 51 |   api: string;
 52 | }
 53 | 
 54 | /**
 55 |  * Represents the metadata associated with a generated test suite
 56 |  */
 57 | export interface IMetaData {
 58 |   /** The name of the package under test. */
 59 |   packageName: string;
 60 |   /** Whether usage snippets were mined from documentation. */
 61 |   useDocSnippets: boolean;
 62 |   /** Whether usage snippets were mined from code. */
 63 |   useCodeSnippets: boolean;
 64 |   /** The maximum number of snippets to include in a prompt, or "all" if no limit was imposed. */
 65 |   numSnippets: number | "all";
 66 |   /** The maximum length of each snippet in lines. */
 67 |   snippetLength: number;
 68 |   /** The number of completions to obtain for each prompt. */
 69 |   numCompletions: number;
 70 | }
 71 | 
 72 | export type ReportForTest = {
 73 |   /** name of the test */
 74 |   testName: string;
 75 |   /** API method for which the test was generated */
 76 |   api: string;
 77 |   /** name of the file containing the test */
 78 |   testFile: string;
 79 |   /** IDs of the prompts that gave rise to the test */
 80 |   promptIds: number[];
 81 |   /** status of the test */
 82 |   status: TestStatus;
 83 |   /** error information if the test failed */
 84 |   err: ITestFailureInfo | {};
 85 |   /** statements covered by the test */
 86 |   coveredStatements: string[];
 87 |   /** duration of the test, if known */
 88 |   duration: number | undefined;
 89 | };
 90 | 
 91 | /**
 92 |  * Represents all test results, statistics, prompts, completions, and coverage information
 93 |  * associated with a generated test suite
 94 |  */
 95 | export interface ITestReport {
 96 |   metaData: IMetaData;
 97 |   /** total number of unique snippets available in the snippet map. */
 98 |   nrUniqueSnippets: number;
 99 |   stats: {
100 |     /** total number of tests */
101 |     nrTests: number;
102 |     /** number of passing tests */
103 |     nrPasses: number;
104 |     /** number of failing tests */
105 |     nrFailures: number;
106 |     /** number of pending tests */
107 |     nrPending: number;
108 |     /** number of other tests */
109 |     nrOther: number;
110 |     /** time taken to explore package API */
111 |     apiExplorationTime: number;
112 |     /** time taken to extract doc comments */
113 |     docCommentExtractionTime: number;
114 |     /** time taken to extract snippets */
115 |     snippetExtractionTime: number;
116 |     /** cumulative response time for all Codex queries */
117 |     codexQueryTime: number;
118 |     /** end-to-end wall-clock time (in milliseconds) taken to generate the test suite */
119 |     totalTime: number;
120 |     /** number of tests containing at least one non-trivial assertion */
121 |     nrNonTrivialTests?: number;
122 |     /** number of passing tests containing at least one non-trivial assertion */
123 |     nrNonTrivialPasses?: number;
124 |   };
125 |   tests: ReportForTest[];
126 |   coverage: ICoverageSummary;
127 | }
128 | 


--------------------------------------------------------------------------------
/benchmark/performanceMeasurer.ts:
--------------------------------------------------------------------------------
  1 | import { performance, PerformanceObserver } from "perf_hooks";
  2 | import { CodexPostOptions } from "..";
  3 | 
  4 | export class PerformanceMeasurer {
  5 |   /** Time stamp when this measurer was instantiated. */
  6 |   private readonly start: number;
  7 | 
  8 |   /** Time to explore package API, in milliseconds (includes time to extract doc comments). */
  9 |   private apiExplorationTime: number | undefined = undefined;
 10 | 
 11 |   /** Time to extract doc comments, in milliseconds. */
 12 |   private docCommentExtractionTime: number | undefined = undefined;
 13 | 
 14 |   /** Time to extract snippets, in milliseconds. */
 15 |   private snippetExtractionTime: number | undefined = undefined;
 16 | 
 17 |   /** Runtimes for generated tests in milliseconds. */
 18 |   private readonly testDurations: Map<string, number> = new Map();
 19 | 
 20 |   /**
 21 |    * Response times for requests to the Codex model together with the
 22 |    * corresponding request options.
 23 |    */
 24 |   private readonly codexQueryTimes: [CodexPostOptions, number][] = [];
 25 | 
 26 |   /** An observer for performance measurements. */
 27 |   private readonly observer = new PerformanceObserver((entries) => {
 28 |     for (const entry of entries.getEntries()) {
 29 |       if (entry.name.startsWith("duration:")) {
 30 |         // for each test `test_i.js`, we get a performance measurement `duration:test_i.js`
 31 |         const testName = entry.name.substring("duration:".length);
 32 |         if (this.testDurations.has(testName)) {
 33 |           console.warn(`Multiple durations for test ${testName}`);
 34 |         }
 35 |         this.testDurations.set(testName, entry.duration);
 36 |       } else if (entry.name.startsWith("codex-query:")) {
 37 |         // for each Codex query, we get a performance measurement `codex-query:<options>`
 38 |         const options = JSON.parse(entry.name.substring("codex-query:".length));
 39 |         // remove `logit_bias` property; it's an internal workaround
 40 |         delete options.logit_bias;
 41 |         this.codexQueryTimes.push([options, entry.duration]);
 42 |       } else if (entry.name === "snippet-extraction") {
 43 |         this.snippetExtractionTime = entry.duration;
 44 |       } else if (entry.name === "doc-comment-extraction") {
 45 |         if (this.docCommentExtractionTime === undefined) {
 46 |           this.docCommentExtractionTime = entry.duration;
 47 |         } else {
 48 |           this.docCommentExtractionTime += entry.duration;
 49 |         }
 50 |       } else if (entry.name === "api-exploration") {
 51 |         this.apiExplorationTime = entry.duration;
 52 |       }
 53 |     }
 54 |   });
 55 | 
 56 |   constructor() {
 57 |     this.start = performance.now();
 58 |     this.observer.observe({ entryTypes: ["measure"] });
 59 |   }
 60 | 
 61 |   /**
 62 |    * Get the time (in milliseconds) taken to explore package API, not
 63 |    * including time to extract doc comments.
 64 |    */
 65 |   getApiExplorationTime(): number | undefined {
 66 |     if (this.apiExplorationTime && this.docCommentExtractionTime) {
 67 |       return Math.max(
 68 |         0,
 69 |         this.apiExplorationTime - this.docCommentExtractionTime
 70 |       );
 71 |     }
 72 |     return this.apiExplorationTime;
 73 |   }
 74 | 
 75 |   /** Get the time (in milliseconds) taken to extract doc comments. */
 76 |   getDocCommentExtractionTime(): number | undefined {
 77 |     return this.docCommentExtractionTime;
 78 |   }
 79 | 
 80 |   /** Get the time (in milliseconds) taken to extract snippets. */
 81 |   getSnippetExtractionTime(): number | undefined {
 82 |     return this.snippetExtractionTime;
 83 |   }
 84 | 
 85 |   /** Get the time (in milliseconds) taken to run the given test. */
 86 |   getTestDuration(testName: string): number | undefined {
 87 |     return this.testDurations.get(testName);
 88 |   }
 89 | 
 90 |   /**
 91 |    * Get a list of response times (in milliseconds) for Codex queries
 92 |    * together with the corresponding request parameters.
 93 |    */
 94 |   getCodexQueryTimes(): [CodexPostOptions, number][] {
 95 |     return this.codexQueryTimes;
 96 |   }
 97 | 
 98 |   /** Get the cumulative response time (in milliseconds) for all Codex queries. */
 99 |   getTotalCodexQueryTime(): number {
100 |     return this.codexQueryTimes.reduce(
101 |       (sum, [, duration]) => sum + duration,
102 |       0
103 |     );
104 |   }
105 | 
106 |   /** Get the total elapsed time (in milliseconds) since this measurer was instantiated. */
107 |   getTotalTime(): number {
108 |     return performance.now() - this.start;
109 |   }
110 | }
111 | 


--------------------------------------------------------------------------------
/src/generateTests.ts:
--------------------------------------------------------------------------------
  1 | import { ICompletionModel } from "./completionModel";
  2 | import { APIFunction } from "./exploreAPI";
  3 | import {
  4 |   IPromptRefiner,
  5 |   Prompt,
  6 |   RetryWithError,
  7 |   SnippetIncluder,
  8 |   DocCommentIncluder,
  9 |   FunctionBodyIncluder,
 10 |   defaultPromptOptions,
 11 | } from "./promptCrafting";
 12 | import { ITestInfo, TestOutcome, TestStatus } from "./report";
 13 | import { SnippetMap } from "./snippetHelper";
 14 | import { ITestResultCollector } from "./testResultCollector";
 15 | import { TestValidator } from "./testValidator";
 16 | 
 17 | /**
 18 |  * Context class collecting various bits of information needed for test
 19 |  * generation.
 20 |  */
 21 | export class TestGenerator {
 22 |   private refiners: IPromptRefiner[] = [
 23 |     new SnippetIncluder(),
 24 |     new RetryWithError(),
 25 |     new DocCommentIncluder(),
 26 |     new FunctionBodyIncluder(),
 27 |   ];
 28 | 
 29 |   constructor(
 30 |     private temperatures: number[],
 31 |     private snippetMap: SnippetMap,
 32 |     private model: ICompletionModel,
 33 |     private validator: TestValidator,
 34 |     private collector: ITestResultCollector
 35 |   ) {}
 36 | 
 37 |   /**
 38 |    * Generate tests for a given function and validate them.
 39 |    */
 40 |   async generateAndValidateTests(fun: APIFunction) {
 41 |     for (const temperature of this.temperatures) {
 42 |       let generatedPassingTests = false;
 43 |       const generatedPrompts = new Map<string, Prompt>();
 44 |       const snippets = this.snippetMap(fun.functionName) ?? [];
 45 |       const worklist = [new Prompt(fun, snippets, defaultPromptOptions())];
 46 |       while (worklist.length > 0) {
 47 |         const prompt = worklist.pop()!;
 48 | 
 49 |         // check whether we've generated this prompt before; if so, record that
 50 |         // fact by updating provenance info and skip it
 51 |         const assembledPrompt = prompt.assemble();
 52 |         const previousPrompt = generatedPrompts.get(assembledPrompt);
 53 |         if (previousPrompt) {
 54 |           previousPrompt.withProvenance(...prompt.provenance);
 55 |           continue;
 56 |         }
 57 |         generatedPrompts.set(assembledPrompt, prompt);
 58 | 
 59 |         const completions = await this.model.completions(
 60 |           prompt.assemble(),
 61 |           temperature
 62 |         );
 63 |         for (const completion of completions) {
 64 |           const testInfo = this.validateCompletion(
 65 |             prompt,
 66 |             completion,
 67 |             temperature
 68 |           );
 69 |           if (testInfo.outcome.status === TestStatus.PASSED) {
 70 |             generatedPassingTests = true;
 71 |           }
 72 | 
 73 |           this.refinePrompts(prompt, completion, testInfo, worklist);
 74 |         }
 75 |         this.collector.recordPromptInfo(prompt, temperature, completions);
 76 |       }
 77 |       if (generatedPassingTests) break;
 78 |     }
 79 |   }
 80 | 
 81 |   /**
 82 |    * Build a test for the given prompt and completion, validate it, and return
 83 |    * a test info object.
 84 |    */
 85 |   public validateCompletion(
 86 |     prompt: Prompt,
 87 |     completion: string,
 88 |     temperature: number
 89 |   ) {
 90 |     const testSource = prompt.completeTest(completion);
 91 | 
 92 |     const testInfo = this.collector.recordTestInfo(
 93 |       testSource ?? completion,
 94 |       prompt,
 95 |       prompt.fun.accessPath
 96 |     );
 97 |     if (testInfo.prompts.length > 1) {
 98 |       // we have already validated this test
 99 |       return testInfo;
100 |     }
101 | 
102 |     let outcome;
103 |     if (completion === "") {
104 |       outcome = TestOutcome.FAILED({ message: "Empty test" });
105 |     } else if (testSource) {
106 |       outcome = this.validator.validateTest(
107 |         testInfo.testName,
108 |         testInfo.testSource
109 |       );
110 |     } else {
111 |       outcome = TestOutcome.FAILED({ message: "Invalid syntax" });
112 |     }
113 |     this.collector.recordTestResult(testInfo, temperature, outcome);
114 |     return testInfo;
115 |   }
116 | 
117 |   /**
118 |    * Refine the prompt based on the test outcome, and add the refined prompts
119 |    * to the worklist.
120 |    */
121 |   private refinePrompts(
122 |     prompt: Prompt,
123 |     completion: string,
124 |     testInfo: ITestInfo,
125 |     worklist: Prompt[]
126 |   ) {
127 |     for (const refiner of this.refiners) {
128 |       for (const refinedPrompt of refiner.refine(
129 |         prompt,
130 |         completion,
131 |         testInfo.outcome
132 |       )) {
133 |         const provenance = {
134 |           originalPrompt: prompt,
135 |           testId: testInfo.id,
136 |           refiner: refiner.name,
137 |         };
138 |         worklist.push(refinedPrompt.withProvenance(provenance));
139 |       }
140 |     }
141 |   }
142 | }
143 | 


--------------------------------------------------------------------------------
/src/snippetHelper.ts:
--------------------------------------------------------------------------------
  1 | import levenshtein from "levenshtein";
  2 | 
  3 | type Partition = Set<string>;
  4 | 
  5 | export type SnippetMap = (functionName: string) => string[] | undefined;
  6 | 
  7 | export class Snippets {
  8 |   /** The maximum number of snippets we can comfortably handle. */
  9 |   MAX_SNIPPETS: number;
 10 | 
 11 |   /** A cache recording Levenshtein distance between pairs of strings. */
 12 |   distanceCache: Map<string, number>;
 13 | 
 14 |   constructor() {
 15 |     this.MAX_SNIPPETS = 50;
 16 |     this.distanceCache = new Map<string, number>();
 17 |   }
 18 | 
 19 |   /**
 20 |    * Create the partitions. Initially each snippet is in its own partition.
 21 |    * @param snippets The snippets to partition.
 22 |    * @returns The partitions.
 23 |    */
 24 |   createPartitions(snippets: Set<string>): Partition[] {
 25 |     return [...snippets].map((snippet) => new Set([snippet]));
 26 |   }
 27 | 
 28 |   /**
 29 |    * Compute the Levenshtein distance between two strings, utilizing a cache.
 30 |    */
 31 |   computeDistance(a: string, b: string): number {
 32 |     // construct key for cache; this isn't injective, but it's good enough for our purposes
 33 |     const key = `${a}|||${b}`;
 34 |     if (this.distanceCache.has(key)) {
 35 |       return this.distanceCache.get(key)!;
 36 |     } else {
 37 |       const distance = new levenshtein(a, b).distance;
 38 |       this.distanceCache.set(key, distance);
 39 |       return distance;
 40 |     }
 41 |   }
 42 | 
 43 |   /**
 44 |    * Determine the lowest Levenshtein distance between elements of two partitions.
 45 |    * @param partition1 The first partition to compare.
 46 |    * @param partition2 The second partition to compare.
 47 |    * @returns The lowest Levenshtein distance between elements of the two partitions.
 48 |    */
 49 |   comparePartitions(partition1: Partition, partition2: Partition): number {
 50 |     let lowestDistance = Number.MAX_VALUE;
 51 |     partition1.forEach((snippet1) => {
 52 |       partition2.forEach((snippet2) => {
 53 |         const distance = this.computeDistance(snippet1, snippet2);
 54 |         if (distance < lowestDistance) {
 55 |           lowestDistance = distance;
 56 |         }
 57 |       });
 58 |     });
 59 |     return lowestDistance;
 60 |   }
 61 | 
 62 |   /**
 63 |    * Merge the two partitions with the lowest Levenshtein distance between them.
 64 |    * @param partitions The partitions.
 65 |    * @returns The partitions after merging.
 66 |    */
 67 |   mergeMostSimilarPartitions(partitions: Partition[]): Partition[] {
 68 |     let index1 = -1;
 69 |     let index2 = -1;
 70 |     let mostSimilarDistance = Number.MAX_VALUE;
 71 |     for (let i = 0; i < partitions.length; i++) {
 72 |       for (let j = i + 1; j < partitions.length; j++) {
 73 |         const distance = this.comparePartitions(partitions[i], partitions[j]);
 74 |         if (distance < mostSimilarDistance) {
 75 |           index1 = i;
 76 |           index2 = j;
 77 |           mostSimilarDistance = distance;
 78 |         }
 79 |       }
 80 |     }
 81 |     if (index1 !== -1 && index2 !== -1) {
 82 |       const mergedPartition = new Set([
 83 |         ...partitions[index1],
 84 |         ...partitions[index2],
 85 |       ]);
 86 |       partitions.splice(Math.max(index1, index2), 1); // make sure to remove the element at the larger index first
 87 |       partitions.splice(Math.min(index1, index2), 1);
 88 |       partitions.push(mergedPartition);
 89 | 
 90 |       index1 = -1;
 91 |       index2 = -1;
 92 |     } else {
 93 |       throw new Error();
 94 |     }
 95 |     return partitions;
 96 |   }
 97 | 
 98 |   /**
 99 |    * Select a set of representative snippets. This is done by grouping
100 |    * the snippets into partitions so that the elements of each partition
101 |    * are as similar as possible, and then selecting the smallest snippet
102 |    * from each partition.
103 |    * @param snippets The snippets to select representatives for.
104 |    * @returns The selected snippets.
105 |    */
106 |   selectSnippets(snippets: Set<string>, n: number): Set<string> {
107 |     // create partitions: initially, each snippet is in its own partition
108 |     let partitions = this.createPartitions(snippets);
109 | 
110 |     // while we have too many partitions, merge the most similar ones
111 |     while (partitions.length > n) {
112 |       partitions = this.mergeMostSimilarPartitions(partitions);
113 |     }
114 | 
115 |     // find shortest snippet in each partition and add it to the selected snippets
116 |     const selectedSnippets = new Set<string>();
117 |     for (let i = 0; i < partitions.length; i++) {
118 |       let shortestSnippet = "";
119 |       let shortestSnippetLength = Number.MAX_VALUE;
120 |       partitions[i].forEach((snippet) => {
121 |         if (snippet.length < shortestSnippetLength) {
122 |           shortestSnippet = snippet;
123 |           shortestSnippetLength = snippet.length;
124 |         }
125 |       });
126 |       selectedSnippets.add(shortestSnippet);
127 |     }
128 |     return selectedSnippets;
129 |   }
130 | }
131 | 


--------------------------------------------------------------------------------
/src/codex.ts:
--------------------------------------------------------------------------------
  1 | import axios from "axios";
  2 | import fs from "fs";
  3 | import { performance } from "perf_hooks";
  4 | import { ICompletionModel } from "./completionModel";
  5 | import { trimCompletion } from "./syntax";
  6 | 
  7 | const defaultPostOptions = {
  8 |   max_tokens: 100, // maximum number of tokens to return
  9 |   temperature: 0, // sampling temperature; higher values increase diversity
 10 |   n: 5, // number of completions to return
 11 |   top_p: 1, // no need to change this
 12 | };
 13 | export type PostOptions = Partial<typeof defaultPostOptions>;
 14 | 
 15 | function getEnv(name: string): string {
 16 |   const value = process.env[name];
 17 |   if (!value) {
 18 |     console.error(`Please set the ${name} environment variable.`);
 19 |     process.exit(1);
 20 |   }
 21 |   return value;
 22 | }
 23 | 
 24 | export class Codex implements ICompletionModel {
 25 |   private readonly apiEndpoint: string;
 26 |   private readonly authHeaders: string;
 27 | 
 28 |   constructor(
 29 |     private readonly isStarCoder: boolean,
 30 |     private readonly instanceOptions: PostOptions = {}
 31 |   ) {
 32 |     this.apiEndpoint = this.isStarCoder
 33 |       ? getEnv("STARCODER_API_ENDPOINT")
 34 |       : getEnv("TESTPILOT_LLM_API_ENDPOINT");
 35 |     this.authHeaders = this.isStarCoder
 36 |       ? "{}"
 37 |       : getEnv("TESTPILOT_LLM_AUTH_HEADERS");
 38 |     console.log(
 39 |       `Using ${this.isStarCoder ? "StarCoder" : "GPT"} API at ${
 40 |         this.apiEndpoint
 41 |       }`
 42 |     );
 43 |   }
 44 | 
 45 |   /**
 46 |    * Query Codex for completions with a given prompt.
 47 |    *
 48 |    * @param prompt The prompt to use for the completion.
 49 |    * @param requestPostOptions The options to use for the request.
 50 |    * @returns A promise that resolves to a set of completions.
 51 |    */
 52 |   public async query(
 53 |     prompt: string,
 54 |     requestPostOptions: PostOptions = {}
 55 |   ): Promise<Set<string>> {
 56 |     const headers = {
 57 |       "Content-Type": "application/json",
 58 |       ...JSON.parse(this.authHeaders),
 59 |     };
 60 |     const options = {
 61 |       ...defaultPostOptions,
 62 |       // options provided to constructor override default options
 63 |       ...this.instanceOptions,
 64 |       // options provided to this function override default and instance options
 65 |       ...requestPostOptions,
 66 |     };
 67 | 
 68 |     performance.mark("codex-query-start");
 69 | 
 70 |     const postOptions = this.isStarCoder
 71 |       ? {
 72 |           inputs: prompt,
 73 |           parameters: {
 74 |             max_new_tokens: options.max_tokens,
 75 |             temperature: options.temperature || 0.01, // StarCoder doesn't allow 0
 76 |             n: options.n,
 77 |           },
 78 |         }
 79 |       : {
 80 |           prompt,
 81 |           ...options,
 82 |         };
 83 | 
 84 |     const res = await axios.post(this.apiEndpoint, postOptions, { headers });
 85 | 
 86 |     performance.measure(
 87 |       `codex-query:${JSON.stringify({
 88 |         ...options,
 89 |         promptLength: prompt.length,
 90 |       })}`,
 91 |       "codex-query-start"
 92 |     );
 93 |     if (res.status !== 200) {
 94 |       throw new Error(
 95 |         `Request failed with status ${res.status} and message ${res.statusText}`
 96 |       );
 97 |     }
 98 |     if (!res.data) {
 99 |       throw new Error("Response data is empty");
100 |     }
101 |     const json = res.data;
102 |     if (json.error) {
103 |       throw new Error(json.error);
104 |     }
105 |     let numContentFiltered = 0;
106 |     const completions = new Set<string>();
107 |     if (this.isStarCoder) {
108 |       completions.add(json.generated_text);
109 |     } else {
110 |       for (const choice of json.choices || [{ text: "" }]) {
111 |         if (choice.finish_reason === "content_filter") {
112 |           numContentFiltered++;
113 |         }
114 |         completions.add(choice.text);
115 |       }
116 |     }
117 |     if (numContentFiltered > 0) {
118 |       console.warn(
119 |         `${numContentFiltered} completions were truncated due to content filtering.`
120 |       );
121 |     }
122 |     return completions;
123 |   }
124 | 
125 |   /**
126 |    * Get completions from Codex and postprocess them as needed; print a warning if it did not produce any
127 |    *
128 |    * @param prompt the prompt to use
129 |    */
130 |   public async completions(
131 |     prompt: string,
132 |     temperature: number
133 |   ): Promise<Set<string>> {
134 |     try {
135 |       let result = new Set<string>();
136 |       for (const completion of await this.query(prompt, { temperature })) {
137 |         result.add(trimCompletion(completion));
138 |       }
139 |       return result;
140 |     } catch (err: any) {
141 |       console.warn(`Failed to get completions: ${err.message}`);
142 |       return new Set<string>();
143 |     }
144 |   }
145 | }
146 | 
147 | if (require.main === module) {
148 |   (async () => {
149 |     const codex = new Codex(false);
150 |     const prompt = fs.readFileSync(0, "utf8");
151 |     const responses = await codex.query(prompt, { n: 1 });
152 |     console.log([...responses][0]);
153 |   })().catch((err) => {
154 |     console.error(err);
155 |     process.exit(1);
156 |   });
157 | }
158 | 


--------------------------------------------------------------------------------
/examples/momentjs_test_generation.md:
--------------------------------------------------------------------------------
  1 | # Example: Test Generation for Moment.js Function
  2 | 
  3 | This example demonstrates the process of generating tests for the **`moment().add`** function in Moment.js using a custom test generation framework.
  4 | 
  5 | ## **Importing Dependencies**
  6 | 
  7 | ```typescript
  8 | import path from "path";
  9 | import {
 10 |   APIFunction,
 11 |   FunctionDescriptor,
 12 |   Codex,
 13 |   TestGenerator,
 14 |   MochaValidator,
 15 |   BaseTestResultCollector,
 16 | } from "./";
 17 | ```
 18 | 
 19 | Imports necessary libraries and modules, including the test generation and validation classes.
 20 | 
 21 | ## **Defining the Function Descriptor**
 22 | 
 23 | ```typescript
 24 | const functionDescriptor: FunctionDescriptor = {
 25 |   type: "function",
 26 |   signature: "(amount: number, unit: string)",
 27 |   isAsync: false,
 28 |   implementation: `
 29 |   // Pseudo-implementation for moment().add
 30 |   `,
 31 |   isConstructor: false,
 32 |   docComment:
 33 |     "Adds the specified amount of time to the moment object. The unit can be years, months, weeks, days, hours, minutes, seconds, or milliseconds. This function modifies the original moment object and returns it for chaining.",
 34 | };
 35 | ```
 36 | 
 37 | Describes the function being tested, including its signature and a brief documentation.
 38 | 
 39 | ## **Initializing the Test Generator Components**
 40 | 
 41 | ```typescript
 42 | const apiFunction = new APIFunction("moment().add", functionDescriptor, "moment");
 43 | const model = new Codex(false, {
 44 |   n: 5,
 45 |   max_tokens: 150,
 46 |   temperature: 0.7,
 47 | });
 48 | const momentPath = path.join(require.resolve("moment"), "../");
 49 | const validator = new MochaValidator("moment", momentPath);
 50 | const collector = new BaseTestResultCollector();
 51 | const temperatures = [0.7];
 52 | const snippetMap = new Map([
 53 |   [apiFunction.functionName, ["moment().add(10, 'days')", "moment().add(1, 'year').format('YYYY')"]],
 54 | ]);
 55 | const generator = new TestGenerator(temperatures, (fn) => snippetMap.get(fn), model, validator, collector);
 56 | ```
 57 | 
 58 | Initializes the object that makes prompts to the Codex-based completion API, and sets up paths and validators for test generation. Used `https://api.openai.com/v1/engines/gpt-3.5-turbo-instruct/completions`.
 59 | 
 60 | ## **Test Generation and Collection**
 61 | 
 62 | ```tsx
 63 | console.log("Generating test for moment().format()");
 64 | await generator.generateAndValidateTests(apiFunction);
 65 | const testInfos = collector.getTestInfos();
 66 | console.log("Test generation complete. Test Details:");
 67 | testInfos.forEach((test) => {
 68 |   console.log(`Test ID: ${test.id}, Test Name: ${test.testName}, Outcome: ${test.outcome.status}`);
 69 | });
 70 | 
 71 | ```
 72 | 
 73 | Generates tests and logs the results to the console.
 74 | 
 75 | ## **Note on Test File Management**
 76 | 
 77 | By default, the test files are temporarily stored in the **`node_modules/<library>/`** directory and are erased after testing. To change this behavior and save the test files, you can implement custom versions of the **`MochaValidator`** to a file saving version as shown below:
 78 | 
 79 | ```typescript
 80 | class CustomMochaValidator extends MochaValidator {
 81 |   constructor(packageName, packagePath, testDirectory) {
 82 |     super(packageName, packagePath);
 83 |     this.testDirectory = testDirectory; // Custom directory for saving test files
 84 |     // Ensure the directory exists
 85 |     if (!fs.existsSync(this.testDirectory)) {
 86 |       fs.mkdirSync(this.testDirectory, { recursive: true });
 87 |     }
 88 |   }
 89 | 
 90 |   validateTest(testName, testSource) {
 91 |     let testFile = path.join(this.testDirectory, testName + '.js');
 92 |     fs.writeFileSync(testFile, testSource);
 93 |     console.log(`Test saved to: ${testFile}`); // Log where the test is saved
 94 |     // Call original validateTest logic here if needed, or simulate a test outcome
 95 |     return { status: 'PASSED' }; // Simulate a passed test outcome
 96 |   }
 97 | 
 98 |   // Override the cleanup to prevent deletion
 99 |   cleanup() {
100 |     console.log('Cleanup skipped, tests preserved.');
101 |   }
102 | }
103 | ```
104 | 
105 | > OBS: The `CustomMochaValidator` implementation above is just an idea. It was not tested, unlike the code before.
106 | 
107 | ## Running the script
108 | 
109 | The code shown in this example is at `/examples/testGenerationScript.ts`, but it will not run by default. To run the test generation script follow the below steps:
110 | 
111 | 1. Copy `testGenerationScript.ts` to `src/`, making sure that the second import directory is `./`
112 | 
113 |     ```sh
114 |     cp examples/testGenerationScript.ts src/
115 |     ```
116 | 
117 | 2. Install Moment.js with `npm`
118 | 
119 |     ```sh
120 |     npm install moment
121 |     ```
122 | 
123 | 3. Build the files again
124 | 
125 |     ```sh
126 |     npm run build
127 |     ```
128 | 
129 | 4. Finally, set the environment variables and run the script with `node`:
130 | 
131 |     ```sh
132 |     export TESTPILOT_LLM_API_ENDPOINT='https://api.openai.com/v1/engines/gpt-3.5-turbo-instruct/completions'
133 |     export TESTPILOT_LLM_AUTH_HEADERS='{"Authorization": "Bearer <your API key>", "OpenAI-Organization": "<your organization ID>"}'
134 |     node dist/testGenerationScript.js
135 |     ```
136 | 


--------------------------------------------------------------------------------
/test/test-generation.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import dedent from "dedent";
  3 | import fs from "fs";
  4 | import path from "path";
  5 | import { MochaValidator } from "../src/mochaValidator";
  6 | import { TestStatus } from "../src/report";
  7 | 
  8 | describe("MochaValidator", function () {
  9 |   this.timeout(10000);
 10 | 
 11 |   function check(tests: string[], expectedOutcomes: TestStatus[]) {
 12 |     const testDir = fs.mkdtempSync(path.join(".", "test-"));
 13 |     const validator = new MochaValidator("", testDir);
 14 |     try {
 15 |       for (let i = 0; i < tests.length; i++) {
 16 |         const testName = `test_${i}.js`;
 17 |         const valid = validator.validateTest(testName, tests[i]);
 18 |         expect(valid.status).to.equal(expectedOutcomes[i]);
 19 |       }
 20 |     } finally {
 21 |       fs.rmdirSync(testDir, { recursive: true });
 22 |       validator.cleanup();
 23 |     }
 24 |   }
 25 | 
 26 |   it("should run tests and report pass", () => {
 27 |     let tests = [
 28 |       dedent`let mocha = require('mocha');
 29 |                     let assert = require('assert');
 30 |                     describe('test', () => {
 31 |                         it('test', () => {
 32 |                             assert([1, 2, 3].length === 3);
 33 |                         });
 34 |                     });`,
 35 |       dedent`let mocha = require('mocha');
 36 |                     let expect = require('chai').expect;
 37 |                     describe('test', () => {
 38 |                         it('test', () => {
 39 |                             expect([1, 2, 3, 4, 5].slice(1, 3)).to.eql([2, 3]);
 40 |                         });
 41 |                     });`,
 42 |     ];
 43 |     check(tests, [TestStatus.PASSED, TestStatus.PASSED]);
 44 |   });
 45 | 
 46 |   it("should run tests and report fail", () => {
 47 |     let tests = [
 48 |       dedent`let mocha = require('mocha');
 49 |                     let assert = require('assert');
 50 |                     describe('test', () => {
 51 |                         it('test', () => {
 52 |                             assert([1, 2, 3].length === 2);
 53 |                         });
 54 |                     });`,
 55 |       dedent`let mocha = require('mocha');
 56 |                     let expect = require('chai').expect;
 57 |                     describe('test', () => {
 58 |                         it('test', () => {
 59 |                             expect([1, 2, 3, 4, 5].slice(1, 3)).to.eql([3, 4]);
 60 |                         });
 61 |                     });`,
 62 |     ];
 63 |     check(tests, [TestStatus.FAILED, TestStatus.FAILED]);
 64 |   });
 65 | 
 66 |   it("should correctly classify a test reported as both passing and failing by Mocha", () => {
 67 |     let test = dedent`
 68 |             const fs = require('fs');
 69 |             describe('test fs', function() {
 70 |                 it('test fs.ReadStream.prototype.push', function(done) {
 71 |                     let rs = fs.createReadStream(__filename);
 72 |                     rs.push("hello world");
 73 |                     rs.on("data", () => done());
 74 |                 })
 75 |             })
 76 |         `;
 77 |     check([test], [TestStatus.FAILED]);
 78 |   });
 79 | 
 80 |   it.skip("should correctly classify another test reported as both passing and failing by Mocha", () => {
 81 |     let test = dedent`
 82 |             describe('test fs', function() {
 83 |                 it('test fs.ReadStream', function(done) {
 84 |                     new require('fs').ReadStream('/i/absolutely/do/not/exist');
 85 |                     done();
 86 |                 })
 87 |             })
 88 |         `;
 89 |     check([test], [TestStatus.FAILED]);
 90 |   });
 91 | 
 92 |   it("should be robust against Mocha crashing and not producing a report", () => {
 93 |     let test = "describe('totally broken test', function() {)";
 94 |     check([test], [TestStatus.FAILED]);
 95 |   });
 96 | 
 97 |   it("should be robust against non-terminating tests (this test takes about five seconds)", () => {
 98 |     let test = dedent`
 99 |             let assert = require('assert');
100 |             let glob = require('glob');
101 |             describe('test glob', function() {
102 |                 it('test glob.Glob.prototype.setMaxListeners', function(done) {
103 |                     glob.Glob.prototype.setMaxListeners(2);
104 |                     let p = glob.Glob("./**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/*/index.js", {nodir: true}, (err, files) => {
105 |                         console.log("end");
106 |                         console.log(p);
107 |                     })
108 |                 })
109 |             })
110 |         `;
111 |     check([test], [TestStatus.FAILED]);
112 |   }).timeout(6000);
113 | 
114 |   it("should not classify a test as failing simply because it prints an error message to stderr", () => {
115 |     let test = dedent`
116 |             let assert = require('assert');
117 |             describe('test', function() {
118 |                 it('test', function(done) {
119 |                     console.error("Error: hello world");
120 |                     assert(true);
121 |                     done();
122 |                 })
123 |             })
124 |         `;
125 |     check([test], [TestStatus.PASSED]);
126 |   });
127 | });
128 | 


--------------------------------------------------------------------------------
/test/syntax.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import dedent from "dedent";
  3 | import { closeBrackets, commentOut, trimCompletion } from "../src/syntax";
  4 | 
  5 | describe("test closeBrackets", function () {
  6 |   it("should handle syntactically invalid code", function () {
  7 |     expect(
  8 |       closeBrackets(dedent`
  9 |             function f({) {
 10 |                 return 1;
 11 |             }
 12 |         `)
 13 |     ).to.be.undefined;
 14 |   });
 15 | 
 16 |   it("should handle code that closes more brackets than it opens", function () {
 17 |     expect(
 18 |       closeBrackets(dedent`
 19 |             function f() {
 20 |                 return 1;
 21 |             }}
 22 |         `)
 23 |     ).to.be.undefined;
 24 |   });
 25 | 
 26 |   it("should skip brackets in comments", function () {
 27 |     const complete = dedent`
 28 |             let mocha = require('mocha');
 29 |             let assert = require('assert');
 30 |             // testing {
 31 |             describe('test', () => {
 32 |                 it('test', () => { // tests sth (
 33 |                     assert([1, 2, 3].length === 3);
 34 |                 });
 35 |             });`;
 36 |     let result = closeBrackets(complete);
 37 |     expect(result).to.not.be.undefined;
 38 |     expect(result!.source).to.equal(complete);
 39 |   });
 40 | 
 41 |   let template = dedent`
 42 |     let mocha = require('mocha');
 43 |     let assert = require('assert');
 44 |     // testing (
 45 |     describe('test', () => {
 46 |         it('test', () => { // tests sth {
 47 |             assert([1, 2, 3].length === 3);<1>})}<2>)<3>`;
 48 | 
 49 |   for (const i of [1, 2, 3]) {
 50 |     it(`should complete from <${i}>`, function () {
 51 |       let incomplete = template
 52 |         .slice(0, template.indexOf(`<${i}>`))
 53 |         .replace(/<\d>/g, "");
 54 |       let complete = template.replace(/<\d>/g, "");
 55 |       let result = closeBrackets(incomplete);
 56 |       expect(result).to.not.be.undefined;
 57 |       expect(result!.source).to.equal(complete);
 58 |     });
 59 |   }
 60 | 
 61 |   it("should handle square brackets", function () {
 62 |     expect(
 63 |       closeBrackets(dedent`
 64 |             let arr = [
 65 |                 [1, 2, 3],
 66 |                 [4, 5, 6
 67 |             `)!.source
 68 |     ).to.equal(dedent`
 69 |             let arr = [
 70 |                 [1, 2, 3],
 71 |                 [4, 5, 6]]
 72 |             `);
 73 |   });
 74 | });
 75 | 
 76 | describe("test trimCompletion", function () {
 77 |   it("should trim off incomplete lines", function () {
 78 |     expect(
 79 |       trimCompletion(dedent`
 80 |             assert([1, 2, 3].length === 3);
 81 |             assert(
 82 |         `)
 83 |     ).to.equal(dedent`
 84 |             assert([1, 2, 3].length === 3);
 85 |         `);
 86 |   });
 87 | 
 88 |   it("should not trim off complete statements", function () {
 89 |     expect(
 90 |       trimCompletion(dedent`
 91 |             assert([1, 2, 3].length === 3);
 92 |             assert([1, 2].length === 2);
 93 |         `)
 94 |     ).to.equal(dedent`
 95 |             assert([1, 2, 3].length === 3);
 96 |             assert([1, 2].length === 2);
 97 |         `);
 98 |   });
 99 | 
100 |   it("should not trim off complete statements, even if followed by whitespace", function () {
101 |     expect(trimCompletion("assert([1, 2, 3].length === 3);  ")).to.equal(
102 |       "assert([1, 2, 3].length === 3);"
103 |     );
104 |   });
105 | 
106 |   it("should not trim off complete blocks", function () {
107 |     expect(
108 |       trimCompletion(dedent`
109 |             if (true) {
110 |                 assert([1, 2, 3].length === 3);
111 |             }
112 |         `)
113 |     ).to.equal(dedent`
114 |             if (true) {
115 |                 assert([1, 2, 3].length === 3);
116 |             }
117 |         `);
118 |   });
119 | 
120 |   it("should correctly trim incomplete statements if there is only a single line", function () {
121 |     expect(
122 |       trimCompletion(dedent`
123 |             assert(
124 |         `)
125 |     ).to.equal("");
126 |   });
127 | 
128 |   it("should trim completions that close more brackets than they open", function () {
129 |     expect(
130 |       trimCompletion(dedent`
131 |             assert([1, 2, 3].length === 3);
132 |         });
133 |         it('should do something else', function () {
134 |             assert([1, 2].length === 2)
135 |         `)
136 |     ).to.equal(dedent`
137 |             assert([1, 2, 3].length === 3);
138 |         `);
139 |   });
140 | 
141 |   it("should trim completions that close more parentheses than they open", function () {
142 |     expect(
143 |       trimCompletion(dedent`
144 |             assert([1, 2, 3].length === 3));
145 |         `)
146 |     ).to.equal(dedent`
147 |             assert([1, 2, 3].length === 3)
148 |         `);
149 |   });
150 | });
151 | 
152 | describe("test commentOut", function () {
153 |   it("should comment out a single line", function () {
154 |     expect(commentOut("line\n")).to.equal("// line\n");
155 |   });
156 | 
157 |   it("should comment out multiple lines", function () {
158 |     expect(commentOut("line 1\nline 2\n")).to.equal("// line 1\n// line 2\n");
159 |   });
160 | 
161 |   it("should add a final newline if it is missing", function () {
162 |     expect(commentOut("line")).to.equal("// line\n");
163 |   });
164 | 
165 |   it("should return the empty string if the input is empty", function () {
166 |     expect(commentOut("")).to.equal("");
167 |   });
168 | });
169 | 


--------------------------------------------------------------------------------
/test/docSnippets.ts:
--------------------------------------------------------------------------------
  1 | import { expect } from "chai";
  2 | import dedent from "dedent";
  3 | import deepEqualInAnyOrder from "deep-equal-in-any-order";
  4 | import * as docSnippetMiner from "../src/docSnippets";
  5 | 
  6 | const chai = require("chai");
  7 | chai.use(deepEqualInAnyOrder);
  8 | 
  9 | describe("unit test findFencedCodeBlocks", () => {
 10 |   it("should not detect snippets in non-covered language fencing", () => {
 11 |     const extractedSnippets = docSnippetMiner.findFencedCodeBlocks(
 12 |       `${__dirname}/input/coffee-fencing.md`
 13 |     );
 14 |     expect(extractedSnippets.size).equal(0);
 15 |   });
 16 | 
 17 |   it("should detect snippets in general fencing", () => {
 18 |     const expectedSnippets = [
 19 |       "```\n" +
 20 |         "const vol = Volume.fromJSON({\n" +
 21 |         "  '/app/index.js': '...',\n" +
 22 |         "  '/app/package.json': '...',\n" +
 23 |         "});\n" +
 24 |         "```",
 25 |     ];
 26 |     const extractedSnippets = docSnippetMiner.findFencedCodeBlocks(
 27 |       `${__dirname}/input/non-lang-fencing.md`
 28 |     );
 29 |     expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets);
 30 |   });
 31 | 
 32 |   it("should detect snippet in js fencing", () => {
 33 |     const expectedSnippets = [
 34 |       dedent`
 35 |         \`\`\`js
 36 |         const vol = Volume.fromJSON({
 37 |           "/app/index.js": "...",
 38 |           "/app/package.json": "...",
 39 |         });
 40 |         \`\`\`
 41 |       `,
 42 |     ];
 43 |     const extractedSnippets = docSnippetMiner.findFencedCodeBlocks(
 44 |       `${__dirname}/input/js-fencing-1.md`
 45 |     );
 46 |     expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets);
 47 |   });
 48 | 
 49 |   it("should detect snippet in ts fencing", () => {
 50 |     const expectedSnippets = [
 51 |       dedent`
 52 |         \`\`\`ts
 53 |         const vol = Volume.fromJSON({
 54 |           "/app/index.js": "...",
 55 |           "/app/package.json": "...",
 56 |         });
 57 |         \`\`\`
 58 |       `,
 59 |     ];
 60 |     const extractedSnippets = docSnippetMiner.findFencedCodeBlocks(
 61 |       `${__dirname}/input/ts-fencing-1.md`
 62 |     );
 63 |     expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets);
 64 |   });
 65 | 
 66 |   it.skip("should detect snippet with formatted fencing", () => {
 67 |     const expectedSnippets = [
 68 |       "```js\nconcat = require('pull-stream/sinks/concat')\n```",
 69 |       "```js\nconcat(cb)\n```",
 70 |     ];
 71 |     const extractedSnippets = docSnippetMiner.findFencedCodeBlocks(
 72 |       `${__dirname}/input/pull-stream-concat.md`
 73 |     );
 74 |     expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets);
 75 |   });
 76 | });
 77 | 
 78 | describe("unit tests for callsAPIMethod", () => {
 79 |   it("should find method call in js fencing", () => {
 80 |     const inputSnippet =
 81 |       "```js\n" +
 82 |       "const vol = Volume.fromJSON({\n" +
 83 |       "  '/app/index.js': '...',\n" +
 84 |       "  '/app/package.json': '...',\n" +
 85 |       "});\n" +
 86 |       "```";
 87 | 
 88 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "fromJSON")).to.be.true;
 89 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "Volume")).to.be.false;
 90 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "app")).to.be.false;
 91 |   });
 92 | 
 93 |   it("should not partially match method names", () => {
 94 |     const inputSnippet =
 95 |       "```js\n" +
 96 |       "vol.writeFileSync('/script.sh', 'sudo rm -rf *')\n" +
 97 |       'vol.toJSON(); // {"/script.sh": "sudo rm -rf *"}\n' +
 98 |       "fromTest();\n" +
 99 |       "toFile = 5;\n";
100 |     ("```");
101 | 
102 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "toJSON")).to.be.true;
103 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "JSON")).to.be.false;
104 | 
105 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "fromTest")).to.be.true;
106 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "from")).to.be.false;
107 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "Test")).to.be.false;
108 | 
109 |     expect(docSnippetMiner.callsAPIMethod(inputSnippet, "toFile")).to.be.false;
110 |   });
111 | });
112 | 
113 | describe("test snippet trimming to max length", () => {
114 |   it("should not trim", () => {
115 |     const inputSnippet = dedent`
116 |       import { fs } from 'memfs';
117 | 
118 |       fs.writeFileSync('/hello.txt', 'World!');
119 |       fs.readFileSync('/hello.txt', 'utf8'); // World!
120 |     `;
121 | 
122 |     expect(docSnippetMiner.trimSnippetToMaxLength(inputSnippet, 4)).to.equal(
123 |       inputSnippet
124 |     );
125 |     expect(docSnippetMiner.trimSnippetToMaxLength(inputSnippet, 6)).to.equal(
126 |       inputSnippet
127 |     );
128 |   });
129 | 
130 |   it("it should trim to maxLength", () => {
131 |     const inputSnippet = dedent`
132 |         import { fs, vol } from 'memfs';
133 |         
134 |         const json = {
135 |           './README.md': '1',
136 |           './src/index.js': '2',
137 |           './node_modules/debug/index.js': '3',
138 |         };
139 |         vol.fromJSON(json, '/app');
140 |         
141 |         fs.readFileSync('/app/README.md', 'utf8'); // 1
142 |         vol.readFileSync('/app/src/index.js', 'utf8'); // 2
143 |     `;
144 | 
145 |     const expectedSnippet = dedent`
146 |       import { fs, vol } from 'memfs';
147 |       
148 |       const json = {
149 |         './README.md': '1',
150 |         './src/index.js': '2',
151 |         './node_modules/debug/index.js': '3',
152 |     `;
153 | 
154 |     expect(docSnippetMiner.trimSnippetToMaxLength(inputSnippet, 6)).to.equal(
155 |       expectedSnippet
156 |     );
157 |   });
158 | });
159 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Note: This version of TestPilot has been archived. Please refer to the new version at [https://github.com/neu-se/testpilot2](https://github.com/neu-se/testpilot2).
  2 | 
  3 | # TestPilot
  4 | 
  5 | TestPilot is a tool for automatically generating unit tests for npm packages
  6 | written in JavaScript/TypeScript using a large language model (LLM).
  7 | 
  8 | Note that TestPilot represents an early exploration in the use of LLMs for
  9 | test generation, and has been made available in open source as a basis for
 10 | research and exploration. For day-to-day use the test generation features
 11 | in [Copilot Chat](https://docs.github.com/en/copilot/github-copilot-chat/about-github-copilot-chat)
 12 | are likely to yield better results.
 13 | 
 14 | ## Background
 15 | 
 16 | TestPilot generates tests for a given function `f` by prompting the LLM with a
 17 | skeleton of a test for `f`, including information about `f` embedded in code
 18 | comments, such as its signature, the body of `f`, and examples usages of `f`
 19 | automatically mined from project documentation. The model's response is then
 20 | parsed and translated into a runnable unit test. Optionally, the test is run and
 21 | if it fails the model is prompted again with additional information about the
 22 | failed test, giving it a chance to refine the test.
 23 | 
 24 | Unlike other systems for LLM-based test generation, TestPilot does not require
 25 | any additional training or reinforcement learning, and no examples of functions
 26 | and their associated tests are needed.
 27 | 
 28 | A research paper describing TestPilot in detail is available on
 29 | [arXiv](https://arxiv.org/abs/2302.06527) and [IEEExplore](https://ieeexplore.ieee.org/document/10329992).
 30 | 
 31 | ## Requirements
 32 | 
 33 | In general, to be able to run TestPilot you need access to a Codex-style LLM
 34 | with completion API. Set the `TESTPILOT_LLM_API_ENDPOINT` environment variable to
 35 | the URL of the LLM API endpoint you want to use, and
 36 | `TESTPILOT_LLM_AUTH_HEADERS` to a JSON object containing the headers you need to
 37 | authenticate with the API.
 38 | 
 39 | Typical values for these variables might be:
 40 | 
 41 | - `TESTPILOT_LLM_API_ENDPOINT='https://api.openai.com/v1/engines/code-cushman-001/completions'`
 42 | - `TESTPILOT_LLM_AUTH_HEADERS='{"Authorization": "Bearer <your API key>", "OpenAI-Organization": "<your organization ID>"}'`
 43 | 
 44 | Note, however, that you can run TestPilot in reproduction mode without access to
 45 | the LLM API where model responses are taken from the output of a previous run;
 46 | see below for details.
 47 | 
 48 | ## Installation
 49 | 
 50 | You can install TestPilot from a pre-built package or from source.
 51 | 
 52 | ### Installing from a pre-built package
 53 | 
 54 | TestPilot is a available as a pre-built npm package, though it is not currently
 55 | published to the npm registry. You can download a tarball from the repository
 56 | and install it in the usual way. Note that this distribution only contains the
 57 | core part of TestPilot, not the benchmarking harness.
 58 | 
 59 | ### Installing from source
 60 | 
 61 | The `src/` directory contains the source code for TestPilot, which is written in
 62 | TypeScript and gets compiled into the `dist/` directory. Tests are in `test/`;
 63 | the `benchmark/` directory contains a benchmarking harness for running TestPilot
 64 | on multiple npm packages; and `ql/` contains the CodeQL queries used to analyze
 65 | the results.
 66 | 
 67 | In the root directory of a checkout of this repository, run `npm build` to
 68 | install dependencies and build the package.
 69 | 
 70 | You can also use `npm run build:watch` to automatically build anytime you make
 71 | changes to the code. Note, however, that this will not automatically install
 72 | dependencies, and also will not build the benchmarking harness.
 73 | 
 74 | Use `npm run test` to run the tests. For convenience, this will also install
 75 | dependencies and run a build.
 76 | 
 77 | ## Benchmarking
 78 | 
 79 | If you install TestPilot from source, you can use the benchmarking harness to
 80 | run TestPilot on multiple packages and analyze the results. This is not
 81 | currently available if you install TestPilot from a pre-built package.
 82 | 
 83 | ### Running locally
 84 | 
 85 | Basic usage is as follows:
 86 | 
 87 | ```sh
 88 | node benchmark/run.js --outputDir <report_dir> --package <package_dir>
 89 | ```
 90 | 
 91 | This generates tests for all functions exported by the package in
 92 | `<package_dir>`, validates them, and writes the results to `<report_dir>`.
 93 | 
 94 | Note that this assumes that package dependencies are installed and any build
 95 | steps have been run (e.g., using `npm i` and `npm run build`). TestPilot also
 96 | relies on `mocha`, so if the package under test does not already depend on it,
 97 | you must install it separately, for example using the command `npm i --no-save
 98 | mocha`.
 99 | 
100 | ### Running on Actions
101 | 
102 | The `run-experiment.yml` workflow runs an experiment on GitHub Actions,
103 | producing the final report as an artifact you can download. The `results-all`
104 | artifact contains the results of all packages, while the other artifacts contain
105 | the individual results of each package.
106 | 
107 | ### Reproducing results
108 | 
109 | The results of TestPilot are non-deterministic, so even if you run it from the
110 | same package on the same machine multiple times, you will get different results.
111 | However, the benchmarking harness records enough data to be able to replay a
112 | benchmark run in many cases.
113 | 
114 | To do this, use the `--api` and `--responses` options to reuse the API listings
115 | and responses from a previous run:
116 | 
117 | ```sh
118 | node benchmark/run.js --outputDir <report_dir> --package <package_dir> --api <api.json> --responses <prompts.json>
119 | ```
120 | 
121 | Note that by default replay will fail if any of the prompts are not found in the
122 | responses file. This typically happens if TestPilot is refining failing tests,
123 | since in this case the prompt to the model depends on the exact failure message,
124 | which can be system-specific (e.g., containing local file-system paths), or
125 | depend on the Node.js version or other factors.
126 | 
127 | To work around these limitations, you can pass the `--strictResponses false`
128 | flag handle treat missing prompts by treating them as getting no response from
129 | the model. This will not, in general, produce the same results as the initial
130 | run, but suffices in many cases.
131 | 
132 | ### Analyzing results
133 | 
134 | The CodeQL queries in `ql/queries` can be used to analyze the results of running
135 | an experiment. See `ql/CodeQL.md` for instructions on how to setup CodeQL and
136 | run the queries.
137 | 
138 | ## License
139 | 
140 | This project is licensed under the terms of the MIT open source license. Please refer to [MIT](./LICENSE.txt) for the full terms.
141 | 
142 | ## Maintainers
143 | 
144 | - Max Schaefer (@max-schaefer)
145 | - Frank Tip (@franktip)
146 | - Sarah Nadi (@snadi)
147 | 
148 | ## Support
149 | 
150 | TestPilot is a research prototype and is not officially supported. However, if
151 | you have questions or feedback, please file an issue and we will do our best to
152 | respond.
153 | 
154 | ## Acknowledgement
155 | 
156 | We thank Aryaz Eghbali (@aryaze) for his work on the initial version of
157 | TestPilot.
158 | 


--------------------------------------------------------------------------------
/src/mineSnippets.ts:
--------------------------------------------------------------------------------
  1 | import * as cp from "child_process";
  2 | import * as fs from "fs";
  3 | import * as os from "os";
  4 | import * as path from "path";
  5 | import yargs from "yargs";
  6 | import { hideBin } from "yargs/helpers";
  7 | import AdmZip from "adm-zip";
  8 | import { Snippets } from "./snippetHelper";
  9 | 
 10 | const snippetHelper = new Snippets();
 11 | 
 12 | /**
 13 |  * Extract raw information about usage snippets for the given methods from the
 14 |  * given CodeQL database.
 15 |  *
 16 |  * @param database The path to the CodeQL database.
 17 |  * @param methods The methods to extract usage snippets for.
 18 |  * @returns A stream of result tuples `{id, method, file, line}`, where `id` is
 19 |  *          the CodeQL ID of a call to `method`, and `file`:`line` belongs to
 20 |  *          the intraprocedural slice of this call.
 21 |  */
 22 | export function* getSnippetData(database: string, methods: string[]) {
 23 |   // create temporary CSV file to store relevant method names in
 24 |   const csvFile = `${os.tmpdir()}/targetMethod.csv`;
 25 |   const escapedMethodNames = methods.map(
 26 |     (method) => `"${method.replace(/"/g, '""')}"`
 27 |   );
 28 |   fs.writeFileSync(csvFile, escapedMethodNames.join("\n") + "\n");
 29 | 
 30 |   // run mining query
 31 |   const bqrsFile = `${os.tmpdir()}/results.bqrs`;
 32 |   cp.execFileSync(
 33 |     "codeql",
 34 |     [
 35 |       "query",
 36 |       "run",
 37 |       "-d",
 38 |       database,
 39 |       "-o",
 40 |       bqrsFile,
 41 |       "--external",
 42 |       `targetFunction=${csvFile}`,
 43 |       path.join(__dirname, "../../ql/queries/SnippetMining.ql"),
 44 |     ],
 45 |     { stdio: "inherit" }
 46 |   );
 47 | 
 48 |   // decode results into CSV format
 49 |   const outputFile = `${os.tmpdir()}/results.csv`;
 50 |   cp.execFileSync(
 51 |     "codeql",
 52 |     [
 53 |       "bqrs",
 54 |       "decode",
 55 |       "--format",
 56 |       "csv",
 57 |       "--no-titles",
 58 |       "--entities",
 59 |       "id",
 60 |       "--output",
 61 |       outputFile,
 62 |       bqrsFile,
 63 |     ],
 64 |     { stdio: "inherit" }
 65 |   );
 66 | 
 67 |   const results = fs.readFileSync(outputFile, "utf8");
 68 |   for (const data of results.split("\n")) {
 69 |     let [id, method, file, line] = data.split(",");
 70 |     if (!id) {
 71 |       continue;
 72 |     }
 73 |     yield {
 74 |       id: +id,
 75 |       method: method.slice(1, -1),
 76 |       file: file.slice(1, -1),
 77 |       line: +line,
 78 |     };
 79 |   }
 80 | }
 81 | 
 82 | type SnippetMap = [string, Map<string, number[]>][];
 83 | 
 84 | /**
 85 |  * Extract structured information about usage snippets for the given methods
 86 |  * from the given CodeQL database
 87 |  *
 88 |  * @param database The path to the CodeQL database.
 89 |  * @param methods The methods to extract usage snippets for.
 90 |  * @returns A sparse array indexed by CodeQL IDs. For each ID it records the
 91 |  *          name of the called method as well as a map from file names to
 92 |  *          relevant line numbers in that file.
 93 |  */
 94 | export function getSnippetsInfo(
 95 |   database: string,
 96 |   methods: string[]
 97 | ): SnippetMap {
 98 |   const snippets: SnippetMap = [];
 99 | 
100 |   for (const { id, method, file, line } of getSnippetData(database, methods)) {
101 |     if (!snippets[id]) {
102 |       snippets[id] = [method, new Map()];
103 |     }
104 |     const fileMap = snippets[id][1];
105 |     if (!fileMap.has(file)) {
106 |       fileMap.set(file, []);
107 |     }
108 |     const lineNumbers = fileMap.get(file)!;
109 |     lineNumbers.push(line);
110 |   }
111 | 
112 |   return snippets;
113 | }
114 | 
115 | /**
116 |  * Extract usage snippets for the given methods from the given CodeQL database.
117 |  *
118 |  * @param database The path to the CodeQL database.
119 |  * @param numSnippets The number of snippets to extract.
120 |  * @param methods The methods to extract usage snippets for.
121 |  * @param maxLength The maximum number of lines to include in each snippet.
122 |  * @returns A string array of usage snippets.
123 |  */
124 | 
125 | export function getSnippets(
126 |   database: string,
127 |   numSnippets: number,
128 |   methods: string[],
129 |   maxLength: number
130 | ): Map<string, string[]> {
131 |   let results = new Map<string, Set<string>>();
132 | 
133 |   // mine snippets
134 |   const snippets = getSnippetsInfo(database, methods);
135 | 
136 |   // now output them
137 |   const srcArchive = new AdmZip(path.join(database, "src.zip"));
138 |   for (const i in snippets) {
139 |     const [methodName, files] = snippets[i];
140 |     let currentSnippet = `for ${methodName}`;
141 |     for (const [file, lineNumbers] of files.entries()) {
142 |       const contents = srcArchive.readAsText(file.slice(1));
143 |       const lines = contents.split("\n");
144 | 
145 |       // pull out relevant lines from the file and record
146 |       // minimum indentation level
147 |       let relevantLineNumbers = lineNumbers.sort((a, b) => a - b);
148 |       if (maxLength !== -1) {
149 |         relevantLineNumbers = relevantLineNumbers.slice(-maxLength);
150 |       }
151 |       const relevantLines = [];
152 |       let minIndent = -1;
153 |       for (const lineNumber of relevantLineNumbers) {
154 |         const line = lines[lineNumber - 1] || "";
155 |         const indent = line.search(/\S/);
156 |         if (minIndent === -1 || indent < minIndent) {
157 |           minIndent = indent;
158 |         }
159 |         relevantLines.push(line);
160 |       }
161 |       if (minIndent === -1) {
162 |         minIndent = 0;
163 |       }
164 | 
165 |       // output relevant lines, outdenting them by the minimum indentation
166 |       for (const line of relevantLines) {
167 |         currentSnippet += `\n ${line}`;
168 |       }
169 |     }
170 |     if (results.has(methodName)) {
171 |       results.get(methodName)!.add(currentSnippet);
172 |     } else {
173 |       results.set(methodName, new Set([currentSnippet]));
174 |     }
175 |   }
176 | 
177 |   // select snippets that are dissimilar
178 |   let finalSnippets = new Map<string, string[]>();
179 |   for (let [method, snippets] of results) {
180 |     // if we have too many snippets, throw some away (snippet selection doesn't scale beyond ~50 snippets)
181 |     if (snippets.size > snippetHelper.MAX_SNIPPETS) {
182 |       snippets = new Set([...snippets].slice(0, snippetHelper.MAX_SNIPPETS));
183 |     }
184 |     let selectedSnippets = snippetHelper.selectSnippets(snippets, numSnippets);
185 |     finalSnippets.set(method, Array.from(selectedSnippets));
186 |     snippetHelper.distanceCache.clear();
187 |   }
188 |   return finalSnippets;
189 | }
190 | 
191 | if (require.main === module) {
192 |   (async () => {
193 |     const parser = yargs(hideBin(process.argv))
194 |       .usage("$0 [-n <num>] [-l <max-length>] <database> <method>")
195 |       .example(
196 |         "$0 ~/databases/memfs toJSON",
197 |         "extract three usage snippets for method toJSON from the memfs database"
198 |       )
199 |       .option("n", {
200 |         describe: "number of snippets to generate",
201 |         default: 3,
202 |         type: "number",
203 |       })
204 |       .option("l", {
205 |         alias: "length",
206 |         describe: "maximum length of each snippet in lines; -1 means no limit",
207 |         default: -1,
208 |         type: "number",
209 |       })
210 |       .demand(2);
211 |     const argv = await parser.argv;
212 |     const database = argv._[0] as string;
213 |     const methods = argv._.slice(1) as string[];
214 |     const numSnippets = argv.n;
215 |     const maxLength = argv.l;
216 |     const allSnippets = getSnippets(database, numSnippets, methods, maxLength);
217 |     for (const [method, snippets] of allSnippets) {
218 |       console.log(`${method}:`);
219 |       console.log(snippets.join("\n"));
220 |     }
221 |   })().catch((err) => {
222 |     console.error(err);
223 |     process.exit(1);
224 |   });
225 | }
226 | 


--------------------------------------------------------------------------------
/benchmark/editDistance.ts:
--------------------------------------------------------------------------------
  1 | import * as fs from "fs";
  2 | import levenshtein from "levenshtein";
  3 | import fg from "fast-glob";
  4 | import yargs from "yargs";
  5 | import { hideBin } from "yargs/helpers";
  6 | 
  7 | export interface Test {
  8 |   fileName: string; // in what file was the test found
  9 |   index: number; // the index of the test in the file
 10 |   contents: string; // the contents of the test
 11 | }
 12 | 
 13 | export interface SimilarityReportEntry {
 14 |   generatedTestName: string;
 15 |   generatedTestCode: string;
 16 |   mostSimilarTest: Test;
 17 |   similarity: number;
 18 | }
 19 | 
 20 | export interface SimilarityReport {
 21 |   numGeneratedTests: number;
 22 |   numExistingTests: number;
 23 |   similarities: SimilarityReportEntry[];
 24 |   maxSimilarity: number;
 25 | }
 26 | 
 27 | const testLoc: { [key: string]: string } = {
 28 |   glob: "test",
 29 |   "fs-extra": "lib/**/__tests__",
 30 |   "graceful-fs": "test",
 31 |   jsonfile: "test",
 32 |   bluebird: "test",
 33 |   q: "spec",
 34 |   rsvp: "test",
 35 |   memfs: "src/__tests__",
 36 |   "node-dir": "test",
 37 |   "zip-a-folder": "test",
 38 |   "js-sdsl": "test",
 39 |   "quill-delta": "test",
 40 |   "complex.js": "tests",
 41 |   "pull-stream": "test",
 42 |   "countries-and-timezones": "test",
 43 |   "simple-statistics": "test",
 44 |   plural: "test.js",
 45 |   dirty: "test",
 46 |   "geo-point": "src/geo-point.spec.ts",
 47 |   uneval: "test.js",
 48 |   omnitool: "test",
 49 |   core: "test",
 50 |   "image-downloader": "test",
 51 |   "crawler-url-parser": "test",
 52 |   "gitlab-js": "test",
 53 | };
 54 | 
 55 | /**
 56 |  * Parse a file and return all tests in it
 57 |  * @param fileName the name of the file
 58 |  * @param contents the contents of the file
 59 |  * @returns the set of tests in the file
 60 |  **/
 61 | export function parseTests(fileName: string, contents: string): Set<Test> {
 62 |   const tests = new Set<Test>();
 63 | 
 64 |   const callToIt = /\b(it|test)\s*\(\s*['`"].*['`"],/g; // pattern specifying where a tests starts, including its it description
 65 | 
 66 |   // find all index positions where this regexp matches and then figure out where it ends by counting parentheses and curly braces
 67 |   let match;
 68 |   while ((match = callToIt.exec(contents))) {
 69 |     const index = match.index;
 70 | 
 71 |     // find index of open curly brace defining test body, ignoring any open curly braces in the test description
 72 |     const indexToStartSearch = index + match[0].length;
 73 |     const openCurlyBraceIndex = contents.indexOf("{", indexToStartSearch);
 74 | 
 75 |     if (openCurlyBraceIndex === -1) {
 76 |       console.warn(
 77 |         "WARNING: No open curly brace found for test starting at index " +
 78 |           index +
 79 |           " in file " +
 80 |           fileName +
 81 |           ". Skipping test."
 82 |       );
 83 |       continue;
 84 |     }
 85 | 
 86 |     // find index of matching closing curly brace
 87 |     let openCurlyBraces = 1;
 88 |     let closeCurlyBraceIndex = openCurlyBraceIndex;
 89 |     for (let i = openCurlyBraceIndex + 1; i < contents.length; i++) {
 90 |       if (contents[i] === "{") {
 91 |         openCurlyBraces++;
 92 |       } else if (contents[i] === "}") {
 93 |         openCurlyBraces--;
 94 |         if (openCurlyBraces === 0) {
 95 |           closeCurlyBraceIndex = i;
 96 |           break;
 97 |         }
 98 |       }
 99 |     }
100 |     // find index of matching closing parenthesis
101 |     for (let i = closeCurlyBraceIndex + 1; i < contents.length; i++) {
102 |       if (contents[i] === ")") {
103 |         closeCurlyBraceIndex = i;
104 |         break;
105 |       }
106 |     }
107 | 
108 |     const testCode = contents.substring(index, closeCurlyBraceIndex + 1);
109 | 
110 |     tests.add({ fileName: fileName, index: tests.size, contents: testCode });
111 |   }
112 | 
113 |   return tests;
114 | }
115 | 
116 | /**
117 |  * find all tests in a directory and its subdirectories
118 |  * @param patterns the files and directories to search, specified as a glob pattern
119 |  * @returns an array of test names
120 |  */
121 | export function findTests(
122 |   pkgName: string,
123 |   testDir: string,
124 |   isGenerated: boolean = false
125 | ): Set<Test> {
126 |   var testFilePatterns = "tests/*.js";
127 | 
128 |   if (!isGenerated) {
129 |     testFilePatterns = testLoc[pkgName];
130 | 
131 |     testFilePatterns =
132 |       testFilePatterns.endsWith(".ts") || testFilePatterns.endsWith(".js")
133 |         ? testFilePatterns
134 |         : testFilePatterns + "/**/*.(js|ts)";
135 |   }
136 | 
137 |   const tests = new Set<Test>();
138 |   const testFiles = fg.sync(`${testDir}/${testFilePatterns}`, { dot: true });
139 | 
140 |   testFiles.forEach((f) => {
141 |     const contents = fs.readFileSync(`${f}`, "utf8");
142 |     const fileTests = parseTests(f, contents);
143 |     fileTests.forEach((t) => tests.add(t));
144 |   });
145 |   return tests;
146 | }
147 | 
148 | /**
149 |  * Generate a report on the similarity of tests in two directories
150 |  * @param existingTestsPatterns glob pattern of paths of existing tests
151 |  * @param generatedTestsPatterns glob pattern of paths of generated tests
152 |  */
153 | export function generateReport(
154 |   pkgName: string,
155 |   existingTestsDir: string,
156 |   generatedTestsDir: string
157 | ): SimilarityReport {
158 |   const existingTests = findTests(pkgName, existingTestsDir);
159 |   const generatedTests = findTests(pkgName, generatedTestsDir, true);
160 | 
161 |   console.log(
162 |     `Found ${existingTests.size} existing tests and ${generatedTests.size} generated tests.`
163 |   );
164 | 
165 |   const report = {} as SimilarityReport;
166 |   report.numExistingTests = existingTests.size;
167 |   report.numGeneratedTests = generatedTests.size;
168 |   report.similarities = [];
169 | 
170 |   var overallMaxSimilarity = 0;
171 | 
172 |   // for each test in the generated tests, find the maximum similarity to an existing test
173 |   generatedTests.forEach((generatedTest) => {
174 |     let maxSimilarity = 0;
175 |     let mostSimilarTest = {
176 |       fileName: "NOT_FOUND",
177 |       index: -1,
178 |       contents: "NOT_FOUND",
179 |     };
180 |     existingTests.forEach((existingTest) => {
181 |       const similarity =
182 |         1 -
183 |         new levenshtein(generatedTest.contents, existingTest.contents)
184 |           .distance /
185 |           Math.max(generatedTest.contents.length, existingTest.contents.length);
186 |       if (similarity > maxSimilarity) {
187 |         maxSimilarity = similarity;
188 |         mostSimilarTest = existingTest;
189 |       }
190 |     });
191 |     //console.log(`generated test ${generatedTest.fileName} has maximal similarity ${maxSimilarity} to existing test#${mostSimilarTest.index} in ${mostSimilarTest.fileName}`);
192 |     report.similarities.push({
193 |       generatedTestName: generatedTest.fileName,
194 |       generatedTestCode: generatedTest.contents,
195 |       mostSimilarTest: mostSimilarTest,
196 |       similarity: maxSimilarity,
197 |     });
198 | 
199 |     if (maxSimilarity > overallMaxSimilarity) {
200 |       overallMaxSimilarity = maxSimilarity;
201 |     }
202 |   });
203 | 
204 |   report.maxSimilarity = overallMaxSimilarity;
205 |   return report;
206 | }
207 | 
208 | if (require.main === module) {
209 |   (async () => {
210 |     // example usage: node benchmark/editDistance.js --pkgName countries-and-timezones --generatedTestsDir 'results/countries-and-timezones/tests' --existingTestsDir 'bencmarks/countries-and-timezones'
211 |     const parser = yargs(hideBin(process.argv))
212 |       .strict()
213 |       .options({
214 |         generatedTestsDir: {
215 |           type: "string",
216 |           demandOption: true,
217 |           description: "directory where the generated tests are",
218 |         },
219 |         existingTestsDir: {
220 |           type: "string",
221 |           demandOption: true,
222 |           description: "directory where the existing tests are",
223 |         },
224 |         pkgName: {
225 |           type: "string",
226 |           demandOption: true,
227 |           description: "name of the package",
228 |         },
229 |       });
230 | 
231 |     const argv = await parser.argv;
232 | 
233 |     const report = generateReport(
234 |       argv.pkgName,
235 |       argv.existingTestsDir,
236 |       argv.generatedTestsDir
237 |     );
238 | 
239 |     const json = JSON.stringify(report, null, 2);
240 |     fs.writeFileSync("similarityReport.json", json, "utf8");
241 |   })().catch((e) => {
242 |     console.error(e);
243 |     process.exit(1);
244 |   });
245 | }
246 | 


--------------------------------------------------------------------------------
/src/mochaValidator.ts:
--------------------------------------------------------------------------------
  1 | import path from "path";
  2 | import fs from "fs";
  3 | import os from "os";
  4 | import child_process from "child_process";
  5 | import { spawnSync } from "child_process";
  6 | import { TestValidator } from "./testValidator";
  7 | import { ITestFailureInfo, TestOutcome } from "./report";
  8 | import { ICoverageSummary, emptyCoverageSummary } from "./coverage";
  9 | import { performance } from "perf_hooks";
 10 | 
 11 | /**
 12 |  * A bare-bones type definition for a Mocha test result, only modelling the
 13 |  * fields we need.
 14 |  */
 15 | interface IMochaTestResult {
 16 |   err: {
 17 |     message?: string;
 18 |   };
 19 | }
 20 | 
 21 | /**
 22 |  * A bare-bones type definition for a Mocha test report, only modelling the
 23 |  * fields we need.
 24 |  */
 25 | interface IMochaReport {
 26 |   passes: IMochaTestResult[];
 27 |   failures: IMochaTestResult[];
 28 |   pending: IMochaTestResult[];
 29 | }
 30 | 
 31 | export class MochaValidator extends TestValidator {
 32 |   private readonly testDir: string;
 33 |   private readonly coverageDirs: string[] = [];
 34 | 
 35 |   constructor(private packageName: string, private packagePath: string) {
 36 |     super();
 37 |     this.testDir = fs.mkdtempSync(path.join(packagePath, "test-"));
 38 |   }
 39 | 
 40 |   private scrubTestDirFromError(error: ITestFailureInfo): ITestFailureInfo {
 41 |     if (!error || typeof error !== "object") {
 42 |       console.warn(`Unexpected error type: ${typeof error}`);
 43 |       return error;
 44 |     } else if (typeof error.message !== "string") {
 45 |       console.warn(`Unexpected error.message type: ${typeof error.message}`);
 46 |       return error;
 47 |     }
 48 |     error.message = error.message.replace(
 49 |       new RegExp(this.testDir, "g"),
 50 |       "/path/to/test"
 51 |     );
 52 |     return error;
 53 |   }
 54 | 
 55 |   public validateTest(testName: string, testSource: string): TestOutcome {
 56 |     const requirePattern = new RegExp(
 57 |       `require\\('${this.packageName}'\\)`,
 58 |       "g"
 59 |     );
 60 |     let testFile = path.join(this.testDir, testName);
 61 |     if (fs.existsSync(testFile)) {
 62 |       throw new Error(`Test file ${testFile} already exists`);
 63 |     }
 64 |     fs.writeFileSync(
 65 |       testFile,
 66 |       testSource.replace(requirePattern, `require('..')`)
 67 |     );
 68 | 
 69 |     const packagePath = path.resolve(this.testDir, "..");
 70 | 
 71 |     // temporary directory to store output from mocha and nyc
 72 |     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mocha-validator"));
 73 |     // directory to store nyc profile and coverage data
 74 |     const coverageDir = path.join(tmpDir, "coverage");
 75 |     // coverage report, produced by nyc
 76 |     const coverageReport = path.join(coverageDir, "coverage-final.json");
 77 |     // test report, produced by mocha
 78 |     const reportFile = path.join(tmpDir, "report.json");
 79 | 
 80 |     performance.mark(`start:${testName}`);
 81 |     const res = spawnSync(
 82 |       path.join(__dirname, "..", "node_modules", ".bin", "nyc"),
 83 |       [
 84 |         `--cwd=${packagePath}`,
 85 |         `--exclude=${path.basename(this.testDir)}`,
 86 |         "--reporter=json",
 87 |         `--report-dir=${coverageDir}`,
 88 |         `--temp-dir=${coverageDir}`,
 89 |         path.join(__dirname, "..", "node_modules", ".bin", "mocha"),
 90 |         "--full-trace",
 91 |         "--exit",
 92 |         "--allow-uncaught=false",
 93 |         "--reporter=json",
 94 |         "--reporter-option",
 95 |         `output=${reportFile}`,
 96 |         "--",
 97 |         testFile,
 98 |       ],
 99 |       {
100 |         timeout: 5000,
101 |         killSignal: "SIGKILL",
102 |       }
103 |     );
104 |     performance.measure(`duration:${testName}`, `start:${testName}`);
105 |     const stderr = res.stderr.toString();
106 |     const report = MochaValidator.tryParseReport(reportFile);
107 | 
108 |     // parse test results; this is a bit complicated since Mocha sometimes reports asynchroneous tests
109 |     // as both passed and failed; we want to make sure to count them as failed
110 |     let outcome: TestOutcome = TestOutcome.OTHER;
111 |     if (
112 |       res.status != 0 ||
113 |       stderr.includes("AssertionError") ||
114 |       !report ||
115 |       report.failures.length > 0
116 |     ) {
117 |       // we need to construct a ITestFailureInfo object
118 |       // first, try to get it from the report
119 |       if (
120 |         report &&
121 |         report.failures.length > 0 &&
122 |         report.failures[0].err.message
123 |       ) {
124 |         outcome = TestOutcome.FAILED(
125 |           this.scrubTestDirFromError(report.failures[0].err as ITestFailureInfo)
126 |         );
127 |       } else {
128 |         // if that fails, try to get it from stderr
129 |         const match = stderr.match(/(AssertionError: .*)/);
130 |         if (match) {
131 |           outcome = TestOutcome.FAILED(
132 |             this.scrubTestDirFromError({ message: match[1] })
133 |           );
134 |         } else {
135 |           // if that fails, just use the whole stderr or (if that's empty) the exit code
136 |           outcome = TestOutcome.FAILED(
137 |             this.scrubTestDirFromError({
138 |               message: stderr ?? `Mocha exited with code ${res.status}`,
139 |             })
140 |           );
141 |         }
142 |       }
143 |     } else {
144 |       // further sanity check: there should be exactly one result (either passed or pending)
145 |       const numResults = report.passes.length + report.pending.length;
146 |       if (numResults != 1) {
147 |         throw new Error(`Expected 1 test result, got ${numResults}`);
148 |       }
149 | 
150 |       if (report.passes.length > 0) {
151 |         outcome = TestOutcome.PASSED(coverageReport, coverageDir);
152 |         this.coverageDirs.push(coverageDir);
153 |       } else {
154 |         outcome = TestOutcome.PENDING;
155 |       }
156 |     }
157 | 
158 |     // no need to keep coverage data for invalid tests
159 |     if (outcome.status != "PASSED") {
160 |       fs.rmdirSync(coverageDir, { recursive: true });
161 |     }
162 |     return outcome;
163 |   }
164 | 
165 |   private static tryParseReport(reportFile: string): IMochaReport | undefined {
166 |     try {
167 |       return JSON.parse(fs.readFileSync(reportFile, "utf8"));
168 |     } catch (e: any) {
169 |       console.warn(`Error parsing coverage report: ${e}`);
170 |       return undefined;
171 |     }
172 |   }
173 | 
174 |   public computeCoverageSummary(): ICoverageSummary {
175 |     if (this.coverageDirs.length == 0) {
176 |       return emptyCoverageSummary();
177 |     }
178 | 
179 |     const testDir = fs.mkdtempSync(path.join(this.packagePath, "test-"));
180 |     try {
181 |       // create/clean .nyc_output directory
182 |       const nycOutput = path.join(this.packagePath, ".nyc_output");
183 |       if (fs.existsSync(nycOutput)) {
184 |         fs.rmdirSync(nycOutput, { recursive: true });
185 |       }
186 |       fs.mkdirSync(nycOutput);
187 | 
188 |       // copy all .json files from coverageDirs to nycOutput
189 |       for (const coverageDir of this.coverageDirs) {
190 |         MochaValidator.copyCoverageData(coverageDir, nycOutput);
191 |       }
192 | 
193 |       // create nyc report
194 |       child_process.spawnSync(
195 |         path.join(__dirname, "..", "node_modules", ".bin", "nyc"),
196 |         [
197 |           `--report-dir=${path.join(testDir, "coverage")}`,
198 |           "--reporter=json-summary",
199 |           "report",
200 |         ],
201 |         {
202 |           cwd: this.packagePath,
203 |           stdio: "inherit",
204 |         }
205 |       );
206 | 
207 |       const coverageSummaryFileName = path.join(
208 |         testDir,
209 |         "coverage",
210 |         "coverage-summary.json"
211 |       );
212 |       if (fs.existsSync(coverageSummaryFileName)) {
213 |         return JSON.parse(fs.readFileSync(coverageSummaryFileName, "utf8"));
214 |       } else {
215 |         throw new Error(
216 |           `Failed to generate coverage summary: ${coverageSummaryFileName} does not exist.`
217 |         );
218 |       }
219 |     } finally {
220 |       fs.rmdirSync(testDir, { recursive: true });
221 |     }
222 |   }
223 | 
224 |   /**
225 |    * Copy all .json files from `src` to `dest` (which must exist).
226 |    */
227 |   public static copyCoverageData(src: string, dest: string) {
228 |     for (const file of fs.readdirSync(src)) {
229 |       if (file.endsWith(".json") && file !== "coverage-final.json") {
230 |         fs.copyFileSync(path.join(src, file), path.join(dest, file));
231 |       }
232 |     }
233 |   }
234 | 
235 |   public cleanup(): void {
236 |     for (const coverageDir of this.coverageDirs) {
237 |       fs.rmdirSync(coverageDir, { recursive: true });
238 |     }
239 |   }
240 | }
241 | 


--------------------------------------------------------------------------------
/benchmark/generate_report.ts:
--------------------------------------------------------------------------------
  1 | import fs from "fs";
  2 | import {
  3 |   CoverageStats,
  4 |   FailureStats,
  5 |   parseReports,
  6 |   RefinerStats,
  7 |   SimilarityStats,
  8 | } from "./parse_reports";
  9 | 
 10 | function percentage(p: number | string) {
 11 |   if (typeof p === "number") {
 12 |     return `${p.toFixed(2)}%`;
 13 |   } else {
 14 |     return p;
 15 |   }
 16 | }
 17 | 
 18 | type DiffCoverageStats = {
 19 |   [packageName: string]: { [key: keyof CoverageStats]: string | number };
 20 | };
 21 | 
 22 | function printCoverageReport(
 23 |   title: string,
 24 |   stats: CoverageStats | DiffCoverageStats
 25 | ) {
 26 |   console.log(`
 27 | # ${title}
 28 | Project | # Snippets Available | # Tests | # Passing Tests |  Statement coverage | # Non-trivial tests | # Non-trivial passing tests | Statement coverage by non-trivial tests
 29 | --- |  --: | --: | --: | --: | --: | --: | --:`);
 30 |   for (const {
 31 |     proj,
 32 |     nrUniqueSnippets,
 33 |     numTests,
 34 |     numPassing,
 35 |     coverage,
 36 |     nonTrivialTests,
 37 |     nonTrivialPassing,
 38 |     nonTrivialCoverage,
 39 |   } of Object.values(stats)) {
 40 |     console.log(
 41 |       `${proj} | ${nrUniqueSnippets} | ${numTests} | ${numPassing} | ${percentage(
 42 |         coverage
 43 |       )} | ${nonTrivialTests} | ${nonTrivialPassing} | ${percentage(
 44 |         nonTrivialCoverage
 45 |       )}`
 46 |     );
 47 |   }
 48 | }
 49 | 
 50 | function printFailureReport(
 51 |   title: string,
 52 |   stats: FailureStats,
 53 |   showPercentages = true
 54 | ) {
 55 |   console.log(`
 56 | # ${title}
 57 | Project | # FailedTests | # AssertionErrors | # FileSysErrors | # CorrectnessErrors |  # Timeout | # Other
 58 | --- |  --: | --: | --: | --: | --: | --:|`);
 59 |   for (const {
 60 |     proj,
 61 |     numFailing,
 62 |     numAssertionErrors,
 63 |     numFileSysErrors,
 64 |     numCorrectnessErrors,
 65 |     numTimeoutErrors,
 66 |     numOther,
 67 |   } of Object.values(stats)) {
 68 |     console.log(
 69 |       `${proj} | ${numFailing} | ${formatNum(
 70 |         numAssertionErrors,
 71 |         numFailing,
 72 |         showPercentages
 73 |       )} | ${formatNum(
 74 |         numFileSysErrors,
 75 |         numFailing,
 76 |         showPercentages
 77 |       )} | ${formatNum(
 78 |         numCorrectnessErrors,
 79 |         numFailing,
 80 |         showPercentages
 81 |       )} | ${formatNum(
 82 |         numTimeoutErrors,
 83 |         numFailing,
 84 |         showPercentages
 85 |       )} | ${formatNum(numOther, numFailing, showPercentages)}`
 86 |     );
 87 |   }
 88 | }
 89 | 
 90 | function printRefinerReport(title: string, stats: RefinerStats) {
 91 |   const refinerNames = Array.from(stats.refinerNames).sort();
 92 |   console.log(`
 93 | # ${title}
 94 | Project | ${refinerNames.join(" | ")}
 95 | --- | ${"--: |".repeat(refinerNames.length)}`);
 96 |   for (const { proj, refinersData } of Object.values(stats.stats)) {
 97 |     if (!proj) continue;
 98 |     console.log(
 99 |       `${proj} | ${refinerNames
100 |         .map((name) =>
101 |           name in refinersData ? refinersData[name].coverage + "%" : "--"
102 |         )
103 |         .join(" | ")}`
104 |     );
105 |   }
106 | }
107 | 
108 | function printSimilarityReport(title: string, stats: SimilarityStats) {
109 |   console.log(`
110 | # ${title}
111 | Project | numGeneratedTests | numExistingTests | maxSimilarity
112 | --- |  --: | --: | --:`);
113 | 
114 |   for (const { proj, similarityReport } of Object.values(stats)) {
115 |     console.log(
116 |       `${proj} | ${similarityReport.numGeneratedTests} | ${similarityReport.numExistingTests} | ${similarityReport.maxSimilarity}`
117 |     );
118 |   }
119 | }
120 | 
121 | function formatNum(
122 |   number: number,
123 |   denominator: number,
124 |   showPercentages = true
125 | ) {
126 |   if (denominator == 0) return "--";
127 |   if (showPercentages)
128 |     return `${number} (${((number / denominator) * 100).toFixed(2)}%)`;
129 |   else return `${number}`;
130 | }
131 | 
132 | function coverageDiff(cov1: number | "unknown", cov2: number | "unknown") {
133 |   if (cov1 === "unknown" || cov2 === "unknown") {
134 |     return "unknown";
135 |   } else {
136 |     return (cov1 - cov2).toFixed(2);
137 |   }
138 | }
139 | 
140 | function compareCovToBaseline(baselineCovStats: CoverageStats) {
141 |   const diffStats: DiffCoverageStats = {};
142 |   for (const [packageName, projStats] of Object.entries(coverageStats)) {
143 |     const baseline = baselineCovStats[packageName];
144 | 
145 |     // print diff if the same config is in the baseline, otherwise, skip diff for this config
146 |     if (baseline) {
147 |       const nonTrivialTestDiff =
148 |         projStats.nonTrivialTests - baseline.nonTrivialTests;
149 |       const nonTrivialPassingDiff =
150 |         projStats.nonTrivialPassing - baseline.nonTrivialPassing;
151 |       diffStats[packageName] = {
152 |         proj: projStats.proj,
153 |         nrUniqueSnippets: ppDiff(
154 |           projStats.nrUniqueSnippets - baseline.nrUniqueSnippets
155 |         ),
156 |         numTests: ppDiff(projStats.numTests - baseline.numTests),
157 |         numPassing: ppDiff(projStats.numPassing - baseline.numPassing),
158 |         coverage: ppDiff(
159 |           coverageDiff(projStats.stmtCoverage, baseline.stmtCoverage)
160 |         ),
161 |         nonTrivialTests: ppDiff(nonTrivialTestDiff),
162 |         nonTrivialPassing: ppDiff(nonTrivialPassingDiff),
163 |         nonTrivialCoverage: ppDiff(
164 |           coverageDiff(
165 |             projStats.nonTrivialCoverage,
166 |             baseline.nonTrivialCoverage
167 |           )
168 |         ),
169 |       };
170 |     }
171 |   }
172 |   printCoverageReport("Coverage Comparison to baseline", diffStats);
173 | }
174 | 
175 | function compareFailuresToBaseline(baselineFailureStats: any) {
176 |   const diffStats: any = {};
177 |   for (const [packageName, projStats] of Object.entries(failureStats)) {
178 |     const baseline = baselineFailureStats[packageName];
179 | 
180 |     //print diff if the same config is in the baseline, otherwise, skip diff for this config
181 |     if (baseline) {
182 |       diffStats[packageName] = {
183 |         proj: projStats.proj,
184 |         numFailing: ppDiff(projStats.numFailing - baseline.numFailing, true),
185 |         numAssertionErrors: ppDiff(
186 |           projStats.numAssertionErrors - baseline.numAssertionErrors
187 |         ),
188 |         numFileSysErrors: ppDiff(
189 |           projStats.numFileSysErrors - baseline.numFileSysErrors
190 |         ),
191 |         numCorrectnessErrors: ppDiff(
192 |           projStats.numCorrectnessErrors - baseline.numCorrectnessErrors
193 |         ),
194 |         numTimeoutErrors: ppDiff(
195 |           projStats.numTimeoutErrors - baseline.numTimeoutErrors
196 |         ),
197 |         numOther: ppDiff(projStats.numOther - baseline.numOther),
198 |       };
199 |     }
200 |   }
201 |   printFailureReport("Failure Comparison to baseline", diffStats, false);
202 | }
203 | 
204 | function ppDiff(d: number | string, lowerIsBetter = false) {
205 |   let s;
206 |   if (d > 0) {
207 |     s = `+${d}`;
208 |   } else if (d == 0) {
209 |     s = "±0";
210 |   } else {
211 |     s = String(d);
212 |   }
213 |   if (lowerIsBetter ? d < 0 : d > 0) {
214 |     return `**${s}**`;
215 |   } else {
216 |     return s;
217 |   }
218 | }
219 | 
220 | if (process.argv.length < 3 || process.argv.length > 5) {
221 |   console.error(
222 |     "Usage: node generate_report.js [<config.json>] <artifact_dir> [<baseline_artifact_dir>]"
223 |   );
224 |   process.exit(1);
225 | }
226 | const hasConfig = fs.lstatSync(process.argv[2]).isFile();
227 | const config = hasConfig
228 |   ? JSON.parse(fs.readFileSync(process.argv[2], "utf8"))
229 |   : {};
230 | const artifactDir = hasConfig ? process.argv[3] : process.argv[2];
231 | const baselineArtifactDir = hasConfig ? process.argv[4] : process.argv[3];
232 | 
233 | console.log(`
234 | # Parameters
235 | - snippets from: ${config.snippetsFrom}
236 | - snippet length: ${config.snippetLength}
237 | - numSnippets: ${config.numSnippets}
238 | - temperatures: ${config.temperatures}
239 | - number of completions: ${config.numCompletions}`);
240 | 
241 | const { coverageStats, failureStats, refinersStats, similarityStats } =
242 |   parseReports(artifactDir);
243 | 
244 | printCoverageReport("Coverage report", coverageStats);
245 | printFailureReport("Failure report", failureStats);
246 | printRefinerReport("Coverage when excluding refiners", refinersStats);
247 | printSimilarityReport(
248 |   "Similarity of generated tests to existing tests",
249 |   similarityStats
250 | );
251 | 
252 | if (baselineArtifactDir) {
253 |   const baselineResults = parseReports(baselineArtifactDir);
254 |   const baselineCovStats = baselineResults.coverageStats;
255 |   const baselineFailureStats = baselineResults.failureStats;
256 |   compareCovToBaseline(baselineCovStats);
257 |   compareFailuresToBaseline(baselineFailureStats);
258 | }
259 | 


--------------------------------------------------------------------------------
/benchmark/run.ts:
--------------------------------------------------------------------------------
  1 | import fs from "fs";
  2 | import path from "path";
  3 | import { performance } from "perf_hooks";
  4 | import {
  5 |   APIFunction,
  6 |   Codex,
  7 |   exploreAPI,
  8 |   FunctionDescriptor,
  9 |   getDocSnippets,
 10 |   getSnippets,
 11 |   ICompletionModel,
 12 |   MochaValidator,
 13 |   MockCompletionModel,
 14 |   TestGenerator,
 15 |   TestValidator,
 16 | } from "..";
 17 | import yargs from "yargs";
 18 | import { hideBin } from "yargs/helpers";
 19 | import { PerformanceMeasurer } from "./performanceMeasurer";
 20 | import { TestResultCollector } from "./testResultCollector";
 21 | require("console-stamp")(console);
 22 | 
 23 | /**
 24 |  * Run an end-to-end experiment.
 25 |  * Given a package generate tests for its methods, run them, and generate a report.
 26 |  * @param model The completion model to use.
 27 |  * @param packageName The name of the package to use.
 28 |  * @param packagePath The path to the package to use.
 29 |  * @param functions The list of functions in the API.
 30 |  * @param snippetMap The snippets for package methods.
 31 |  * @param timeLimit The maximum time (in milliseconds) to run the experiment.
 32 |  */
 33 | export async function runExperiment(
 34 |   functions: APIFunction[],
 35 |   temperatures: number[],
 36 |   snippetMap: Map<string, string[]>,
 37 |   model: ICompletionModel,
 38 |   validator: TestValidator,
 39 |   collector: TestResultCollector,
 40 |   timeLimit: number
 41 | ): Promise<void> {
 42 |   const deadline = performance.now() + timeLimit;
 43 |   const generator = new TestGenerator(
 44 |     temperatures,
 45 |     (fn) => snippetMap.get(fn),
 46 |     model,
 47 |     validator,
 48 |     collector
 49 |   );
 50 | 
 51 |   // initialize the workList with all functions
 52 |   let workList = functions.map((f) => ({ fun: f, nrTimesExtended: 0 }));
 53 | 
 54 |   while (workList.length > 0) {
 55 |     if (performance.now() > deadline) {
 56 |       console.log(
 57 |         `Time limit reached, ${workList.length} worklist items ignored.`
 58 |       );
 59 |       break;
 60 |     }
 61 | 
 62 |     const { fun } = workList.shift()!;
 63 |     await generator.generateAndValidateTests(fun);
 64 |   }
 65 | 
 66 |   collector.recordCoverageInfo(validator.computeCoverageSummary());
 67 | }
 68 | 
 69 | if (require.main === module) {
 70 |   (async () => {
 71 |     const parser = yargs(hideBin(process.argv))
 72 |       .strict()
 73 |       .options({
 74 |         outputDir: {
 75 |           type: "string",
 76 |           demandOption: true,
 77 |           description: "directory where output files will be placed",
 78 |         },
 79 |         package: {
 80 |           type: "string",
 81 |           demandOption: true,
 82 |           description: "package source",
 83 |         },
 84 |         api: {
 85 |           type: "string",
 86 |           description:
 87 |             "JSON file with API to generate tests for (usually api.json from a previous run)",
 88 |         },
 89 |         snippets: {
 90 |           type: "string",
 91 |           choices: ["code", "doc", "both", "none"],
 92 |           default: "doc",
 93 |           description: "where to collect usage snippets from",
 94 |         },
 95 |         database: {
 96 |           type: "string",
 97 |           description:
 98 |             "CodeQL database; only required if collecting snippets from code",
 99 |         },
100 |         responses: {
101 |           type: "string",
102 |           description:
103 |             "file with simulated model responses (usually prompts.json from a previous run)",
104 |         },
105 |         timeLimit: {
106 |           type: "number",
107 |           default: 5 * 60 * 60,
108 |           description: "time limit in seconds (default is five hours)",
109 |         },
110 |         numSnippets: {
111 |           default: "all",
112 |           description:
113 |             'number of snippets to include in the prompt, or "all" to include all snippets',
114 |         },
115 |         snippetLength: {
116 |           type: "number",
117 |           default: 20,
118 |           description: "maximum length of each snippet in lines",
119 |         },
120 |         temperatures: {
121 |           type: "string",
122 |           default: "0.0",
123 |           description:
124 |             "whitespace-separated list of sampling temperatures to try when obtaining completions",
125 |         },
126 |         numCompletions: {
127 |           type: "number",
128 |           default: 5,
129 |           description: "number of completions to generate for each prompt",
130 |         },
131 |         strictResponses: {
132 |           type: "boolean",
133 |           default: true,
134 |           description:
135 |             "whether to require that all prompts are found when running with --responses; does not have any effect otherwise",
136 |         },
137 |         model: {
138 |           type: "string",
139 |           choices: ["gpt", "starcoder"],
140 |           default: "gpt",
141 |           description: "LLM api to use",
142 |         },
143 |       });
144 |     const argv = await parser.argv;
145 | 
146 |     var model: ICompletionModel;
147 |     if (!argv.responses) {
148 |       if (argv.strictResponses) {
149 |         console.warn(
150 |           "Warning: --strictResponses has no effect when not using --responses"
151 |         );
152 |       }
153 |       model = new Codex(argv.model === "starcoder", { n: argv.numCompletions });
154 |     } else {
155 |       model = MockCompletionModel.fromFile(
156 |         argv.responses,
157 |         argv.strictResponses
158 |       );
159 |     }
160 | 
161 |     const packagePath = argv.package;
162 |     const packageName = JSON.parse(
163 |       fs.readFileSync(path.join(packagePath, "package.json"), "utf8")
164 |     ).name;
165 |     const perf = new PerformanceMeasurer();
166 |     console.log(`Running experiment for ${packageName}`);
167 | 
168 |     let api: APIFunction[];
169 |     if (argv.api) {
170 |       console.log(`Loading API from ${argv.api}`);
171 |       const rawApi: {
172 |         accessPath: string;
173 |         descriptor: FunctionDescriptor;
174 |       }[] = JSON.parse(fs.readFileSync(argv.api, "utf8"));
175 |       api = rawApi.map(
176 |         ({ accessPath, descriptor }) =>
177 |           new APIFunction(accessPath, descriptor, packageName)
178 |       );
179 |     } else {
180 |       console.log("Exploring API");
181 |       api = Array.from(exploreAPI(packagePath).getFunctions(packageName));
182 |     }
183 | 
184 |     let numSnippets: number | "all" =
185 |       argv.numSnippets === "all" ? argv.numSnippets : +argv.numSnippets;
186 |     if (numSnippets !== "all" && !(numSnippets >= 0)) {
187 |       throw new Error(`Invalid value for --numSnippets: ${argv.numSnippets}`);
188 |     }
189 | 
190 |     performance.mark("snippet-extraction-start");
191 |     let allSnippets = new Map<string, string[]>();
192 |     if (numSnippets !== 0) {
193 |       console.log("Extracting snippets");
194 |       const functionNames = api.map((f) => f.functionName);
195 |       if (argv.snippets == "code") {
196 |         if (!argv.database) {
197 |           throw new Error("--database is required if --snippets is code");
198 |         }
199 |         if (numSnippets === "all") {
200 |           throw new Error(
201 |             "--numSnippets=all is not supported when collecting snippets from code"
202 |           );
203 |         }
204 |         allSnippets = getSnippets(
205 |           argv.database,
206 |           numSnippets,
207 |           functionNames,
208 |           argv.snippetLength
209 |         );
210 |       } else if (argv.snippets == "doc") {
211 |         if (argv.database) {
212 |           console.warn("--database is ignored if --snippets is doc");
213 |         }
214 |         allSnippets = getDocSnippets(
215 |           packagePath,
216 |           numSnippets,
217 |           functionNames,
218 |           argv.snippetLength
219 |         );
220 |       } else if (argv.snippets == "both") {
221 |         if (!argv.database) {
222 |           throw new Error("--database is required if --snippets is code");
223 |         }
224 |         if (numSnippets === "all") {
225 |           throw new Error(
226 |             "--numSnippets=all is not supported when collecting snippets from code"
227 |           );
228 |         }
229 |         const snippets = getSnippets(
230 |           argv.database,
231 |           numSnippets,
232 |           functionNames,
233 |           argv.snippetLength
234 |         );
235 |         const docSnippets = getDocSnippets(
236 |           packagePath,
237 |           numSnippets,
238 |           functionNames,
239 |           argv.snippetLength
240 |         );
241 |         for (const [key, value] of snippets.entries()) {
242 |           allSnippets.set(key, [...value, ...(docSnippets.get(key) || [])]);
243 |         }
244 |       } else {
245 |         if (argv.database) {
246 |           console.warn("--database is ignored if --snippets is none");
247 |         }
248 |       }
249 |     }
250 |     performance.measure("snippet-extraction", "snippet-extraction-start");
251 | 
252 |     console.log("Generating tests");
253 |     const collector = new TestResultCollector(
254 |       packageName,
255 |       packagePath,
256 |       argv.outputDir,
257 |       api,
258 |       allSnippets,
259 |       perf,
260 |       argv.snippets,
261 |       numSnippets,
262 |       argv.snippetLength,
263 |       argv.numCompletions
264 |     );
265 |     const validator = new MochaValidator(packageName, packagePath);
266 |     try {
267 |       await runExperiment(
268 |         api,
269 |         argv.temperatures.split(/\s+/).map(parseFloat),
270 |         allSnippets,
271 |         model,
272 |         validator,
273 |         collector,
274 |         argv.timeLimit * 1000
275 |       );
276 |       collector.report();
277 |       const report = collector.getReport();
278 |       const coverage = report.coverage?.total.statements.pct ?? 0;
279 |       console.log(`${coverage}% statement coverage`);
280 |     } finally {
281 |       validator.cleanup();
282 |     }
283 |   })().catch((e) => {
284 |     console.error(e);
285 |     process.exit(1);
286 |   });
287 | }
288 | 


--------------------------------------------------------------------------------
/src/promptCrafting.ts:
--------------------------------------------------------------------------------
  1 | import dedent from "dedent";
  2 | import { APIFunction, sanitizePackageName } from "./exploreAPI";
  3 | import { TestOutcome, TestStatus } from "./report";
  4 | import { closeBrackets, commentOut, trimAndCombineDocComment } from "./syntax";
  5 | 
  6 | /**
  7 |  * A strategy object for refining a prompt based on the outcome of a test
  8 |  * generated from it.
  9 |  */
 10 | export interface IPromptRefiner {
 11 |   /** A human-readable name for identifying this refiner. */
 12 |   get name(): string;
 13 | 
 14 |   /**
 15 |    * Refine the `original` prompt based on the `outcome` of a test generated
 16 |    * from it and the given `body`.
 17 |    */
 18 |   refine(original: Prompt, body: string, outcome: TestOutcome): Prompt[];
 19 | }
 20 | 
 21 | /**
 22 |  * Options for controlling prompt generation.
 23 |  */
 24 | type PromptOptions = {
 25 |   /** Whether to include usage snippets in the prompt. */
 26 |   includeSnippets: boolean;
 27 |   /** Whether to include the function's doc comment in the prompt. */
 28 |   includeDocComment: boolean;
 29 |   /** Whether to include the function's body in the prompt. */
 30 |   includeFunctionBody: boolean;
 31 | };
 32 | 
 33 | export function defaultPromptOptions(): PromptOptions {
 34 |   return {
 35 |     includeSnippets: false,
 36 |     includeDocComment: false,
 37 |     includeFunctionBody: false,
 38 |   };
 39 | }
 40 | 
 41 | /**
 42 |  * Structured representation of a prompt we send to the model.
 43 |  *
 44 |  * In general, our prompts look like this:
 45 |  *
 46 |  * ```js
 47 |  * let mocha = require('mocha');            // -+
 48 |  * let assert = require('assert');          //  | Imports
 49 |  * let pkg = require('pkg');                // -+
 50 |  *
 51 |  * // usage #1                              // -+
 52 |  * ...                                      //  |
 53 |  * // usage #2                              //  | Usage snippets
 54 |  * ...                                      // -+
 55 |  *
 56 |  * // this does...                          // -+
 57 |  * // @param foo                            //  |
 58 |  * // @returns bar                          //  | Doc comment
 59 |  * ...                                      // -+
 60 |  *
 61 |  * // fn(args)                              //    Signature of the function we're testing
 62 |  * // function fn(args) {                   // -+
 63 |  * //     ...                               //  | Function body (optional)
 64 |  * // }                                     // -+
 65 |  *
 66 |  * describe('test pkg', function() {        //    Test suite header
 67 |  *   it('test fn', function(done) {         //    Test case header
 68 |  * ```
 69 |  *
 70 |  * The structured representation keeps track of these parts and provides methods
 71 |  * to assemble them into a textual prompt and complete them into a test case.
 72 |  */
 73 | export class Prompt {
 74 |   private readonly imports: string;
 75 |   private readonly signature: string;
 76 |   private readonly docComment: string;
 77 |   private readonly functionBody: string;
 78 |   private readonly suiteHeader: string;
 79 |   protected readonly testHeader: string;
 80 |   public readonly provenance: PromptProvenance[] = [];
 81 | 
 82 |   constructor(
 83 |     public readonly fun: APIFunction,
 84 |     public readonly usageSnippets: string[],
 85 |     public readonly options: PromptOptions
 86 |   ) {
 87 |     const sanitizedPackageName = sanitizePackageName(fun.packageName);
 88 |     this.imports = dedent`
 89 |             let mocha = require('mocha');
 90 |             let assert = require('assert');
 91 |             let ${sanitizedPackageName} = require('${fun.packageName}');\n`;
 92 | 
 93 |     this.signature = commentOut(fun.signature);
 94 | 
 95 |     if (options.includeFunctionBody) {
 96 |       this.functionBody = commentOut(fun.descriptor.implementation);
 97 |     } else {
 98 |       this.functionBody = "";
 99 |     }
100 | 
101 |     this.suiteHeader = `describe('test ${sanitizedPackageName}', function() {\n`;
102 |     this.testHeader = `    it('test ${fun.accessPath}', function(done) {\n`;
103 | 
104 |     if (options.includeDocComment) {
105 |       this.docComment = trimAndCombineDocComment(
106 |         fun.descriptor.docComment ?? ""
107 |       );
108 |     } else {
109 |       this.docComment = "";
110 |     }
111 |   }
112 | 
113 |   /**
114 |    * Assemble the usage snippets into a single string.
115 |    */
116 |   private assembleUsageSnippets(): string {
117 |     if (!this.options.includeSnippets) {
118 |       return "";
119 |     } else {
120 |       return this.usageSnippets
121 |         .map((snippet, index) => {
122 |           const lines = snippet.split("\n");
123 |           const commentedLines = lines.map((line) => `// ${line}\n`);
124 |           return `// usage #${index + 1}\n` + commentedLines.join("");
125 |         })
126 |         .join("");
127 |     }
128 |   }
129 | 
130 |   /**
131 |    * Assemble a prompt to send to the model from the structured
132 |    * representation.
133 |    */
134 |   public assemble(): string {
135 |     return (
136 |       this.imports +
137 |       this.assembleUsageSnippets() +
138 |       this.docComment +
139 |       this.signature +
140 |       this.functionBody +
141 |       this.suiteHeader +
142 |       this.testHeader
143 |     );
144 |   }
145 | 
146 |   /**
147 |    * Given a test body suggested by the model, assemble a complete,
148 |    * syntactically correct test.
149 |    */
150 |   public completeTest(
151 |     body: string,
152 |     stubOutHeaders: boolean = true
153 |   ): string | undefined {
154 |     let fixed = closeBrackets(
155 |       this.imports +
156 |         (stubOutHeaders
157 |           ? // stub out suite header and test header so we don't double-count identical tests
158 |             "describe('test suite', function() {\n" +
159 |             "    it('test case', function(done) {\n"
160 |           : this.suiteHeader + this.testHeader) +
161 |         // add the body, making sure the first line is indented correctly
162 |         body.replace(/^(?=\S)/, " ".repeat(8)) +
163 |         "\n"
164 |     );
165 |     // beautify closing brackets
166 |     return fixed?.source.replace(/\}\)\}\)$/, "    })\n})");
167 |   }
168 | 
169 |   public withProvenance(...provenanceInfos: PromptProvenance[]): Prompt {
170 |     this.provenance.push(...provenanceInfos);
171 |     return this;
172 |   }
173 | 
174 |   public functionHasDocComment(): boolean {
175 |     return this.fun.descriptor.docComment !== undefined;
176 |   }
177 | }
178 | 
179 | /**
180 |  * A record of how a prompt was generated, including information about which
181 |  * `originalPrompt` it was generated from, information about the test that gave
182 |  * rise to the prompt refinement, and the name of the refiner.
183 |  */
184 | export type PromptProvenance = {
185 |   originalPrompt: Prompt;
186 |   testId: number;
187 |   refiner: string;
188 | };
189 | 
190 | /**
191 |  * A prompt refiner that adds usage snippets to the prompt.
192 |  */
193 | export class SnippetIncluder implements IPromptRefiner {
194 |   public get name(): string {
195 |     return "SnippetIncluder";
196 |   }
197 | 
198 |   public refine(
199 |     original: Prompt,
200 |     completion: string,
201 |     outcome: TestOutcome
202 |   ): Prompt[] {
203 |     if (
204 |       !original.options.includeSnippets &&
205 |       original.usageSnippets.length > 0
206 |     ) {
207 |       return [
208 |         new Prompt(original.fun, original.usageSnippets, {
209 |           ...original.options,
210 |           includeSnippets: true,
211 |         }),
212 |       ];
213 |     }
214 |     return [];
215 |   }
216 | }
217 | 
218 | /**
219 |  * A prompt refiner that adds a function's doc comments to the prompt.
220 |  */
221 | export class DocCommentIncluder implements IPromptRefiner {
222 |   public get name(): string {
223 |     return "DocCommentIncluder";
224 |   }
225 | 
226 |   public refine(
227 |     original: Prompt,
228 |     completion: string,
229 |     outcome: TestOutcome
230 |   ): Prompt[] {
231 |     if (
232 |       !original.options.includeDocComment &&
233 |       original.functionHasDocComment()
234 |     ) {
235 |       return [
236 |         new Prompt(original.fun, original.usageSnippets, {
237 |           ...original.options,
238 |           includeDocComment: true,
239 |         }),
240 |       ];
241 |     }
242 |     return [];
243 |   }
244 | }
245 | 
246 | export class RetryPrompt extends Prompt {
247 |   constructor(
248 |     prev: Prompt,
249 |     private body: string,
250 |     private readonly err: string
251 |   ) {
252 |     super(prev.fun, prev.usageSnippets, prev.options);
253 |   }
254 | 
255 |   public assemble() {
256 |     const rawFailingTest = super.assemble() + this.body + "\n";
257 |     const completedFailingTest = closeBrackets(rawFailingTest);
258 |     let failingTest;
259 |     if (completedFailingTest) {
260 |       failingTest = completedFailingTest.source.replace(
261 |         /\}\)\}\)$/,
262 |         "    })\n"
263 |       );
264 |     } else {
265 |       failingTest = rawFailingTest + "    })\n";
266 |     }
267 | 
268 |     return (
269 |       failingTest +
270 |       "    // the test above fails with the following error:\n" +
271 |       `    //   ${this.err}\n` +
272 |       "    // fixed test:\n" +
273 |       this.testHeader
274 |     );
275 |   }
276 | }
277 | 
278 | /**
279 |  * A prompt refiner that, for a failed test, adds the error message to the
280 |  * prompt and tries again.
281 |  */
282 | export class RetryWithError implements IPromptRefiner {
283 |   public get name(): string {
284 |     return "RetryWithError";
285 |   }
286 | 
287 |   public refine(
288 |     original: Prompt,
289 |     completion: string,
290 |     outcome: TestOutcome
291 |   ): Prompt[] {
292 |     if (
293 |       !(original instanceof RetryPrompt) &&
294 |       outcome.status === TestStatus.FAILED
295 |     ) {
296 |       return [new RetryPrompt(original, completion, outcome.err.message)];
297 |     }
298 |     return [];
299 |   }
300 | }
301 | 
302 | /**
303 |  * A prompt refiner that includes the body of the function in the prompt.
304 |  */
305 | export class FunctionBodyIncluder implements IPromptRefiner {
306 |   public get name(): string {
307 |     return "FunctionBodyIncluder";
308 |   }
309 | 
310 |   public refine(
311 |     original: Prompt,
312 |     completion: string,
313 |     outcome: TestOutcome
314 |   ): Prompt[] {
315 |     if (
316 |       !original.options.includeFunctionBody &&
317 |       original.fun.descriptor.implementation !== ""
318 |     ) {
319 |       return [
320 |         new Prompt(original.fun, original.usageSnippets, {
321 |           ...original.options,
322 |           includeFunctionBody: true,
323 |         }),
324 |       ];
325 |     }
326 |     return [];
327 |   }
328 | }
329 | 


--------------------------------------------------------------------------------
/benchmark/testResultCollector.ts:
--------------------------------------------------------------------------------
  1 | import * as fs from "fs";
  2 | import * as path from "path";
  3 | import {
  4 |   APIFunction,
  5 |   BaseTestResultCollector,
  6 |   IMetaData,
  7 |   ITestInfo,
  8 |   ITestReport,
  9 |   MochaValidator,
 10 |   ReportForTest,
 11 |   TestOutcome,
 12 |   TestStatus,
 13 | } from "..";
 14 | import { PerformanceMeasurer } from "./performanceMeasurer";
 15 | import {
 16 |   createUniqueStmtId,
 17 |   getCoveredStmtsForFile,
 18 | } from "./testCollectorHelper";
 19 | 
 20 | /**
 21 |  * A full-featured test-result collector that can be used to persist information
 22 |  * to disk.
 23 |  */
 24 | export class TestResultCollector extends BaseTestResultCollector {
 25 |   private readonly metaData: IMetaData;
 26 | 
 27 |   /**
 28 |    * constructor registers meta-data associated with a test run
 29 |    *
 30 |    * @param outputDir: the directory in which to write the report and other files
 31 |    * @param snippetsTypeAsString: the type of snippets used to generate the tests (code, doc, both, or none)
 32 |    * @param numSnippets: number of snippets to include in a prompt (default 3)
 33 |    * @param snippetLength: length of each snippet (maximum length of each snippet in lines (default 20 lines))
 34 |    * @param temperature: sampling temperature for obtaining completions (default 0)
 35 |    * @param numCompletions: number of completions to obtain for each prompt (default 5)
 36 |    */
 37 |   constructor(
 38 |     packageName: string,
 39 |     private readonly packagePath: string,
 40 |     private readonly outputDir: string,
 41 |     private readonly api: APIFunction[],
 42 |     private readonly snippetMap: Map<string, string[]>,
 43 |     private readonly perf: PerformanceMeasurer,
 44 |     snippetsTypeAsString: string,
 45 |     numSnippets: number | "all",
 46 |     snippetLength: number,
 47 |     numCompletions: number
 48 |   ) {
 49 |     super();
 50 |     this.metaData = {
 51 |       packageName,
 52 |       useDocSnippets:
 53 |         snippetsTypeAsString === "doc" || snippetsTypeAsString === "both",
 54 |       useCodeSnippets:
 55 |         snippetsTypeAsString === "code" || snippetsTypeAsString === "both",
 56 |       numSnippets,
 57 |       snippetLength,
 58 |       numCompletions,
 59 |     };
 60 |     this.createOutputDir();
 61 |   }
 62 | 
 63 |   private getTestsWithStatus(status: TestStatus) {
 64 |     return [...this.tests.values()].filter(
 65 |       (test) => test.outcome.status === status
 66 |     );
 67 |   }
 68 | 
 69 |   public getNrPasses() {
 70 |     return this.getTestsWithStatus(TestStatus.PASSED).length;
 71 |   }
 72 | 
 73 |   public getNrFailures() {
 74 |     return this.getTestsWithStatus(TestStatus.FAILED).length;
 75 |   }
 76 | 
 77 |   public getNrPending() {
 78 |     return this.getTestsWithStatus(TestStatus.PENDING).length;
 79 |   }
 80 | 
 81 |   public getNrOther() {
 82 |     return this.getTestsWithStatus(TestStatus.OTHER).length;
 83 |   }
 84 | 
 85 |   public getReport(): ITestReport {
 86 |     return {
 87 |       metaData: this.metaData,
 88 |       nrUniqueSnippets: this.computeNrUniqueSnippets(),
 89 |       stats: {
 90 |         nrTests: this.tests.size,
 91 |         nrPasses: this.getNrPasses(),
 92 |         nrFailures: this.getNrFailures(),
 93 |         nrPending: this.getNrPending(),
 94 |         nrOther: this.getNrOther(),
 95 |         apiExplorationTime: this.perf.getApiExplorationTime()!,
 96 |         docCommentExtractionTime: this.perf.getDocCommentExtractionTime()!,
 97 |         snippetExtractionTime: this.perf.getSnippetExtractionTime()!,
 98 |         codexQueryTime: this.perf.getTotalCodexQueryTime(),
 99 |         totalTime: this.perf.getTotalTime(),
100 |       },
101 |       tests: [...this.tests.values()].map(this.getReportForTest, this),
102 |       coverage: this.coverageSummary,
103 |     };
104 |   }
105 | 
106 |   private getReportForTest(test: ITestInfo): ReportForTest {
107 |     const promptIds = test.prompts.map(
108 |       (prompt) => this.prompts.get(prompt)!.id
109 |     );
110 |     const err =
111 |       test.outcome.status === TestStatus.FAILED ? test.outcome.err : {};
112 |     const coveredStatements = this.getCoveredStatements(test.outcome);
113 |     return {
114 |       testName: test.testName,
115 |       api: test.api,
116 |       testFile: test.testName,
117 |       promptIds: promptIds,
118 |       status: test.outcome.status as TestStatus,
119 |       err: err,
120 |       coveredStatements: coveredStatements,
121 |       duration: this.perf.getTestDuration(test.testName),
122 |     };
123 |   }
124 | 
125 |   /**
126 |    * Get the list of statements covered by the test with the given outcome.
127 |    *
128 |    * Tests that do not pass or that do not have a coverage summary are not
129 |    * considered to cover any statements. For passing tests, covered statements are
130 |    * represented in the form
131 |    * '<file>@<startLine>:<startColumn>-<endLine>:<endColumn>'.
132 |    */
133 |   private getCoveredStatements(outcome: TestOutcome) {
134 |     if (
135 |       outcome.status !== TestStatus.PASSED ||
136 |       outcome.coverageReport === undefined
137 |     ) {
138 |       return [];
139 |     }
140 |     const coveredStatements = [];
141 |     const coverage = JSON.parse(
142 |       fs.readFileSync(outcome.coverageReport, "utf8")
143 |     );
144 |     for (const file of Object.keys(coverage)) {
145 |       const relpath = path.relative(this.packagePath, coverage[file].path);
146 |       coveredStatements.push(
147 |         ...getCoveredStmtsForFile(coverage[file], relpath)
148 |       );
149 |     }
150 |     return coveredStatements;
151 |   }
152 | 
153 |   /**
154 |    * compute the number of unique snippets that are available in the snippet map
155 |    * @returns the number of unique snippets
156 |    */
157 |   private computeNrUniqueSnippets(): number {
158 |     const uniqueSnippets = new Set<string>();
159 |     for (const snippetGroup of this.snippetMap.values()) {
160 |       for (const snippet of snippetGroup) {
161 |         uniqueSnippets.add(snippet);
162 |       }
163 |     }
164 |     return uniqueSnippets.size;
165 |   }
166 | 
167 |   /**
168 |    * For passing tests, preprend a checkmark and make the text green.
169 |    * For failing tests, prepend an 'x' and make the text red.
170 |    * For other tests, prepend a '?' and make the text purple.
171 |    */
172 |   private getTestLabel(test: ITestInfo): string {
173 |     const testName = test.testName;
174 |     if (test.outcome.status === TestStatus.PASSED) {
175 |       return "\u001b[32m" + "\u2713" + testName + "\u001b[0m";
176 |     } else if (test.outcome.status === TestStatus.FAILED) {
177 |       return "\u001b[31m" + "\u2717" + testName + "\u001b[0m";
178 |     } else {
179 |       return "\u001b[35m" + "\u2753" + testName + "\u001b[0m";
180 |     }
181 |   }
182 | 
183 |   /**
184 |    * print summary of test results for each API method
185 |    */
186 |   private reportAPICoverage() {
187 |     console.log("API coverage:");
188 |     const testsPerAPI = new Map<string, Set<ITestInfo>>();
189 |     for (const test of this.tests.values()) {
190 |       const api = test.api;
191 |       if (!testsPerAPI.has(api)) {
192 |         testsPerAPI.set(api, new Set<ITestInfo>());
193 |       }
194 |       testsPerAPI.get(api)!.add(test);
195 |     }
196 |     for (const [api, tests] of testsPerAPI.entries()) {
197 |       const testLabels = [...tests].map((test) => this.getTestLabel(test));
198 |       console.log(`  ${api}: ${[...testLabels.values()].join(", ")}`);
199 |     }
200 |   }
201 | 
202 |   public report() {
203 |     // write report to 'report.json' in the specified output directory
204 |     const report = this.getReport();
205 |     fs.writeFileSync(
206 |       path.join(this.outputDir, "report.json"),
207 |       JSON.stringify(report, null, 2)
208 |     );
209 | 
210 |     // write out tests to 'tests' directory
211 |     const testOutputDir = path.join(this.outputDir, "tests");
212 |     const coverageDataDir = path.join(this.outputDir, "coverageData");
213 |     for (const { testName, testSource, outcome } of this.tests.values()) {
214 |       fs.writeFileSync(path.join(testOutputDir, testName), testSource);
215 | 
216 |       // copy coverage data if available
217 |       if (outcome.status === "PASSED" && outcome.coverageData) {
218 |         const destDir = path.join(
219 |           coverageDataDir,
220 |           path.basename(testName, ".js")
221 |         );
222 |         fs.mkdirSync(destDir, { recursive: true });
223 |         MochaValidator.copyCoverageData(outcome.coverageData, destDir);
224 |       }
225 |     }
226 | 
227 |     // write out prompts to 'prompts' directory, and summary of prompts to 'prompts.json'
228 |     const promptOutputDir = path.join(this.outputDir, "prompts");
229 |     for (const promptInfo of this.prompts.values()) {
230 |       fs.writeFileSync(
231 |         path.join(promptOutputDir, promptInfo.file),
232 |         promptInfo.prompt.assemble()
233 |       );
234 |     }
235 |     let prompts = {
236 |       metaData: this.metaData,
237 |       prompts: [...this.prompts.values()].map(
238 |         ({ prompt, id, file, temperature, completions }) => {
239 |           const tests = [...this.tests.values()]
240 |             .filter((test) => test.prompts.includes(prompt))
241 |             .map((test) => test.testName);
242 |           const provenance = prompt.provenance.map((p) => ({
243 |             originalPrompt: this.prompts.get(p.originalPrompt)!.id,
244 |             test: p.testId,
245 |             refiner: p.refiner,
246 |           }));
247 |           return {
248 |             id,
249 |             file,
250 |             temperature,
251 |             completions: [...completions.values()],
252 |             tests,
253 |             provenance,
254 |           };
255 |         }
256 |       ),
257 |     };
258 |     fs.writeFileSync(
259 |       path.join(this.outputDir, "prompts.json"),
260 |       JSON.stringify(prompts, null, 2)
261 |     );
262 | 
263 |     // write API info to 'api.json'
264 |     fs.writeFileSync(
265 |       path.join(this.outputDir, "api.json"),
266 |       JSON.stringify(this.api, null, 2)
267 |     );
268 | 
269 |     // write snippetMap to 'snippetMap.json'
270 |     fs.writeFileSync(
271 |       path.join(this.outputDir, "snippetMap.json"),
272 |       JSON.stringify([...this.snippetMap], null, 2)
273 |     );
274 | 
275 |     // write Codex query times to 'codexQueryTimes.json'
276 |     fs.writeFileSync(
277 |       path.join(this.outputDir, "codexQueryTimes.json"),
278 |       JSON.stringify(this.perf.getCodexQueryTimes(), null, 2)
279 |     );
280 | 
281 |     // print summary statistics
282 |     console.log(
283 |       `${this.getNrPasses()} passed, ${this.getNrFailures()} failed, ${this.getNrPending()} pending, ${this.getNrOther()} other`
284 |     );
285 | 
286 |     // print API coverage
287 |     this.reportAPICoverage();
288 |   }
289 | 
290 |   /**
291 |    * Create directory for output files if it does not exist. If it does exist, delete it and its contents and create a new one.
292 |    */
293 |   private createOutputDir() {
294 |     if (fs.existsSync(this.outputDir)) {
295 |       fs.rmdirSync(this.outputDir, { recursive: true });
296 |     }
297 |     fs.mkdirSync(this.outputDir, { recursive: true });
298 |     fs.mkdirSync(path.join(this.outputDir, "tests"));
299 |     fs.mkdirSync(path.join(this.outputDir, "prompts"));
300 |     fs.mkdirSync(path.join(this.outputDir, "coverageData"));
301 |   }
302 | 
303 |   public recordTestResult(
304 |     test: ITestInfo,
305 |     temperature: number,
306 |     outcome: TestOutcome
307 |   ) {
308 |     super.recordTestResult(test, temperature, outcome);
309 |     console.log(
310 |       `${test.testName} (for ${test.api} at temperature ${temperature}, ${test.prompts[0].usageSnippets.length} snippets available): ${outcome.status}`
311 |     );
312 |   }
313 | }
314 | 


--------------------------------------------------------------------------------
/.github/workflows/run-experiment.yml:
--------------------------------------------------------------------------------
  1 | name: Run TestPilot experiment
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |       packages:
  7 |         description: "Packages to generate tests for"
  8 |         default: "+benchmarks.txt"
  9 |       snippetsFrom:
 10 |         description: "Code snippets source"
 11 |         default: "doc"
 12 |       numSnippets:
 13 |         description: 'Maximum number of snippets to include in each prompt, or "all"'
 14 |         default: "all"
 15 |       snippetLength:
 16 |         description: "Maximum length of each snippet in lines"
 17 |         default: "20"
 18 |       temperatures:
 19 |         description: "Sampling temperatures to try when obtaining completions (whitespace-separated)"
 20 |         default: "0.0"
 21 |       numCompletions:
 22 |         description: "Number of completions to generate for each prompt"
 23 |         default: "5"
 24 |       model:
 25 |         description: "Which LLM API to use"
 26 |         type: "choice"
 27 |         options:
 28 |           - "gpt"
 29 |           - "starcoder"
 30 |         default: "gpt"
 31 |       compareTo:
 32 |         description: "Run number of previous run to compare to (leave empty to skip comparison)"
 33 |         default: ""
 34 |       skipSlowBenchmarks:
 35 |         description: "Skip slow benchmarks"
 36 |         type: boolean
 37 |         default: false
 38 |       debug_enabled:
 39 |         type: boolean
 40 |         description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
 41 |         default: false
 42 |   # Run every weekday at 2:00 AM UTC
 43 |   # schedule:
 44 |   #   - cron: '0 2 * * 1-5'
 45 | 
 46 | jobs:
 47 |   setup:
 48 |     runs-on: ubuntu-latest
 49 |     outputs:
 50 |       packages: "${{ steps.parse_packages.outputs.packages }}"
 51 |       snippetsFrom: "${{ github.event.inputs.snippetsFrom || 'doc' }}"
 52 |       snippetLength: "${{ github.event.inputs.snippetLength || '20' }}"
 53 |       temperatures: "${{ github.event.inputs.temperatures || '0.0' }}"
 54 |       numSnippets: "${{ github.event.inputs.numSnippets || 'all' }}"
 55 |       numCompletions: "${{ github.event.inputs.numCompletions || '5' }}"
 56 |       model: "${{ github.event.inputs.model || 'gpt' }}"
 57 |     steps:
 58 |       - uses: actions/checkout@v3
 59 | 
 60 |       - uses: actions/setup-node@v3
 61 |         with:
 62 |           node-version: 12
 63 | 
 64 |       - id: parse_packages
 65 |         run: |
 66 |           packages=$(node ${GITHUB_WORKSPACE}/.github/parse_packages.js \
 67 |             ${{ github.event.inputs.skipSlowBenchmarks == 'true' && '--skip-slow-benchmarks' || '' }} \
 68 |             "${{ github.event.inputs.packages || '+benchmarks.txt' }}")
 69 |           echo "packages=$packages" >> $GITHUB_OUTPUT
 70 | 
 71 |   benchmark:
 72 |     needs:
 73 |       - setup
 74 |     runs-on: ubuntu-latest
 75 |     continue-on-error: true
 76 |     strategy:
 77 |       fail-fast: false
 78 |       matrix:
 79 |         package: ${{ fromJson(needs.setup.outputs.packages) }}
 80 |     steps:
 81 |       - uses: actions/checkout@v3
 82 |         with:
 83 |           path: testpilot
 84 | 
 85 |       - name: Check out CodeQL repo
 86 |         uses: actions/checkout@v3
 87 |         with:
 88 |           repository: github/codeql
 89 |           ref: codeql-cli/v2.10.0
 90 |           path: codeql-repo
 91 | 
 92 |       - name: Install CodeQL 2.10.0
 93 |         run: |
 94 |           wget -q https://github.com/github/codeql-cli-binaries/releases/download/v2.10.0/codeql-linux64.zip
 95 |           unzip codeql-linux64.zip
 96 |           echo "$GITHUB_WORKSPACE/codeql" >> $GITHUB_PATH
 97 |         env:
 98 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 99 | 
100 |       - name: Set up Node.js
101 |         uses: actions/setup-node@v3
102 |         with:
103 |           node-version: 12
104 | 
105 |       - name: Set up TestPilot
106 |         run: |
107 |           cd testpilot
108 |           npm run build
109 | 
110 |       - name: Checkout github package repo
111 |         if: ${{ matrix.package.host == 'github.com' }}
112 |         uses: actions/checkout@v3
113 |         with:
114 |           repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }}
115 |           ref: ${{ matrix.package.sha }}
116 |           path: "source"
117 |       
118 |       - name: Checkout gitlab package repo
119 |         if: ${{ matrix.package.host == 'gitlab.com' }}
120 |         run: |
121 |           git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} source
122 |           cd source
123 |           git checkout ${{ matrix.package.sha }}
124 | 
125 |       - name: Determine package name
126 |         id: pkg-name
127 |         run: |
128 |           # name of the package
129 |           TESTPILOT_PACKAGE_NAME=$(cat source/${{ matrix.package.path }}/package.json | jq -r .name )
130 | 
131 |           # some packages have a / in their names (looking at you, gitlab-js!)
132 |           if [[ "$TESTPILOT_PACKAGE_NAME" == *"/"* ]]; then
133 |             TESTPILOT_PACKAGE_NAME=${TESTPILOT_PACKAGE_NAME##*/}
134 |           fi
135 | 
136 |           # path to the package within the repo checkout
137 |           TESTPILOT_PACKAGE_PATH="$GITHUB_WORKSPACE/$TESTPILOT_PACKAGE_NAME/${{ matrix.package.path }}"
138 |           # make sure there isn't already a directory with the same name
139 |           if [ -d "$TESTPILOT_PACKAGE_PATH" ]; then
140 |             echo "ERROR: $TESTPILOT_PACKAGE_PATH already exists"
141 |             exit 1
142 |           fi
143 |           # rename checkout, since some packages examine its name (looking at you, bluebird!)
144 |           mv source $TESTPILOT_PACKAGE_NAME
145 |           echo "Package name: $TESTPILOT_PACKAGE_NAME, path: $TESTPILOT_PACKAGE_PATH"
146 |           # export environment variables
147 |           echo "TESTPILOT_PACKAGE_NAME=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_ENV
148 |           echo "TESTPILOT_PACKAGE_PATH=$TESTPILOT_PACKAGE_PATH" >> $GITHUB_ENV
149 |           echo "pkgName=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_OUTPUT
150 | 
151 |       - name: Install package, its dependencies, and test packages
152 |         run: |
153 |           cd $TESTPILOT_PACKAGE_PATH
154 |           npm i || npm i --legacy-peer-deps
155 |           # if matrix.package.dependencies is not empty, install them
156 |           if ! [ -z "${{ matrix.package.dependencies }}" ]; then
157 |             npm i ${{ matrix.package.dependencies }}
158 |           fi
159 |           npm run build || npm run prepack || echo 'Error with npm run build and npm run prepack'
160 |           npm i --no-save mocha
161 | 
162 |       - name: Create CodeQL database
163 |         if: ${{ needs.setup.outputs.snippetsFrom == 'code' || needs.setup.outputs.snippetsFrom == 'both' }}
164 |         run: |
165 |           codeql database create --language=javascript "--source-root=$TESTPILOT_PACKAGE_PATH" -- ./db
166 | 
167 |       - name: Generate tests
168 |         env:
169 |           TESTPILOT_LLM_API_ENDPOINT: "https://model-6.openai.azure.com/openai/deployments/turbo/completions?api-version=2022-12-01"
170 |           TESTPILOT_LLM_AUTH_HEADERS: '{ "api-key": "${{ secrets.GPT35_API_KEY }}" }'
171 |           STARCODER_API_ENDPOINT: '${{ secrets.STARCODER_API_ENDPOINT3 }}'
172 |         run: |
173 |           cd testpilot
174 |           outputdir="results/$TESTPILOT_PACKAGE_NAME"
175 |           mkdir -p $outputdir 
176 |           echo "Computing package statistics"
177 |           node benchmark/package_stats.js "$TESTPILOT_PACKAGE_PATH" > stats.json
178 |           echo "Generating tests for $TESTPILOT_PACKAGE_NAME"
179 |           node --max-old-space-size=6144 benchmark/run.js \
180 |             --outputDir $outputdir \
181 |             --database ../db \
182 |             --package "$TESTPILOT_PACKAGE_PATH" \
183 |             --snippets ${{ needs.setup.outputs.snippetsFrom }} \
184 |             --numSnippets ${{ needs.setup.outputs.numSnippets }} \
185 |             --snippetLength ${{ needs.setup.outputs.snippetLength }} \
186 |             --temperatures "${{ needs.setup.outputs.temperatures }}" \
187 |             --numCompletions ${{ needs.setup.outputs.numCompletions }} \
188 |             --model ${{ needs.setup.outputs.model }}
189 |           mv stats.json $outputdir
190 | 
191 |       - name: Calculate edit distance of generated tests
192 |         run: |
193 |           cd testpilot
194 |           outputdir="results/$TESTPILOT_PACKAGE_NAME"
195 |           node benchmark/editDistance.js --generatedTestsDir  $outputdir --existingTestsDir $TESTPILOT_PACKAGE_PATH --pkgName $TESTPILOT_PACKAGE_NAME
196 |           mv similarityReport.json $outputdir
197 | 
198 |       - name: Add non-trivial coverage data
199 |         run: |
200 |           cd testpilot
201 |           ./.github/non_trivial_coverage.sh "results/$TESTPILOT_PACKAGE_NAME"
202 | 
203 |       - name: Zip up results
204 |         run: |
205 |           cd testpilot
206 |           zip -r results.zip results
207 | 
208 |       - name: Upload artifacts
209 |         uses: actions/upload-artifact@v3
210 |         with:
211 |           name: results-${{ steps.pkg-name.outputs.pkgName }}
212 |           path: "testpilot/results.zip"
213 | 
214 |       - name: Setup tmate session
215 |         uses: mxschmitt/action-tmate@v3
216 |         if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
217 | 
218 |   combine_output:
219 |     name: Combine output from all benchmarks
220 |     needs:
221 |       - setup
222 |       - benchmark
223 |     runs-on: ubuntu-latest
224 |     steps:
225 |       - name: Download output zips
226 |         uses: actions/download-artifact@v4.1.7
227 | 
228 |       - name: Setup tmate session
229 |         uses: mxschmitt/action-tmate@v3
230 |         if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
231 | 
232 |       - name: Combine output zips
233 |         run: |
234 |           mkdir results
235 |           for zip in results-*/results.zip
236 |           do
237 |             unzip -oq $zip
238 |           done
239 |           zip -r results.zip results
240 |       - name: Upload combined output files
241 |         uses: actions/upload-artifact@v2
242 |         with:
243 |           name: results-all
244 |           path: results.zip
245 | 
246 |   generate-report:
247 |     needs:
248 |       - setup
249 |       - benchmark
250 |       - combine_output
251 |     runs-on: ubuntu-latest
252 |     steps:
253 |       - uses: actions/checkout@v3
254 | 
255 |       - name: Set up Node.js
256 |         uses: actions/setup-node@v3
257 |         with:
258 |           node-version: 12
259 | 
260 |       - name: Set up TestPilot
261 |         run: |
262 |           npm run build
263 | 
264 |       - name: Download artifacts for this run
265 |         uses: actions/download-artifact@v4.1.7
266 |         with:
267 |           name: results-all
268 |           path: results
269 | 
270 |       - name: Download artifacts for comparison run
271 |         if: ${{ github.event.inputs.compareTo != '' }}
272 |         uses: dawidd6/action-download-artifact@v2
273 |         with:
274 |           run_number: ${{ github.event.inputs.compareTo }}
275 |           name: results-all
276 |           path: baseline
277 | 
278 |       - name: Setup tmate session
279 |         uses: mxschmitt/action-tmate@v3
280 |         if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
281 | 
282 |       - name: Generate report
283 |         run: |
284 |           cd results
285 |           unzip results.zip
286 |           cd ..
287 | 
288 |           echo '${{ toJson(needs.setup.outputs) }}' > config.json
289 |           if [ -d baseline ]; then
290 |             cd baseline
291 |             unzip results.zip
292 |             cd .. 
293 |             baseline_artifact=baseline/results
294 |           else
295 |             baseline_artifact=''
296 |           fi
297 |           node ${GITHUB_WORKSPACE}/benchmark/generate_report.js config.json results/results $baseline_artifact > $GITHUB_STEP_SUMMARY
298 | 


--------------------------------------------------------------------------------
/ql/queries/AssertionQuality.qll:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Classes and predicates for working with TestPilot-generated reports.
  3 |  */
  4 | 
  5 | import javascript
  6 | 
  7 | /**
  8 |  * A report.json file, representing all data collected for a particular
  9 |  * benchmark.
 10 |  */
 11 | class ReportJson extends JsonObject {
 12 |   ReportJson() {
 13 |     this.isTopLevel() and
 14 |     this.getFile().getBaseName() = "report.json"
 15 |   }
 16 | 
 17 |   /** Gets the `tests/` folder next to this file. */
 18 |   Folder getTestFolder() { result = this.getFile().getParentContainer().getFolder("tests") }
 19 | 
 20 |   GeneratedTest getTest(string name) {
 21 |     result.getReport() = this and
 22 |     result.getBaseName() = name
 23 |   }
 24 | 
 25 |   GeneratedTest getTestById(int id) {
 26 |     exists(string strid |
 27 |       result = this.getTest("test_" + strid + ".js") and
 28 |       id = strid.toInt()
 29 |     )
 30 |   }
 31 | 
 32 |   /** Gets the metadata in this report. */
 33 |   JsonObject getMetadata() { result = this.getPropValue("metaData") }
 34 | 
 35 |   /** Gets the package name for this benchmark run. */
 36 |   string getPackageName() { result = this.getMetadata().getPropStringValue("packageName") }
 37 | 
 38 |   /** Gets a prompt in this report. */
 39 |   Prompt getAPrompt() { result.getReport() = this }
 40 | 
 41 |   /** Gets a test in this report. */
 42 |   GeneratedTest getATest() { result.getReport() = this }
 43 | 
 44 |   /** Gets a non-trivial test in this report. */
 45 |   GeneratedTest getANonTrivialTest() {
 46 |     result.getReport() = this and
 47 |     result.isNonTrivial()
 48 |   }
 49 | 
 50 |   /** Gets the total number of statements in the project covered by this report. */
 51 |   int getNumberOfStatements() {
 52 |     result =
 53 |       getPropValue("coverage")
 54 |           .getPropValue("total")
 55 |           .getPropValue("statements")
 56 |           .getPropValue("total")
 57 |           .getIntValue()
 58 |   }
 59 | 
 60 |   /** Gets the total number of passing tests in this report. */
 61 |   int getNumberOfTests() { result = count(GeneratedTest test | test.getReport() = this) }
 62 | 
 63 |   /** Holds if test `testName` in this report covers statement `stmtId`. */
 64 |   predicate testCoversStmt(string testName, string stmtId) {
 65 |     exists(JsonObject test |
 66 |       test = this.getPropValue("tests").getElementValue(_) and
 67 |       test.getPropStringValue("testName") = testName and
 68 |       stmtId = test.getPropValue("coveredStatements").getElementValue(_).getStringValue()
 69 |     )
 70 |   }
 71 | 
 72 |   /** Gets the status of test `testName`. */
 73 |   string getTestStatus(string testName) {
 74 |     exists(JsonObject test |
 75 |       test = this.getPropValue("tests").getElementValue(_) and
 76 |       test.getPropStringValue("testName") = testName and
 77 |       result = test.getPropStringValue("status")
 78 |     )
 79 |   }
 80 | 
 81 |   /** Gets the error message of test `testName`, if any. */
 82 |   string getTestErrMsg(string testName) {
 83 |     exists(JsonObject test |
 84 |       test = this.getPropValue("tests").getElementValue(_) and
 85 |       test.getPropStringValue("testName") = testName and
 86 |       result = test.getPropValue("err").getPropValue("message").getStringValue()
 87 |     )
 88 |   }
 89 | 
 90 |   /** Gets the error stack trace of test `testName`, if any. */
 91 |   string getTestErrStack(string testName) {
 92 |     exists(JsonObject test |
 93 |       test = this.getPropValue("tests").getElementValue(_) and
 94 |       test.getPropStringValue("testName") = testName and
 95 |       result = test.getPropValue("err").getPropValue("stack").getStringValue()
 96 |     )
 97 |   }
 98 | 
 99 |   /** Gets the error code of test `testName`, if any. */
100 |   string getTestErrCode(string testName) {
101 |     exists(JsonObject test |
102 |       test = this.getPropValue("tests").getElementValue(_) and
103 |       test.getPropStringValue("testName") = testName and
104 |       result = test.getPropValue("err").getPropValue("code").getStringValue()
105 |     )
106 |   }
107 | 
108 |   override string toString() { result = getPackageName() }
109 | }
110 | 
111 | /** A TestPilot-generated test stored in the report. */
112 | class GeneratedTest extends File {
113 |   ReportJson report;
114 | 
115 |   GeneratedTest() { this.getParentContainer() = report.getTestFolder() }
116 | 
117 |   /** Gets the report to which this test belongs. */
118 |   ReportJson getReport() { result = report }
119 | 
120 |   /** Gets the name of the package for which this test was generated. */
121 |   string getPackageName() { result = report.getPackageName() }
122 | 
123 |   /**
124 |    * Holds if this test is non-trivial, i.e., it contains an assertion
125 |    * that semantically depends on the package under test.
126 |    */
127 |   predicate isNonTrivial() {
128 |     exists(AssertionInGeneratedTest a | a.getFile() = this and a.isNonTrivial())
129 |   }
130 | 
131 |   /** Holds if this test covers the given statement. */
132 |   predicate coversStmt(string stmtId) { report.testCoversStmt(this.getBaseName(), stmtId) }
133 | 
134 |   string getStatus() { result = report.getTestStatus(this.getBaseName()) }
135 | 
136 |   /** Holds if this test passes. */
137 |   predicate passes() { this.getStatus() = "PASSED" }
138 | 
139 |   /** Holds if this test fails. */
140 |   predicate fails() { this.getStatus() = "FAILED" }
141 | 
142 |   /** Holds if this test fails with the given error message. */
143 |   predicate failsWith(string msg) {
144 |     this.fails() and msg = report.getTestErrMsg(this.getBaseName())
145 |   }
146 | 
147 |   private predicate failsDueToInternal(ErrorCategory errorCategory) {
148 |     errorCategory = "AssertionError" and
149 |     report.getTestErrStack(this.getBaseName()).matches("%AssertionError%")
150 |     or
151 |     errorCategory = "FileSystemError" and
152 |     report.getTestErrCode(this.getBaseName()) in [
153 |         "EEXIST", "EISDIR", "ENOENT", "ENOTEMPTY", "EACCES"
154 |       ]
155 |     or
156 |     errorCategory = "CorrectnessError" and
157 |     report
158 |         .getTestErrStack(this.getBaseName())
159 |         .matches([
160 |             "%ReferenceError%", "%TypeError%", "%done() invoked with non-Error%",
161 |             "%Maximum call stack size exceeded%",
162 |           ])
163 |     or
164 |     errorCategory = "CorrectnessError" and
165 |     report.getTestErrMsg(this.getBaseName()).matches("%Invalid syntax%")
166 |     or
167 |     errorCategory = "TimeoutError" and
168 |     report.getTestErrCode(this.getBaseName()) = "ERR_MOCHA_TIMEOUT"
169 |   }
170 | 
171 |   predicate failsDueTo(ErrorCategory errorCategory) {
172 |     this.failsDueToInternal(errorCategory)
173 |     or
174 |     this.fails() and
175 |     not this.failsDueToInternal(_) and
176 |     errorCategory = "OtherError"
177 |   }
178 | }
179 | 
180 | /**
181 |  * An assertion in a TestPilot-generated test.
182 |  */
183 | class AssertionInGeneratedTest extends DataFlow::Node {
184 |   GeneratedTest test;
185 | 
186 |   AssertionInGeneratedTest() {
187 |     this = API::moduleImport("assert").getASuccessor*().getACall() and
188 |     test = this.getFile()
189 |   }
190 | 
191 |   /**
192 |    * Gets a node in the (intra-procedural) backwards slice of this assertions.
193 |    */
194 |   DataFlow::Node getANodeInBackwardsSlice() {
195 |     result = this
196 |     or
197 |     // follow data flow
198 |     DataFlow::localFlowStep(result, this.getANodeInBackwardsSlice())
199 |     or
200 |     // follow taint flow
201 |     TaintTracking::sharedTaintStep(result, this.getANodeInBackwardsSlice())
202 |     or
203 |     // follow syntactic nesting: if an expression is in the backwards slice,
204 |     // then so are all its subexpressions
205 |     result.asExpr().getParent+() = this.getANodeInBackwardsSlice().asExpr()
206 |     or
207 |     // heuristic to approximate flow through callbacks: for `foo(bar, cb)` we
208 |     // add both `foo` and `bar` to the backwards slice of any node in the callback
209 |     // function `cb` to approximate inter-procedural data and control dependencies
210 |     exists(DataFlow::InvokeNode call |
211 |       call.getABoundCallbackParameter(_, _) = this.getANodeInBackwardsSlice()
212 |       or
213 |       exists(Function cb | cb = call.getAnArgument().getAFunctionValue().getFunction() |
214 |         cb = this.getANodeInBackwardsSlice().getContainer()
215 |       )
216 |     |
217 |       result = call.getAnArgument() or
218 |       result = call.getCalleeNode()
219 |     )
220 |     or
221 |     // heuristic to approximate side effects: for `foo(bar)` we assume that
222 |     // `foo` may update any property of `bar`, and so we include `foo` in the
223 |     // backwards slice of any other uses of `bar`
224 |     exists(DataFlow::InvokeNode call, DataFlow::SsaDefinitionNode v |
225 |       call.getAnArgument().getAPredecessor() = v and
226 |       v = this.getANodeInBackwardsSlice() and
227 |       result = call.getCalleeNode()
228 |     )
229 |   }
230 | 
231 |   /**
232 |    * Holds if this assertion is non-trivial, i.e., it semantically depends on
233 |    * the package under test.
234 |    */
235 |   predicate isNonTrivial() {
236 |     exists(Require req | req = this.getANodeInBackwardsSlice().asExpr() |
237 |       req.getImportedPath().getValue() = test.getPackageName()
238 |     )
239 |   }
240 | }
241 | 
242 | class PromptJson extends JsonObject {
243 |   ReportJson report;
244 | 
245 |   PromptJson() {
246 |     this.isTopLevel() and
247 |     this.getFile().getBaseName() = "prompts.json" and
248 |     this.getFile().getParentContainer() = report.getFile().getParentContainer()
249 |   }
250 | 
251 |   /** Gets the report to which this prompt belongs. */
252 |   ReportJson getReport() { result = report }
253 | }
254 | 
255 | class Prompt extends JsonObject {
256 |   PromptJson prompts;
257 | 
258 |   Prompt() { this = prompts.getPropValue("prompts").(JsonArray).getElementValue(_) }
259 | 
260 |   ReportJson getReport() { result = prompts.getReport() }
261 | 
262 |   GeneratedTest getATest(boolean passes, boolean nontrivial) {
263 |     exists(string testName |
264 |       testName = this.getPropValue("tests").(JsonArray).getElementStringValue(_) and
265 |       result = getReport().getTest(testName)
266 |     ) and
267 |     (if result.passes() then passes = true else passes = false) and
268 |     (if result.isNonTrivial() then nontrivial = true else nontrivial = false)
269 |   }
270 | 
271 |   int getId() { result = this.getPropValue("id").getIntValue() }
272 | 
273 |   private JsonObject getProvenanceInfo() {
274 |     result = this.getPropValue("provenance") or
275 |     result = this.getPropValue("provenance").(JsonArray).getElementValue(_)
276 |   }
277 | 
278 |   predicate isRefinedFrom(Prompt originalPrompt, GeneratedTest test, string refiner) {
279 |     exists(JsonObject provenance | provenance = getProvenanceInfo() |
280 |       refiner = provenance.getPropStringValue("refiner") and
281 |       test = this.getReport().getTestById(provenance.getPropValue("test").getIntValue()) and
282 |       originalPrompt.getId() = provenance.getPropValue("originalPrompt").getIntValue() and
283 |       originalPrompt.getReport() = this.getReport()
284 |     )
285 |   }
286 | 
287 |   predicate isRefinedFrom(Prompt originalPrompt, string refiner) {
288 |     this.isRefinedFrom(originalPrompt, _, refiner)
289 |   }
290 | 
291 |   string getAProvenance() {
292 |     not this.isRefinedFrom(_, _) and
293 |     result = ""
294 |     or
295 |     exists(Prompt originalPrompt, string refiner | this.isRefinedFrom(originalPrompt, refiner) |
296 |       result = originalPrompt.getAProvenance() + "," + refiner
297 |     )
298 |   }
299 | 
300 |   /** Holds if this prompt can be generated without the given refiner. */
301 |   predicate doesNotNeed(Refiner refiner) {
302 |     exists(string provenance | provenance = this.getAProvenance() |
303 |       not provenance.regexpMatch(".*\\b\\Q" + refiner + "\\E\\b.*")
304 |     )
305 |   }
306 | 
307 |   override string toString() { result = prompts.getReport() + ":prompt" + this.getId() }
308 | }
309 | 
310 | class Refiner extends string {
311 |   Refiner() { any(Prompt p).isRefinedFrom(_, this) }
312 | }
313 | 
314 | /** A symbolic representation of a cause for test failure. */
315 | class ErrorCategory extends string {
316 |   ErrorCategory() {
317 |     this = "AssertionError" or
318 |     this = "FileSystemError" or
319 |     this = "CorrectnessError" or
320 |     this = "TimeoutError" or
321 |     this = "OtherError"
322 |   }
323 | }
324 | 


--------------------------------------------------------------------------------