├── ql
│   ├── .gitignore
│   ├── tests
│   │   └── SnippetMining
│   │       ├── TestSnippetMining.expected
│   │       └── TestSnippetMining.ql
│   ├── qlpack.yml
│   ├── codeql-pack.lock.yml
│   ├── queries
│   │   ├── TrivialTest.ql
│   │   ├── MissingDone.ql
│   │   ├── TestFixedByRetry.ql
│   │   ├── FailureClassification.ql
│   │   ├── UnhelpfulRefinement.ql
│   │   ├── RefinerContributions.ql
│   │   └── AssertionQuality.qll
│   └── README.md
├── benchmark
│   ├── .gitignore
│   ├── tsconfig.json
│   ├── testCollectorHelper.ts
│   ├── package_stats.ts
│   ├── generate_diversity_report.ts
│   ├── performanceMeasurer.ts
│   ├── editDistance.ts
│   ├── generate_report.ts
│   ├── run.ts
│   └── testResultCollector.ts
├── .gitignore
├── .npmignore
├── test
│   ├── tsconfig.json
│   ├── input
│   │   ├── js-fencing-1.md
│   │   ├── non-lang-fencing.md
│   │   ├── ts-fencing-1.md
│   │   └── coffee-fencing.md
│   ├── mockModel.ts
│   ├── editDistance.ts
│   ├── exploreAPIs.ts
│   ├── APIFunction.ts
│   ├── test-generation.ts
│   ├── syntax.ts
│   └── docSnippets.ts
├── src
│   ├── tsconfig.json
│   ├── completionModel.ts
│   ├── testValidator.ts
│   ├── index.ts
│   ├── extensionPoints.ts
│   ├── coverage.ts
│   ├── mockModel.ts
│   ├── syntax.ts
│   ├── testResultCollector.ts
│   ├── report.ts
│   ├── generateTests.ts
│   ├── snippetHelper.ts
│   ├── codex.ts
│   ├── mineSnippets.ts
│   ├── mochaValidator.ts
│   └── promptCrafting.ts
├── CODEOWNERS
├── typings
│   └── espree.d.ts
├── tsconfig-base.json
├── .github
│   ├── workflows
│   │   ├── ci.yml
│   │   ├── release.yml
│   │   ├── measure-coverage.yml
│   │   └── run-experiment.yml
│   ├── parse_packages.js
│   ├── benchmarks.txt
│   └── non_trivial_coverage.sh
├── SUPPORT.md
├── LICENSE
├── SECURITY.md
├── package.json
├── CONTRIBUTING.md
├── examples
│   ├── testGenerationScript.ts
│   └── momentjs_test_generation.md
├── CODE_OF_CONDUCT.md
└── README.md

--------------------------------------------------------------------------------
/ql/.gitignore:
--------------------------------------------------------------------------------
.cache

--------------------------------------------------------------------------------
/benchmark/.gitignore:
--------------------------------------------------------------------------------
*.js
*.js.map

--------------------------------------------------------------------------------
/ql/tests/SnippetMining/TestSnippetMining.expected:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
node_modules/
dist/
*.tgz
.DS_Store

--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
.github/
benchmark/
ql/
test/

--------------------------------------------------------------------------------
/benchmark/tsconfig.json:
--------------------------------------------------------------------------------
{
  "extends": "../tsconfig-base.json",
  "include": ["*.ts"]
}

--------------------------------------------------------------------------------
/test/tsconfig.json:
--------------------------------------------------------------------------------
{
  "extends": "../tsconfig-base.json",
  "include": ["../typings/*.d.ts", "./**/*.ts"]
}

--------------------------------------------------------------------------------
/ql/qlpack.yml:
--------------------------------------------------------------------------------
name: testpilot-experiments
version: 0.1.0
dependencies:
  codeql/javascript-all: "*"
extractor: javascript
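The empty `TestSnippetMining.expected` file above is deliberate: a CodeQL unit test passes when the query's actual results match its `.expected` file, so an empty file asserts that the query produces no results on the test code. A minimal sketch of exercising this pack with the CodeQL CLI, assuming `codeql` is on your `PATH` and you run from the `ql` directory (these are standard CLI subcommands, not project-specific scripts):

```sh
# Install the pack's dependencies (codeql/javascript-all), as pinned in codeql-pack.lock.yml
codeql pack install

# Run the unit tests; SnippetMining passes if the query output matches TestSnippetMining.expected
codeql test run tests/SnippetMining
```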

--------------------------------------------------------------------------------
/test/input/js-fencing-1.md:
--------------------------------------------------------------------------------
```js
const vol = Volume.fromJSON({
  "/app/index.js": "...",
  "/app/package.json": "...",
});
```

--------------------------------------------------------------------------------
/test/input/non-lang-fencing.md:
--------------------------------------------------------------------------------
```
const vol = Volume.fromJSON({
  '/app/index.js': '...',
  '/app/package.json': '...',
});
```

--------------------------------------------------------------------------------
/test/input/ts-fencing-1.md:
--------------------------------------------------------------------------------
```ts
const vol = Volume.fromJSON({
  "/app/index.js": "...",
  "/app/package.json": "...",
});
```

--------------------------------------------------------------------------------
/test/input/coffee-fencing.md:
--------------------------------------------------------------------------------
```coffee
const vol = Volume.fromJSON({
  '/app/index.js': '...',
  '/app/package.json': '...',
});
```

--------------------------------------------------------------------------------
/src/tsconfig.json:
--------------------------------------------------------------------------------
{
  "extends": "../tsconfig-base.json",
  "compilerOptions": {
    "outDir": "../dist",
    "declaration": true
  },
  "include": ["../typings/*.d.ts", "./**/*.ts"]
}

--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
# For more information, see [docs](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#codeowners-syntax)

# This repository is maintained by:
* @max-schaefer @snadi @franktip

--------------------------------------------------------------------------------
/typings/espree.d.ts:
--------------------------------------------------------------------------------
// a minimal type definition file covering only what we need
declare module "espree" {
  export interface Options {
    ecmaVersion?: number | "latest";
    loc?: boolean;
    comment?: boolean;
  }
  export function parse(code: string, options?: Options): any;
}

--------------------------------------------------------------------------------
/ql/codeql-pack.lock.yml:
--------------------------------------------------------------------------------
---
lockVersion: 1.0.0
dependencies:
  codeql/javascript-all:
    version: 0.6.4
  codeql/regex:
    version: 0.0.15
  codeql/tutorial:
    version: 0.0.12
  codeql/util:
    version: 0.0.12
  codeql/yaml:
    version: 0.0.4
compiled: false

--------------------------------------------------------------------------------
/tsconfig-base.json:
--------------------------------------------------------------------------------
{
  "compilerOptions": {
    "target": "es2018",
    "module": "NodeNext",
    "sourceMap": true,
    "experimentalDecorators": true,
    "skipLibCheck": true,
    "strict": true,
    "lib": [
      "es2018"
    ]
  }
}

--------------------------------------------------------------------------------
/src/completionModel.ts:
--------------------------------------------------------------------------------
/**
 * An abstract representation of a model such as Codex that can provide
 * completions for a prompt.
 */
export interface ICompletionModel {
  /**
   * Get a set of completions for the given prompt with the given sampling temperature.
   */
  completions(prompt: string, temperature: number): Promise<Set<string>>;
}

--------------------------------------------------------------------------------
/ql/queries/TrivialTest.ql:
--------------------------------------------------------------------------------
/**
 * @name Trivial test
 * @description Highlight tests that do not contain non-trivial assertions.
 */

import AssertionQuality

from GeneratedTest t, string reason
where
  not exists(AssertionInGeneratedTest a | a.getFile() = t and a.isNonTrivial()) and
  (
    if exists(AssertionInGeneratedTest a | a.getFile() = t)
    then reason = "only trivial assertions"
    else reason = "no assertions"
  )
select t, "Test contains " + reason + "."

--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: Run tests

on:
  pull_request:
    branches: [main]

  workflow_dispatch:

jobs:
  run-tests:
    runs-on: ubuntu-latest

    steps:
      - name: Check out this repo
        uses: actions/checkout@v3

      - name: Set up Node.js
        uses: actions/setup-node@v3

      - name: Check formatting and run tests
        run: |
          npm run build
          npm run autoformat:check
          npm run test

--------------------------------------------------------------------------------
/src/testValidator.ts:
--------------------------------------------------------------------------------
import { ICoverageSummary } from "./coverage";
import { TestOutcome } from "./report";

export abstract class TestValidator {
  /** Validate the given test, determining whether it passes or not. */
  public abstract validateTest(
    testName: string,
    testSource: string
  ): TestOutcome;

  /** Compute a coverage summary for all passing tests this validator has seen. */
  public abstract computeCoverageSummary(): ICoverageSummary;

  /** Clean up any temporary data this validator has accumulated. */
  public cleanup(): void {}
}

--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
# Support

## How to file issues and get help

This project uses GitHub issues to track bugs and feature requests. Please search the existing issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new issue.

For help or questions about using this project, please file an issue.

TestPilot is not actively developed but is maintained by GitHub staff and the community. We will do our best to respond to support and community questions in a timely manner.

## GitHub Support Policy

Support for this project is limited to the resources listed above.

--------------------------------------------------------------------------------
/test/mockModel.ts:
--------------------------------------------------------------------------------
import { expect } from "chai";
import { MockCompletionModel } from "../src/mockModel";

describe("test MockCompletionModel", () => {
  it("should be able to add and get completions", async () => {
    const model = new MockCompletionModel(true);
    model.addCompletions("foo", 0.5, ["bar", "baz"]);
    expect(await model.completions("foo", 0.5)).to.deep.equal(
      new Set(["bar", "baz"])
    );
  });

  it("should throw an error if completions are not found", async () => {
    const model = new MockCompletionModel(true);
    try {
      await model.completions("foo", 0.5);
      expect.fail();
    } catch (e: any) {
      expect(e.message).to.equal("Prompt not found at temperature 0.5: foo");
    }
  });

  it("should not throw an error if completions are not found and strictResponses is false", async () => {
    const model = new MockCompletionModel(false);
    expect(await model.completions("foo", 0.5)).to.deep.equal(new Set());
  });
});

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
export { Codex, PostOptions as CodexPostOptions } from "./codex";
export { ICompletionModel } from "./completionModel";
export { emptyCoverageSummary, ICoverageSummary } from "./coverage";
export { getDocSnippets } from "./docSnippets";
export {
  API,
  ApiElementDescriptor,
  APIFunction,
  exploreAPI,
  findDocComments,
  FunctionDescriptor,
} from "./exploreAPI";
export { TestGenerator } from "./generateTests";
export { getSnippets } from "./mineSnippets";
export { MochaValidator } from "./mochaValidator";
export { MockCompletionModel } from "./mockModel";
export { Prompt, RetryPrompt } from "./promptCrafting";
export {
  IMetaData,
  ITestFailureInfo,
  ITestInfo,
  ITestReport,
  ReportForTest,
  TestOutcome,
  TestStatus,
} from "./report";
export { trimCompletion } from "./syntax";
export {
  BaseTestResultCollector,
  IPromptInfo,
  ITestResultCollector,
} from "./testResultCollector";
export { TestValidator } from "./testValidator";

--------------------------------------------------------------------------------
/ql/queries/MissingDone.ql:
--------------------------------------------------------------------------------
/**
 * @name Test failure due to missing `done` callback
 * @description Find tests that fail because they do not call the `done`
 * callback.
 * @kind problem
 */

import AssertionQuality

class TimedOutTest extends GeneratedTest {
  TimedOutTest() { this.failsDueTo("TimeoutError") }

  predicate isMissingDone() {
    exists(DataFlow::ParameterNode done |
      done = DataFlow::globalVarRef("it").getACall().getABoundCallbackParameter(1, 0) and
      done.getFile() = this and
      not exists(done.getACall())
    )
  }
}

query predicate stats(ReportJson report, int totalFailed, int totalTimeout, int totalMissingDone) {
  totalFailed = count(GeneratedTest t | t = report.getATest() and t.fails()) and
  totalTimeout = count(TimedOutTest t | t = report.getATest()) and
  totalMissingDone = count(TimedOutTest t | t = report.getATest() and t.isMissingDone())
}

from TimedOutTest t
where t.isMissingDone()
select t, "Test failure due to missing call to `done`."

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 GitHub, Inc

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/ql/queries/TestFixedByRetry.ql:
--------------------------------------------------------------------------------
/**
 * @name Test fixed after retrying
 * @description Find failing tests that pass after having been refined
 * with the `RetryWithError` refiner.
 * @kind problem
 */

import AssertionQuality

predicate testFixedByRetry(
  ReportJson report, Prompt orig, GeneratedTest failing, Prompt refined, GeneratedTest passing
) {
  orig = report.getAPrompt() and
  failing = orig.getATest(false, _) and
  refined.isRefinedFrom(orig, failing, "RetryWithError") and
  passing = refined.getATest(true, _)
}

query predicate stats(
  ReportJson report, ErrorCategory errorCategory, int failed, int fixed
) {
  failed = count(GeneratedTest t | t = report.getATest() and t.failsDueTo(errorCategory)) and
  fixed =
    count(GeneratedTest t | testFixedByRetry(report, _, t, _, _) and t.failsDueTo(errorCategory))
}

from Prompt orig, GeneratedTest failing, Prompt refined, GeneratedTest passing
where testFixedByRetry(_, orig, failing, refined, passing)
select failing, "This test was $@ by retrying.", passing, "fixed"

--------------------------------------------------------------------------------
/ql/README.md:
--------------------------------------------------------------------------------
# Setting up and using CodeQL

## Installation

Install the CodeQL CLI as described in the [documentation](https://docs.github.com/en/code-security/codeql-cli/getting-started-with-the-codeql-cli/setting-up-the-codeql-cli).

In this directory, run `codeql pack install` to install the CodeQL libraries for JavaScript.

## Analyzing the results of a benchmark run

To analyze the results of a benchmark run, download the artifacts to some directory `$artifact_dir`, and then run the following command to build a database from the results in `$dbdir`:

```sh
LGTM_INDEX_FILTERS='include:**/*.json
exclude:**/coverageData/**/*.json' codeql database create --overwrite -l javascript --source-root $artifact_dir -- $dbdir
```

(Note that the environment variable `LGTM_INDEX_FILTERS` has to be set exactly as shown, with a _newline_ in between the `include:` and `exclude:` lines. Otherwise database creation will fail or result in an empty database.)

If the artifacts contain very large (>10MB) JSON files, those files will be skipped by default. To include them in the database, set the environment variable `LGTM_MAX_FILE_SIZE` to a larger value, such as `100MB`.

You can use either the CodeQL CLI or the CodeQL extension for VSCode to analyze the resulting database, using the queries in this repository.
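For example, to run a single query from this pack against the database and decode its results from the command line (this mirrors the invocation used by `.github/non_trivial_coverage.sh`; the `.bqrs` output path is only an illustration):

```sh
codeql query run --output $dbdir/TrivialTest.bqrs -d $dbdir queries/TrivialTest.ql
codeql bqrs decode --format csv $dbdir/TrivialTest.bqrs
```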

--------------------------------------------------------------------------------
/src/extensionPoints.ts:
--------------------------------------------------------------------------------
// function getExtensionPoints(prompt: Prompt, ast: any): Prompt[] {
//   let extensionPoints: Prompt[] = [];
//   let fixedPart = prompt.prefix + prompt.snippets + prompt.sign;
//   estraverse.traverse(ast, {
//     enter: function (node: any, parent) {
//       if (node.type === 'FunctionDeclaration' || node.type === 'FunctionExpression') {
//         let cutoff = node.end - 1;
//         if (cutoff >= 0) {
//           extensionPoints.push({
//             prefix: prompt.prefix,
//             sign: prompt.sign,
//             snippets: prompt.snippets,
//             code: (fixedPart + prompt.code).slice(fixedPart.length, cutoff),
//             suffix: (fixedPart + prompt.code).slice(cutoff) + prompt.suffix,
//             id: prompt.id
//           });
//         }
//       }
//     },
//     leave: function (node, parent) {
//       //nothing for now
//     }
//   });
//   // The first element in extensionPoints is for the describe function,
//   // as it is the topmost function definition in the AST.
//   // So we remove it to not add multiple tests.
//   return extensionPoints.slice(1);
// }

--------------------------------------------------------------------------------
/ql/queries/FailureClassification.ql:
--------------------------------------------------------------------------------
/**
 * @name Test failure classification
 * @description Classify the cause of test failures.
 * @kind problem
 */

import AssertionQuality

/** Classify reasons for test failure. */
predicate testFailsDueTo(ReportJson report, GeneratedTest failing, ErrorCategory errorCategory) {
  failing = report.getATest() and
  failing.failsDueTo(errorCategory)
}

/** Compute statistics about reasons for test failure. */
query predicate stats(ReportJson report, ErrorCategory category, int numFailed) {
  numFailed = count(GeneratedTest t | testFailsDueTo(report, t, category))
}

/**
 * Consistency check: a test should be assigned a single error category iff it
 * fails.
 *
 * This predicate should be empty.
 */
query predicate check(GeneratedTest t, string problem) {
  t.fails() and
  exists(int n | n = count(ErrorCategory err | t.failsDueTo(err)) |
    n != 1 and
    problem = "Test fails, but is assigned " + n + " error categories instead of one."
  )
  or
  not t.fails() and
  exists(ErrorCategory err | t.failsDueTo(err) |
    problem = "Test does not fail, but is assigned error category " + err.toString() + "."
  )
}

from GeneratedTest failing, ErrorCategory errorCategory
where testFailsDueTo(_, failing, errorCategory)
select failing, "This test fails due to " + errorCategory + "."

--------------------------------------------------------------------------------
/ql/tests/SnippetMining/TestSnippetMining.ql:
--------------------------------------------------------------------------------
import javascript
import queries.SnippetMining
import queries.NameBasedCallGraph

/** For this test, we want to mine calls to functions named `target`. */
class FunctionToMine extends TargetFunction {
  FunctionToMine() { this = "target" }
}

/**
 * Looks for a comment of the form `// call #n` in the same file (`path`)
 * and on the same `line` as `invk`, and gets the identifier `#n`.
 */
string getId(InvokeExpr invk, string path, int line) {
  exists(Comment c |
    invk.getLocation().hasLocationInfo(path, _, _, line, _) and
    c.getLocation().hasLocationInfo(path, line, _, _, _) and
    result = c.getText().regexpFind("(?<=call )#\\d+", _, _)
  )
}

/**
 * Holds if there is a comment `// relevant to call #n` on the given `line`
 * in the file with the given `path`, and the method call `invk` has identifier
 * `#n`.
 */
predicate expectedRelevantLine(InvokeExpr invk, string path, int line) {
  exists(getId(invk, path, line))
  or
  exists(Comment c |
    c.getLocation().hasLocationInfo(path, line, _, _, _) and
    c.getText().regexpMatch(".*relevant to call .*" + getId(invk, _, _) + ".*")
  )
}

from InvokeExpr invk, string path, int line, string msg
where
  relevantLine(invk, path, line) and
  not expectedRelevantLine(invk, path, line) and
  msg = "unexpected relevant line"
  or
  not relevantLine(invk, path, line) and
  expectedRelevantLine(invk, path, line) and
  msg = "missing relevant line"
select invk, msg, path, line

--------------------------------------------------------------------------------
/src/coverage.ts:
--------------------------------------------------------------------------------
/**
 * Represents the coverage information associated with each "category" of
 * coverage (e.g., "statements", "branches", "functions", "lines")
 */
interface ICoverageCategoryStats {
  total: number;
  covered: number;
  skipped: number;
  pct: number;
  nonTrivialPct?: number;
}

/**
 * Represents the coverage information associated with a generated test suite,
 * consisting of the coverage information for each "category" of coverage
 * (e.g., "statements", "branches", "functions", "lines")
 */
export interface ICoverageStats {
  lines: ICoverageCategoryStats;
  statements: ICoverageCategoryStats;
  functions: ICoverageCategoryStats;
  branches: ICoverageCategoryStats;
  branchesTrue: ICoverageCategoryStats;
}

/**
 * Represents a summary of the coverage information associated with a generated test suite,
 * consisting of both the total coverage information, and similar information on a per-file basis
 */
export interface ICoverageSummary {
  total: ICoverageStats;
  [file: string]: ICoverageStats;
}

/**
 * An empty coverage summary object
 */
export function emptyCoverageSummary(): ICoverageSummary {
  return {
    total: {
      lines: { total: 0, covered: 0, skipped: 0, pct: 0 },
      statements: { total: 0, covered: 0, skipped: 0, pct: 0 },
      functions: { total: 0, covered: 0, skipped: 0, pct: 0 },
      branches: { total: 0, covered: 0, skipped: 0, pct: 0 },
      branchesTrue: { total: 0, covered: 0, skipped: 0, pct: 0 },
    },
  };
}

--------------------------------------------------------------------------------
/src/mockModel.ts:
--------------------------------------------------------------------------------
import path from "path";
import { ICompletionModel } from "./completionModel";
import { readFileSync } from "fs";

export class MockCompletionModel implements ICompletionModel {
  private completionMap: Map<string, string[]> = new Map();

  constructor(private strictResponses: boolean) {}

  static fromFile(file: string, strictResponses: boolean) {
    const data = JSON.parse(readFileSync(file, "utf8"));
    console.log("Loading completions from file");
    const model = new MockCompletionModel(strictResponses);
    for (const { file: promptFile, temperature, completions } of data.prompts) {
      const prompt = readFileSync(
        path.join(path.dirname(file), "prompts", promptFile),
        "utf8"
      );
      model.addCompletions(prompt, temperature, completions);
    }
    return model;
  }

  private key(prompt: string, temperature: number) {
    return JSON.stringify([prompt, temperature]);
  }

  public addCompletions(
    prompt: string,
    temperature: number,
    completions: string[]
  ) {
    this.completionMap.set(this.key(prompt, temperature), completions);
  }

  public async completions(
    prompt: string,
    temperature: number
  ): Promise<Set<string>> {
    const completions = this.completionMap.get(this.key(prompt, temperature));
    if (!completions) {
      const err = `Prompt not found at temperature ${temperature}: ${prompt}`;
      if (this.strictResponses) {
        throw new Error(err);
      } else {
        console.warn(err);
      }
    }
    return new Set(completions);
  }
}

--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
Thanks for helping make GitHub safe for everyone.

# Security

GitHub takes the security of our software products and services seriously, including all of the open source code repositories managed through our GitHub organizations, such as [GitHub](https://github.com/GitHub).

Even though [open source repositories are outside of the scope of our bug bounty program](https://bounty.github.com/index.html#scope) and therefore not eligible for bounty rewards, we will ensure that your finding gets passed along to the appropriate maintainers for remediation.

## Reporting Security Issues

If you believe you have found a security vulnerability in any GitHub-owned repository, please report it to us through coordinated disclosure.

**Please do not report security vulnerabilities through public GitHub issues, discussions, or pull requests.**

Instead, please send an email to opensource-security[@]github.com.

Please include as much of the information listed below as you can to help us better understand and resolve the issue:

* The type of issue (e.g., buffer overflow, SQL injection, or cross-site scripting)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

## Policy

See [GitHub's Safe Harbor Policy](https://docs.github.com/en/github/site-policy/github-bug-bounty-program-legal-safe-harbor#1-safe-harbor-terms)

--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "testpilot",
  "version": "0.0.1",
  "description": "Test generation using large language models",
  "main": "dist/index.js",
  "author": "Max Schaefer ",
  "contributors": [
    "Frank Tip ",
    "Sarah Nadi ",
    "Aryaz Eghbali "
  ],
  "license": "MIT",
  "scripts": {
    "prebuild": "npm i",
    "build": "tsc -p src && tsc -p benchmark",
    "build:watch": "tsc --watch -p src && tsc --watch -p benchmark",
    "pretest": "npm run build",
    "test": "ts-mocha --forbid-only -p test/tsconfig.json test/*.ts",
    "autoformat": "prettier --write src test typings benchmark examples",
    "autoformat:check": "prettier --check src test/*.ts typings benchmark/*.ts examples/*.ts",
    "prepack": "npm run build"
  },
  "devDependencies": {
    "@types/adm-zip": "^0.5.0",
    "@types/chai": "^4.3.1",
    "@types/dedent": "^0.7.0",
    "@types/deep-equal-in-any-order": "^1.0.1",
    "@types/estraverse": "^5.1.1",
    "@types/levenshtein": "^1.0.1",
    "@types/mocha": "^9.1.1",
    "@types/node": "^12.20.55",
    "@types/yargs": "^17.0.10",
    "chai": "^4.3.6",
    "deep-equal-in-any-order": "^2.0.0",
    "prettier": "^2.7.1",
    "source-map-support": "^0.5.21",
    "ts-mocha": "^10.0.0",
    "typescript": "^4.9.3"
  },
  "dependencies": {
    "adm-zip": "^0.5.9",
    "axios": "^1.7.4",
    "common-js-file-extensions": "^1.0.4",
    "console-stamp": "^3.1.0",
    "dedent": "^0.7.0",
    "espree": "^9.3.2",
    "estraverse": "^5.3.0",
    "fast-glob": "^3.2.12",
    "levenshtein": "^1.0.5",
    "mocha": "^10.0.0",
    "nyc": "^15.1.0",
    "pirates": "^4.0.5",
    "simple-git": "^3.16.0",
    "yargs": "^17.6.2"
  }
}

--------------------------------------------------------------------------------
/ql/queries/UnhelpfulRefinement.ql:
--------------------------------------------------------------------------------
/**
 * @name Unhelpful refinement
 * @description Find a prompt refinement where the original prompt produced
 * a test that passed, but the refined prompt does not.
 * @kind problem
 */

import AssertionQuality

predicate unhelpfulRefinement(
  ReportJson report, Refiner refiner, Prompt orig, GeneratedTest passing, Prompt refined
) {
  orig = report.getAPrompt() and
  passing = orig.getATest(true, true) and
  refined.isRefinedFrom(orig, refiner) and
  not exists(refined.getATest(true, _))
}

query predicate stats(string package, string refiner, int totalRefinements, int totalUnhelpful, float ratio) {
  exists(ReportJson report | package = report.getPackageName() |
    totalRefinements = strictcount(Prompt p | p = report.getAPrompt() and p.isRefinedFrom(_, refiner)) and
    totalUnhelpful = count(Prompt p | unhelpfulRefinement(report, refiner, p, _, _)) and
    ratio = totalUnhelpful.(float) / totalRefinements
  )
  or
  package = "all" and
  totalRefinements = strictcount(Prompt p | p.isRefinedFrom(_, refiner)) and
  totalUnhelpful = count(Prompt p | unhelpfulRefinement(_, refiner, p, _, _)) and
  ratio = totalUnhelpful.(float) / totalRefinements
  or
  refiner = "any" and
  package = "all" and
  totalRefinements = strictcount(Prompt p | p.isRefinedFrom(_, _)) and
  totalUnhelpful = count(Prompt p | unhelpfulRefinement(_, _, p, _, _)) and
  ratio = totalUnhelpful.(float) / totalRefinements
}

from Prompt orig, GeneratedTest passing, Refiner refiner, Prompt refined
where unhelpfulRefinement(_, refiner, orig, passing, refined)
select orig,
  "This prompt produced a $@, but after $@ with " + refiner +
    " only failing tests were produced, for example $@.", passing, "passing test", refined,
  "refining", refined.getATest(false, _), "this one"

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
## Contributing

[fork]: https://github.com/githubnext/testpilot/fork
[pr]: https://github.com/githubnext/testpilot/compare
[code-of-conduct]: CODE_OF_CONDUCT.md

Hi there! We're thrilled that you'd like to contribute to this project. Your help is essential for keeping it great.

Contributions to this project are [released](https://help.github.com/articles/github-terms-of-service/#6-contributions-under-repository-license) to the public under the [project's open source license](LICENSE).

Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms.

## Submitting a pull request

1. [Fork][fork] and clone the repository
2. Configure and install the dependencies: `npm install`
3. Build the project: `npm run build`
4. Make sure the tests pass on your machine: `npm run test`
5. Make sure the code is formatted correctly: `npm run autoformat:check`; if it is not, format it: `npm run autoformat`
6. Create a new branch: `git checkout -b my-branch-name`
7. Make your change, add tests, and make sure the tests and format checks still pass
8. Push to your fork and [submit a pull request][pr]
9. Pat yourself on the back and wait for your pull request to be reviewed and merged.

Here are a few things you can do that will increase the likelihood of your pull request being accepted:

- Write tests.
- Keep your change as focused as possible. If there are multiple changes you would like to make that are not dependent upon each other, consider submitting them as separate pull requests.
- Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).

## Resources

- [How to Contribute to Open Source](https://opensource.guide/how-to-contribute/)
- [Using Pull Requests](https://help.github.com/articles/about-pull-requests/)
- [GitHub Help](https://help.github.com)

--------------------------------------------------------------------------------
/.github/parse_packages.js:
--------------------------------------------------------------------------------
const { readFileSync } = require("fs");
const { join } = require("path");

function parsePackage(packageURL) {
  let pkg = new URL(packageURL);

  // pathname is /<owner>/<repo>/tree/<sha>/<path>
  // gitlab urls sometimes have an extra entry e.g., https://gitlab.com/nerd-vision/opensource/gitlab-js/tree/c2c9ef54b1ea0fc82b284bc72dc2ff0935983f4c
  const components = pkg.pathname.split("/");
  if (
    components.length < 5 ||
    (components[3] !== "tree" && components[4] !== "tree")
  ) {
    throw new Error(`Invalid package URL: ${packageURL}`);
  }

  var sliceIndex = 5;
  var owner = components[1];
  var repo = components[2];
  var sha = components[4];

  if (pkg.host === "gitlab.com") {
    if (components.length > 5) {
      owner = components[1].concat("/", components[2]);
      repo = components[3];
      sha = components[5];
      sliceIndex = 6;
    }
  }

  return {
    host: pkg.host,
    owner: owner,
    repo: repo,
    sha: sha,
    path: components.slice(sliceIndex).join("/"),
  };
}

const packages = [];
const args = process.argv.slice(2);
const skipSlowBenchmarks =
  args[0] === "--skip-slow-benchmarks" ? (args.shift(), true) : false;
const pkg = args[0].trim();
const lines = [];
if (pkg.startsWith("+")) {
  const file = join(__dirname, pkg.slice(1));
  lines.push(...readFileSync(file, "utf8").split("\n"));
} else {
  lines.push(pkg);
}
for (const line of lines) {
  if (line.startsWith("#") || line.trim() === "") continue;
  if (skipSlowBenchmarks && line.includes("# slow")) continue;
  const parsedPackage = parsePackage(line.split(/\s/)[0]);
  // look for `dependencies: ...` in the comment
  const m = line.match(/#.*dependencies:\s*(.*)/);
  const deps = m ? m[1].trim() : "";
  parsedPackage.dependencies = deps;
  packages.push(parsedPackage);
}
console.log(JSON.stringify(packages));

--------------------------------------------------------------------------------
/benchmark/testCollectorHelper.ts:
--------------------------------------------------------------------------------
/**
 * Create a unique statement id from path and start/end location for a given statement
 */
export function createUniqueStmtId(
  relpath: string,
  startLine: number,
  startColumn: number,
  endLine: number,
  endColumn: number
) {
  return `${relpath}@${startLine}:${startColumn}-${endLine}:${endColumn}`;
}

/**
 * Get a map from statement index to unique statement id for a given file in the coverage report
 * @param recordedStmtMap: the statement map recorded in the coverage report
 * @param fileRelPath: the relative path of the file in the coverage report
 * @returns a map from statement index to unique statement id (in same format as createUniqueStmtId)
 */
export function getFileStmts(recordedStmtMap: any, fileRelPath: string) {
  const statementMap = new Map<string, string>();
  for (const key of Object.keys(recordedStmtMap)) {
    const {
      start: { line: startLine, column: startColumn },
      end: { line: endLine, column: endColumn },
    } = recordedStmtMap[key];
    const statementId = createUniqueStmtId(
      fileRelPath,
      startLine,
      startColumn,
      endLine,
      endColumn
    );
    statementMap.set(key, statementId);
  }
  return statementMap;
}

/**
 * Get the list of statements covered from a given file in the coverage report
 * @param fileCoverage: the coverage report for a given file
 * @param relpath: the relative path of the file in the coverage report
 * @returns a list of covered statements (in same format as createUniqueStmtId)
 */
export function getCoveredStmtsForFile(fileCoverage: any, relpath: string) {
  const statementMap = getFileStmts(fileCoverage.statementMap, relpath);
  const coveredStmtIds = [];
  for (const stmtIndx of Object.keys(fileCoverage.s)) {
    const isCovered = fileCoverage.s[stmtIndx];
    if (isCovered) {
      coveredStmtIds.push(statementMap.get(stmtIndx)!);
    }
  }
  return coveredStmtIds;
}

--------------------------------------------------------------------------------
/examples/testGenerationScript.ts:
--------------------------------------------------------------------------------
import path from "path";
import {
  APIFunction,
  FunctionDescriptor,
  Codex,
  TestGenerator,
  MochaValidator,
  BaseTestResultCollector,
} from "./";

(async () => {
  // FunctionDescriptor
  const functionDescriptor: FunctionDescriptor = {
    type: "function",
    signature: "(amount: number, unit: string)",
    isAsync: false,
    implementation: `
    // Pseudo-implementation for moment().add
    `,
    isConstructor: false,
    docComment:
      "Adds the specified amount of time to the moment object. The unit can be years, months, weeks, days, hours, minutes, seconds, or milliseconds. This function modifies the original moment object and returns it for chaining.",
  };

  const apiFunction = new APIFunction(
    "moment().add",
    functionDescriptor,
    "moment"
  );

  // LLM
  const model = new Codex(false, {
    n: 5,
    max_tokens: 150,
    temperature: 0.7,
  });

  // Validator + Collector
  const momentPath = path.join(require.resolve("moment"), "../");
  const validator = new MochaValidator("moment", momentPath);
  const collector = new BaseTestResultCollector();

  const temperatures = [0.7];
  const snippetMap = new Map([
    [
      apiFunction.functionName,
      ["moment().add(10, 'days')", "moment().add(1, 'year').format('YYYY')"],
    ],
  ]);

  // TestGenerator
  const generator = new TestGenerator(
    temperatures,
    (fn) => snippetMap.get(fn),
    model,
    validator,
    collector
  );

  // Generate the test
  console.log("Generating test for moment().add()");
  await generator.generateAndValidateTests(apiFunction);

  // Collect Results
  const testInfos = collector.getTestInfos();

  console.log("Test generation complete. Test Details:");
  testInfos.forEach((test) => {
    console.log(
      `Test ID: ${test.id}, Test Name: ${test.testName}, Outcome: ${test.outcome.status}`
    );
  });
})();

--------------------------------------------------------------------------------
/benchmark/package_stats.ts:
--------------------------------------------------------------------------------
import axios from "axios";
import * as child_process from "child_process";
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import simpleGit from "simple-git";

if (process.argv.length !== 3) {
  console.error("Usage: package_stats.js <package_dir>");
  console.error("  package_dir: Directory containing package.json");
  console.error();
  console.error("This script computes statistics for a package.");
  process.exit(1);
}
const pkgDir = process.argv[2];
const packageName = JSON.parse(
  fs.readFileSync(path.join(pkgDir, "package.json"), "utf8")
).name;

(async () => {
  const git = simpleGit(pkgDir);
  const weeklyDownloadsUrl = `https://api.npmjs.org/downloads/point/last-week/${packageName}`;
  let weeklyDownloads = 0;
  try {
    weeklyDownloads = (await axios.get(weeklyDownloadsUrl)).data.downloads;
  } catch (e) {
    console.warn(`Failed to get weekly downloads for ${packageName}: ${e}`);
    console.warn("Weekly downloads will be set to 0.");
  }
  const nyc = path.join(__dirname, "..", "node_modules", ".bin", "nyc");
  const tmpdir = fs.mkdtempSync(path.join(os.tmpdir(), "package_stats"));
  child_process.execFileSync(
    nyc,
    [
      "--reporter=json-summary",
      `--report-dir=${tmpdir}`,
      `--temp-dir=${tmpdir}`,
      "node",
      "-e",
      'require(".")',
    ],
    { cwd: pkgDir }
  );
  const coverageFromLoading = JSON.parse(
    fs.readFileSync(path.join(tmpdir, "coverage-summary.json"), "utf8")
  ).total;
  const loc = coverageFromLoading.lines.total;
  const repository = (await git.listRemote(["--get-url"])).trim();
  const sha = (await git.revparse(["HEAD"])).trim();
  console.log(
    JSON.stringify(
      {
        packageName,
        repository,
        sha,
        loc,
        weeklyDownloads,
        coverageFromLoading,
      },
      null,
      2
    )
  );
})().catch((e) => {
  console.error(e);
  process.exit(1);
});

--------------------------------------------------------------------------------
/.github/benchmarks.txt:
--------------------------------------------------------------------------------
https://github.com/jprichardson/node-fs-extra/tree/6bffcd81881ae474d3d1765be7dd389b5edfd0e0
https://github.com/jprichardson/node-jsonfile/tree/9c6478a85899a9318547a6e9514b0403166d8c5c
https://github.com/fshost/node-dir/tree/a57c3b1b571dd91f464ae398090ba40f64ba38a2
https://github.com/petkaantonov/bluebird/tree/6c8c069c34829557abfaca66d7f22383b389a4b5
https://github.com/kriskowal/q/tree/6bc7f524eb104aca8bffde95f180b5210eb8dd4b
https://github.com/isaacs/node-graceful-fs/tree/c1b377782112ae0f25b2abe561fbbea6cfb6f876
https://github.com/tildeio/rsvp.js/tree/21e0c9720e08ffa53d597c54fed17119899a9a83
https://github.com/isaacs/node-glob/tree/8315c2d576f9f3092cdc2f2cc41a398bc656035a
https://github.com/maugenst/zip-a-folder/tree/5089113647753d5086ea20f052f9d29840866ee1
https://github.com/streamich/memfs/tree/ec83e6fe1f57432eac2ab61c5367ba9ec3a775a1 # slow; dependencies: typescript@4.9.5
https://github.com/chakrit/node-uneval/tree/7578dc67090f650a171610a08ea529eba9d27438
https://github.com/felixge/node-dirty/tree/d7fb4d4ecf0cce144efa21b674965631a7955e61
https://github.com/pull-stream/pull-stream/tree/29b4868bb3864c427c3988855c5d65ad5cb2cb1c
https://github.com/simple-statistics/simple-statistics/tree/31f037dd5550d554c4a96c3ee35b12e10a1c9cb7
https://github.com/swang/plural/tree/f0027d66ecb37ce0108c8bcb4a6a448d1bf64047
https://github.com/js-sdsl/js-sdsl/tree/055866ad5515037c724a529fecb2d3c2b35b2075
https://github.com/infusion/Complex.js/tree/d995ca105e8adef4c38d0ace50643daf84e0dd1c
https://github.com/quilljs/delta/tree/5ffb853d645aa5b4c93e42aa52697e2824afc869
https://github.com/manuelmhtr/countries-and-timezones/tree/e34cb4b6832795cbac8d44f6f9c97eb1038b831b
https://github.com/rainder/node-geo-point/tree/c839d477ff7a48d1fc6574495cbbc6196161f494
https://gitlab.com/nerd-vision/opensource/gitlab-js/tree/c2c9ef54b1ea0fc82b284bc72dc2ff0935983f4c
https://gitlab.com/comfort-stereo/omnitool/tree/0edf7d148337051c7c2307738423f0ff3db494c7 # slow
https://gitlab.com/demsking/image-downloader/tree/19a53f652824bd0c612cc5bcd3a2eb173a16f938
https://gitlab.com/autokent/crawler-url-parser/tree/202c5b25ad693d284804261e2b3815fe66e0723e
https://gitlab.com/cptpackrat/spacl-core/tree/fcb8511a0d01bdc206582cfacb3e2b01a0288f6a

--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: Release a new version of TestPilot
on:
  workflow_dispatch:
    inputs:
      prerelease:
        description: >
          Create a pre-release instead of a full release.
        type: boolean
        default: true
      name:
        description: >
          Name of the release to create. If not specified, the name of the
          release will be the version number specified in the package.json file,
          plus the HEAD commit SHA for pre-releases.
        default: ""
      description:
        description: >
          Description of this release.
        default: ""
jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Setup Node.js
        uses: actions/setup-node@v2

      - name: Build
        run: npm pack

      - name: Determine release name
        run: |
          if ! [ -z "${{ github.event.inputs.name }}" ]; then
            release_name="${{ github.event.inputs.name }}"
          else
            release_name="v$(jq -r '.version' package.json)"
            # if this is a pre-release, append the commit SHA
            if [ "${{ github.event.inputs.prerelease }}" = "true" ]; then
              release_name="${release_name}-$(git rev-parse --short HEAD)"
            fi
          fi
          # check if a tag with this name already exists
          if git rev-parse -q --verify "refs/tags/${release_name}"; then
            echo "Tag ${release_name} already exists. Aborting."
            exit 1
          fi
          echo "Release name: ${release_name}"
          echo "release_name=${release_name}" >> $GITHUB_ENV

      - name: Release
        uses: softprops/action-gh-release@v1
        with:
          name: "${{ env.release_name }}"
          body: "${{ github.event.inputs.description }}"
          tag_name: "${{ env.release_name }}"
          prerelease: "${{ github.event.inputs.prerelease }}"
          fail_on_unmatched_files: true
          files: |
            *.tgz
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

--------------------------------------------------------------------------------
/ql/queries/RefinerContributions.ql:
--------------------------------------------------------------------------------
import AssertionQuality

/**
 * A pseudo-refiner, that is, either a concrete refiner, "all" (meaning all
 * refiners), or "none" (meaning no refiners).
 */
class PseudoRefiner extends string {
  PseudoRefiner() {
    this instanceof Refiner or
    this = "all" or
    this = "none"
  }
}

/**
 * Gets a prompt from `report` that does _not_ depend on the given `refiner`.
 *
 * If `refiner` is '"all"', all initial, unrefined prompts (which do not depend
 * on any refiner) are returned.
 * If `refiner` is '"none"', all prompts are returned.
 */
Prompt promptWithout(ReportJson report, PseudoRefiner refiner) {
  result = report.getAPrompt() and
  (
    result.doesNotNeed(refiner)
    or
    refiner = "all" and not result.isRefinedFrom(_, _)
    or
    refiner = "none"
  )
}

GeneratedTest testWithout(
  ReportJson report, PseudoRefiner refiner, boolean passes, boolean nontrivial
) {
  result = promptWithout(report, refiner).getATest(passes, nontrivial)
}

/**
 * Gets the number of passing tests in `report` that do not depend on
 * `refiner`.
 */
int getPassingTestsWithout(ReportJson report, PseudoRefiner refiner) {
  result = count(testWithout(report, refiner, true, _))
}

/**
 * Gets the number of statements covered by passing tests in `report` that do
 * not depend on `refiner`.
 */
int getStatementsCoveredWithout(ReportJson report, PseudoRefiner refiner) {
  result = count(string stmtId | testWithout(report, refiner, true, _).coversStmt(stmtId))
}

/**
 * Gets the number of statements covered by non-trivial passing tests in
 * `report` that do not depend on `refiner`.
 */
int getStatementsNonTriviallyCoveredWithout(ReportJson report, PseudoRefiner refiner) {
  result = count(string stmtId | testWithout(report, refiner, true, true).coversStmt(stmtId))
}

/**
 * Computes a percentage value with two decimal places (using floor, not
 * rounding, for consistency with nyc).
 */
bindingset[numerator, denominator]
float perc(float numerator, float denominator) {
  result = ((numerator / denominator * 100) * 100).floor() / 100.0
}

from
  ReportJson report, string refiner, int numTests, int numStatements, float passingTestPercWithout,
  float coveragePercWithout, float nonTrivialCoveragePercWithout
where
  numTests = report.getNumberOfTests() and
  numStatements = report.getNumberOfStatements() and
  passingTestPercWithout = perc(getPassingTestsWithout(report, refiner), numTests) and
  coveragePercWithout = perc(getStatementsCoveredWithout(report, refiner), numStatements) and
  nonTrivialCoveragePercWithout =
    perc(getStatementsNonTriviallyCoveredWithout(report, refiner), numStatements)
select report, refiner, passingTestPercWithout, coveragePercWithout, nonTrivialCoveragePercWithout

--------------------------------------------------------------------------------
/.github/non_trivial_coverage.sh:
--------------------------------------------------------------------------------
#! /bin/bash

set -e
MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Usage: non_trivial_coverage.sh <report_dir>
if [ $# -ne 1 ] || [ ! -d "$1" ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
  echo "Usage: $0 <report_dir>"
  echo "  report_dir: Directory containing coverage reports"
  echo
  echo "This script identifies non-trivial tests and adds corresponding coverage information to the report."
  exit 1
fi
report_dir=$1
output=$1/report.json

if [ ! -f $output ]; then
  echo "No coverage report found at $output"
  exit 1
fi

dbdir=`mktemp -d`
trap "rm -rf $dbdir" EXIT

echo "Creating database in $dbdir..."
# make sure there is at least one JavaScript file to avoid extractor error
echo ';' >$report_dir/dummy.js
LGTM_INDEX_FILTERS='include:**/*.json
exclude:**/coverageData/**/*.json' codeql database create -l javascript -s $report_dir $dbdir

echo "Running query for identifying non-trivial tests..."
codeql query run --output $dbdir/TrivialTest.bqrs -d $dbdir $MY_DIR/../ql/queries/TrivialTest.ql

echo "Marking non-trivial tests and computing coverage information..."
codeql bqrs decode --format csv --no-titles $dbdir/TrivialTest.bqrs | sed 's/"//g' | cut -d, -f1 | xargs -r -n 1 basename >$dbdir/trivial_tests.txt
node <$1/refiners.json

--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, gender identity and expression, level of experience,
nationality, personal appearance, race, religion, or sexual identity and
orientation.
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at opensource@github.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /src/syntax.ts: -------------------------------------------------------------------------------- 1 | import * as espree from "espree"; 2 | 3 | /** A map from opening brackets to their corresponding closing brackets. */ 4 | const closing = new Map([ 5 | ["(", ")"], 6 | ["{", "}"], 7 | ["[", "]"], 8 | ]); 9 | 10 | /** All closing brackets. 
 */
const closers = new Set(closing.values());

/**
 * Fix the given code by adding missing closing brackets.
 *
 * @param code The incomplete code.
 * @returns Fixed code with closing brackets, or undefined if it cannot be fixed
 * with closing brackets.
 */
export function closeBrackets(
  code: string
): { source: string; ast: any } | undefined {
  let brackets = ""; // all outstanding closing brackets, in order
  for (let i = 0; i < code.length; ++i) {
    if (code[i] === "/" && code[i + 1] === "/") {
      // skip line comment
      const nl = code.indexOf("\n", i);
      if (nl === -1) {
        break;
      }
      i = nl;
    } else if (closing.has(code[i])) {
      // when we see an opening bracket, add the corresponding closing bracket
      brackets = closing.get(code[i]) + brackets;
    } else if (closers.has(code[i])) {
      if (brackets[0] === code[i]) {
        // closing bracket matches, so remove it
        brackets = brackets.slice(1);
      } else {
        // closing bracket does not match, so we cannot fix this code
        return undefined;
      }
    }
  }
  try {
    const ast = espree.parse(code + brackets, { ecmaVersion: "latest" });
    return { source: code + brackets, ast };
  } catch (err) {}
  return undefined;
}

/**
 * Trim a completion to avoid incomplete lines and extra whitespace, and make
 * sure it does not break out of enclosing syntactic scopes by closing more
 * brackets than it opens.
 *
 * @param completion The completion.
 * @returns The trimmed completion.
 */
export function trimCompletion(completion: string): string {
  let endOfLastLine = completion.includes("\n")
    ? completion.lastIndexOf("\n")
    : 0;

  // Avoid incomplete lines
  if (!completion.match(/[;})]\s*$/)) {
    completion = completion.slice(0, endOfLastLine);
  }

  // check if more brackets are closed than opened
  let stack = [];
  for (let i = 0; i < completion.length; ++i) {
    if (completion[i] === "{" || completion[i] === "(") {
      stack.push(completion[i]);
    } else if (completion[i] === "}" || completion[i] === ")") {
      if (stack.length === 0) {
        completion = completion.slice(0, i);
        break;
      }
      stack.pop();
    }
  }

  return completion.trim();
}

/**
 * Combine a function's doc comment into a single trimmed commented string
 * @param docComment the original doc comment, as extracted by exploreAPI
 * @returns the doc comment with all non-empty lines starting with // (instead of '* ')
 */
export function trimAndCombineDocComment(docComment: string): string {
  return commentOut(
    docComment
      .split("\n")
      .map((line) => line.replace("*", "").trim())
      .filter((line) => line !== "")
      .join("\n")
  );
}

/**
 * Comment out the given code line by line.
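 *
 * For example, commentOut("a\nb") yields "// a\n// b\n"; a trailing empty
 * line in the input is dropped, and a final newline is always present
 * (matching the behavior exercised in test/syntax.ts).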
 */
export function commentOut(code: string): string {
  let lines = code.split("\n");
  // remove trailing empty line
  if (lines[lines.length - 1] === "") {
    lines.pop();
  }
  return lines.map((line) => `// ${line}\n`).join("");
}

--------------------------------------------------------------------------------
/src/testResultCollector.ts:
--------------------------------------------------------------------------------
import { emptyCoverageSummary, ICoverageSummary } from "./coverage";
import { Prompt } from "./promptCrafting";
import { ITestInfo, TestOutcome } from "./report";

export interface IPromptInfo {
  /** The prompt. */
  prompt: Prompt;
  /** A unique ID for this prompt. */
  id: number;
  /** The file to store the prompt in. */
  file: string;
  /** The sampling temperature for this prompt. */
  temperature: number;
  /** The set of completions obtained for this prompt. */
  completions: Set<string>;
}

export interface ITestResultCollector {
  /**
   * Record information about a test for the given API function from the given
   * prompt. If the test was already recorded, the existing test info is returned,
   * with the new prompt added to the list of prompts.
   */
  recordTestInfo(testSource: string, prompt: Prompt, api: string): ITestInfo;

  /**
   * Record a test result.
   *
   * @param test the test that was run
   * @param temperature the sampling temperature used to generate the test
   * @param outcome the outcome of the test
   */
  recordTestResult(
    test: ITestInfo,
    temperature: number,
    outcome: TestOutcome
  ): void;

  /**
   * Record information about a prompt.
   *
   * @param prompt the prompt
   * @param temperature the sampling temperature
   * @param completions the set of completions for the prompt
   */
  recordPromptInfo(
    prompt: Prompt,
    temperature: number,
    completions: Set<string>
  ): void;

  /**
   * Record coverage information.
   *
   * @param coverageSummary the coverage information
   */
  recordCoverageInfo(coverageSummary: ICoverageSummary): void;
}

export /**
 * A simple result collector that keeps track of tests and prompts, but does not
 * do anything with them.
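 *
 * A minimal usage sketch (testSource, prompt, and outcome are assumed to be
 * in scope; "pkg.fn" is a hypothetical API access path):
 *
 *   const collector = new BaseTestResultCollector();
 *   const info = collector.recordTestInfo(testSource, prompt, "pkg.fn");
 *   collector.recordTestResult(info, 0.7, outcome);
 *   collector.getTestInfos(); // => [info]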
 */
class BaseTestResultCollector implements ITestResultCollector {
  protected readonly tests: Map<string, ITestInfo> = new Map();
  protected readonly prompts: Map<Prompt, IPromptInfo> = new Map();
  protected coverageSummary: ICoverageSummary = emptyCoverageSummary();

  public recordTestInfo(
    testSource: string,
    prompt: Prompt,
    api: string
  ): ITestInfo {
    let testInfo = this.tests.get(testSource);
    if (testInfo) {
      testInfo.prompts.push(prompt);
    } else {
      const id = this.tests.size;
      testInfo = {
        id,
        testName: `test_${id}.js`,
        outcome: TestOutcome.OTHER,
        testSource: testSource,
        prompts: [prompt],
        api,
      };
      this.tests.set(testSource, testInfo);
    }
    return testInfo;
  }

  public recordTestResult(
    test: ITestInfo,
    temperature: number,
    outcome: TestOutcome
  ) {
    test.outcome = outcome;
  }

  public recordPromptInfo(
    prompt: Prompt,
    temperature: number,
    completions: Set<string>
  ) {
    const id = this.prompts.size;
    const file = `prompt_${id}.js`;
    this.prompts.set(prompt, { prompt, id, file, temperature, completions });
  }

  public recordCoverageInfo(coverageSummary: ICoverageSummary) {
    this.coverageSummary = coverageSummary;
  }

  public getPromptInfos(): IPromptInfo[] {
    return Array.from(this.prompts.values());
  }

  public getTestInfos(): ITestInfo[] {
    return Array.from(this.tests.values());
  }
}

--------------------------------------------------------------------------------
/test/editDistance.ts:
--------------------------------------------------------------------------------
import { expect } from "chai";
import dedent from "dedent";
import deepEqualInAnyOrder from "deep-equal-in-any-order";
import { parseTests } from "../benchmark/editDistance";

const chai = require("chai");
chai.use(deepEqualInAnyOrder);

const testFileName = "testFileName.js";
/**
 * helper function to create expected tests from an array of input tests
 * @param tests the input tests
 * @param testFileName the name of the test file
 * @returns Set of Test objects
 */
function createExpectedTests(tests: string[], testFileName: string) {
  const expectedTests = new Set();

  // add tests to expectedTests with index and fileName
  tests.forEach(function (test, index) {
    expectedTests.add({
      fileName: testFileName,
      index: index,
      contents: dedent(test),
    });
  });
  return expectedTests;
}

function createTestFileContent(tests: string[]) {
  return tests
    .map(function (test) {
      return dedent(test);
    })
    .join("\n\n");
}

function setupAndExecuteTest(tests: string[]) {
  const testFileContent = createTestFileContent(tests);

  const expectedTests = createExpectedTests(tests, testFileName);
  const extractedTests = parseTests(testFileName, testFileContent);

  expect(expectedTests).to.deep.equal(extractedTests);
}

describe("editDistance parseTests", () => {
  it("should detect multiple tests", () => {
    const tests = [
"it(\"sends { index, value } progress updates\", function () {\n var test = '';})", 60 | "it( 'sends ) index, value } progress updates', function () {\n var test = '';})", 61 | "it('sends ( index, value } progress updates', function () {\n var test = '';})", 62 | "it('sends } index, value } progress updates', function () {\n var test = '';})", 63 | ]; 64 | 65 | setupAndExecuteTest(tests); 66 | }); 67 | 68 | it("should detect arrow functions", () => { 69 | const tests = [ 70 | dedent` 71 | it('my test', () => { 72 | // should set the timeout of this test to 1000 ms; instead will fail 73 | this.timeout(1000); 74 | assert.ok(true); 75 | })`, 76 | ]; 77 | 78 | setupAndExecuteTest(tests); 79 | }); 80 | 81 | it("should not match split", () => { 82 | const tests = [ 83 | dedent` 84 | split('my test', () => { 85 | // should set the timeout of this test to 1000 ms; instead will fail 86 | this.timeout(1000); 87 | assert.ok(true); 88 | })`, 89 | ]; 90 | 91 | const testFileContent = creatTestFileContent(tests); 92 | const extractedTests = parseTests(testFileName, testFileContent); 93 | expect(extractedTests.size).equal(0); 94 | }); 95 | 96 | it("should handle malformed tests", () => { 97 | const tests = [ 98 | dedent` 99 | it('my test', () => ()`, 100 | ]; 101 | 102 | const testFileContent = creatTestFileContent(tests); 103 | const extractedTests = parseTests(testFileName, testFileContent); 104 | expect(extractedTests.size).equal(0); 105 | }); 106 | 107 | it("should detect jtests", () => { 108 | const tests = [ 109 | "test('HashSet hash function test', () => { new HashMap(arr.map(x => [Math.floor(Number(x)), 1]));)}", 110 | ]; 111 | setupAndExecuteTest(tests); 112 | }); 113 | }); 114 | -------------------------------------------------------------------------------- /test/exploreAPIs.ts: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import * as exploreAPI from "../src/exploreAPI"; 3 | import dedent from "dedent"; 4 | import * as espree from "espree"; 5 | import { connect } from "http2"; 6 | 7 | describe("test source code normalization", () => { 8 | it("should normalize regular functions", () => { 9 | const code = dedent` 10 | function someNumbers () { 11 | yield 0; 12 | yield 1; 13 | yield -1; 14 | } 15 | `; 16 | const expected = dedent` 17 | function someNumbers(){yield 0;yield 1;yield-1;} 18 | `; 19 | expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected); 20 | }); 21 | 22 | it("should normalize generator functions", () => { 23 | const code = dedent` 24 | function *someNumbers () { 25 | yield 0; 26 | yield 1; yield -1; 27 | } 28 | `; 29 | const expected = dedent` 30 | function*someNumbers(){yield 0;yield 1;yield-1;} 31 | `; 32 | expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected); 33 | }); 34 | 35 | it("should normalize class methods", () => { 36 | const code = dedent` 37 | simpleMethod () {const x = 1;} 38 | `; 39 | const expected = dedent` 40 | simpleMethod(){const x=1;} 41 | `; 42 | expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected); 43 | }); 44 | 45 | it("should normalize async class methods", () => { 46 | const code = dedent` 47 | async simpleMethod (foo: string) {const x = 1;} 48 | `; 49 | const expected = dedent` 50 | async simpleMethod(foo:string){const x=1;} 51 | `; 52 | expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected); 53 | }); 54 | 55 | it("should normalize async generator class methods", () => { 56 | const code = dedent` 57 | async *simpleMethod (foo: 
string) {const x = 1;} 58 | `; 59 | const expected = dedent` 60 | async*simpleMethod(foo:string){const x=1;} 61 | `; 62 | expect(exploreAPI.normalizeFunctionSource(code)).to.equal(expected); 63 | }); 64 | }); 65 | 66 | describe("test finding doc comments", () => { 67 | it("should correctly match doc comments", () => { 68 | const docComment = dedent` 69 | /** 70 | * Test Doc Comment 71 | * @param foo a parameter 72 | */ 73 | `; 74 | 75 | const function1Def = dedent` 76 | function simpleMethod(foo) { 77 | const x = 1; 78 | } 79 | `; 80 | 81 | const function2Def = dedent` 82 | function otherMethod(param) { 83 | const x = 1; 84 | } 85 | `; 86 | 87 | const code = docComment.concat("\n", function1Def, "\n", function2Def); 88 | const docComments = new Map(); 89 | exploreAPI.findDocComments(code, docComments); 90 | 91 | expect( 92 | docComments.get(exploreAPI.normalizeFunctionSource(function1Def)) 93 | ).to.equal(docComment.slice(2, -2)); 94 | expect( 95 | docComments.get(exploreAPI.normalizeFunctionSource(function2Def)) 96 | ).to.equal(undefined); 97 | }); 98 | 99 | it("should be robust against failed parsing", () => { 100 | const docComment = dedent` 101 | /** 102 | * Test Doc Comment 103 | */ 104 | `; 105 | 106 | const functionDef = dedent` 107 | functoin simpleMethod(param) { 108 | const x = 1; 109 | } 110 | `; 111 | 112 | const code = docComment.concat("\n", functionDef); 113 | const docComments = new Map(); 114 | exploreAPI.findDocComments(code, docComments); 115 | expect( 116 | docComments.get(exploreAPI.normalizeFunctionSource(functionDef)) 117 | ).to.equal(undefined); 118 | }); 119 | }); 120 | -------------------------------------------------------------------------------- /test/APIFunction.ts: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import { APIFunction } from "../src/exploreAPI"; 3 | 4 | describe("test APIFunction", () => { 5 | it("should be possible to construct an API function from a short access path directly", () => { 6 | const apiFunction = new APIFunction( 7 | "zip-a-folder", 8 | { 9 | type: "function", 10 | signature: "(srcFolder, zipFilePath)", 11 | isAsync: false, 12 | isConstructor: true, 13 | implementation: "", 14 | }, 15 | "zip-a-folder" 16 | ); 17 | expect(apiFunction.packageName).to.equal("zip-a-folder"); 18 | expect(apiFunction.accessPath).to.equal("zip-a-folder"); 19 | expect(apiFunction.functionName).to.equal("zip-a-folder"); 20 | 21 | const sig = "class zip-a-folder(srcFolder, zipFilePath)"; 22 | expect(apiFunction).to.deep.equal(APIFunction.fromSignature(sig)); 23 | expect(apiFunction.signature).to.equal(sig); 24 | }); 25 | 26 | it("should be possible to construct an API function from a typical access path directly", () => { 27 | const apiFunction = new APIFunction( 28 | "plural.addRule", 29 | { 30 | type: "function", 31 | signature: "(match, result)", 32 | isAsync: false, 33 | isConstructor: false, 34 | implementation: "", 35 | }, 36 | "plural" 37 | ); 38 | expect(apiFunction.packageName).to.equal("plural"); 39 | expect(apiFunction.accessPath).to.equal("plural.addRule"); 40 | expect(apiFunction.functionName).to.equal("addRule"); 41 | 42 | const sig = "plural.addRule(match, result)"; 43 | expect(apiFunction).to.deep.equal(APIFunction.fromSignature(sig)); 44 | expect(apiFunction.signature).to.equal(sig); 45 | }); 46 | 47 | it("should be possible to construct an APIFunction from a longer access path directly", () => { 48 | const apiFunction = new APIFunction( 49 | 
"zip-a-folder.ZipAFolder.tar", 50 | { 51 | type: "function", 52 | signature: "(srcFolder, tarFilePath, zipAFolderOptions)", 53 | isAsync: true, 54 | isConstructor: false, 55 | implementation: "", 56 | }, 57 | "zip-a-folder" 58 | ); 59 | expect(apiFunction.packageName).to.equal("zip-a-folder"); 60 | expect(apiFunction.accessPath).to.equal("zip-a-folder.ZipAFolder.tar"); 61 | expect(apiFunction.functionName).to.equal("tar"); 62 | 63 | const sig = 64 | "zip-a-folder.ZipAFolder.tar(srcFolder, tarFilePath, zipAFolderOptions) async"; 65 | expect(apiFunction).to.deep.equal(APIFunction.fromSignature(sig)); 66 | expect(apiFunction.signature).to.equal(sig); 67 | }); 68 | 69 | it("should handle invalid signatures correctly", () => { 70 | expect(() => APIFunction.fromSignature("")).to.throw(); 71 | expect(() => APIFunction.fromSignature("zip-a-folder")).to.throw(); 72 | expect(() => 73 | APIFunction.fromSignature("zip-a-folder(srcFolder, zipFilePath) asnyc") 74 | ).to.throw(); 75 | }); 76 | 77 | it("should handle package names containing a dot correctly", () => { 78 | const apiFunction = new APIFunction( 79 | "zip.a-folder.ZipAFolder.tar", 80 | { 81 | type: "function", 82 | signature: "(srcFolder, tarFilePath, zipAFolderOptions)", 83 | isAsync: true, 84 | isConstructor: false, 85 | implementation: "", 86 | }, 87 | "zip.a-folder" 88 | ); 89 | expect(apiFunction.packageName).to.equal("zip.a-folder"); 90 | expect(apiFunction.accessPath).to.equal("zip.a-folder.ZipAFolder.tar"); 91 | expect(apiFunction.functionName).to.equal("tar"); 92 | 93 | const sig = 94 | "zip.a-folder.ZipAFolder.tar(srcFolder, tarFilePath, zipAFolderOptions) async"; 95 | expect(apiFunction.signature).to.equal(sig); 96 | }); 97 | 98 | it("should be possible to serialize and deserialize API functions", () => { 99 | const apiFunction = APIFunction.fromSignature( 100 | "zip-a-folder(srcFolder, zipFilePath)" 101 | ); 102 | const serialized = JSON.stringify(apiFunction); 103 | const deserialized = APIFunction.fromJSON(JSON.parse(serialized)); 104 | expect(deserialized).to.deep.equal(apiFunction); 105 | }); 106 | }); 107 | -------------------------------------------------------------------------------- /benchmark/generate_diversity_report.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { ITestReport } from ".."; 4 | 5 | function formatNum(numerator: number, denominator: number) { 6 | if (denominator == 0) return "--"; 7 | return `${numerator} (${((numerator / denominator) * 100).toFixed(0)} %)`; 8 | } 9 | 10 | type CoverageStats = { 11 | [packageName: string]: { 12 | proj: string; 13 | numPassing: number; 14 | coverage: number; 15 | numCoveredStmts: number; 16 | stmtCovMap: Map; 17 | }; 18 | }; 19 | 20 | function parseReports(root: string) { 21 | const coverageStats: CoverageStats = {}; 22 | 23 | for (const proj of fs.readdirSync(root)) { 24 | const projDir = path.join(root, proj); 25 | if (!fs.lstatSync(projDir).isDirectory()) continue; 26 | 27 | const stmtCovMap = new Map(); // map from statement to list of tests covering that statement 28 | const reportData = JSON.parse( 29 | fs.readFileSync(path.join(projDir, "report.json"), "utf8") 30 | ) as ITestReport; 31 | const packageName = reportData.metaData.packageName; 32 | const numCoveredStmts = reportData.coverage?.total.statements?.covered ?? 0; 33 | const coverage = reportData.coverage?.total.statements?.pct ?? 0; 34 | const numPassing = reportData.stats?.nrPasses ?? 
0; 35 | 36 | for (const test of reportData.tests) { 37 | for (const coveredStmt of test.coveredStatements ?? []) { 38 | if (!stmtCovMap.has(coveredStmt)) { 39 | stmtCovMap.set(coveredStmt, []); 40 | } 41 | stmtCovMap.get(coveredStmt).push(test.testName); 42 | } 43 | } 44 | 45 | coverageStats[packageName] = { 46 | proj, 47 | numPassing, 48 | coverage, 49 | numCoveredStmts, 50 | stmtCovMap, 51 | }; 52 | } 53 | return coverageStats; 54 | } 55 | 56 | function printTestDiversityReport(title: string, coverageStats: CoverageStats) { 57 | console.log(` 58 | # ${title} 59 | 60 | Project| # Passing Tests| Coverage | # Covered Stmts | Avg. num tests/stmt | # Uniquely Covered Stmts | # Uniquely Covering Tests 61 | --- | ---: | ---: | ---: | ---: | ---: | ---:`); 62 | 63 | for (const { 64 | proj, 65 | numPassing, 66 | coverage, 67 | numCoveredStmts, 68 | stmtCovMap, 69 | } of Object.values(coverageStats)) { 70 | const coveringTestPerStmt = Array.from(stmtCovMap.values()); 71 | const averageTestsPerStmt = ( 72 | coveringTestPerStmt 73 | .map((coveringTests) => coveringTests.length) 74 | .reduce((a, b) => a + b, 0) / coveringTestPerStmt.length 75 | ).toFixed(2); 76 | 77 | let numUniquelyCoveredStmts = 0; 78 | const uniquelyCoveringTests = new Set(); 79 | for (const coveringTests of stmtCovMap.values()) { 80 | if (coveringTests.length == 1) { 81 | numUniquelyCoveredStmts++; 82 | uniquelyCoveringTests.add(coveringTests[0]); 83 | } 84 | } 85 | const numUniquelyCoveringTests = formatNum( 86 | uniquelyCoveringTests.size, 87 | numPassing 88 | ); 89 | 90 | console.log( 91 | `${proj}| ${numPassing} | ${coverage}% | ${numCoveredStmts} | ${averageTestsPerStmt} | ${numUniquelyCoveredStmts} | ${numUniquelyCoveringTests}` 92 | ); 93 | } 94 | 95 | console.log(`Interpreting table: 96 | - First three columns are the same as the typical table we output 97 | - \# Covered stmts: the number of statements covered by the passing tests, from the report.json file 98 | - Avg num tests/stmt: for each covered statement, we find the tests that cover this statement and then calculate the average num of tests/stmt 99 | - \# Uniquely Covered Stmts: these are statements covered by only one test 100 | - \# Uniquely Covering Tests: number of tests that uniquely cover at least one statement (and percentage w.r.t number of passing tests; the higher the percentage the better although 100% is unlikely) 101 | `); 102 | } 103 | 104 | if (require.main === module) { 105 | if (process.argv.length != 3) { 106 | console.error("Usage: node generate_diversity_report.js "); 107 | process.exit(1); 108 | } 109 | const artifactDir = process.argv[2]; 110 | let coverageStats = parseReports(artifactDir); 111 | printTestDiversityReport( 112 | "Diversity of Tests w.r.t Stmt Coverage", 113 | coverageStats 114 | ); 115 | } 116 | -------------------------------------------------------------------------------- /.github/workflows/measure-coverage.yml: -------------------------------------------------------------------------------- 1 | name: Measure coverage of default test suite 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | packages: 7 | description: "Packages to generate tests for" 8 | default: "+benchmarks.txt" 9 | debug_enabled: 10 | type: boolean 11 | description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)" 12 | default: false 13 | 14 | jobs: 15 | setup: 16 | runs-on: ubuntu-latest 17 | outputs: 18 | packages: "${{ steps.parse_packages.outputs.packages }}" 19 | steps: 20 | - uses: 
actions/checkout@v3 21 | 22 | - uses: actions/setup-node@v3 23 | with: 24 | node-version: 12 25 | 26 | - id: parse_packages 27 | run: | 28 | packages=$(node ${GITHUB_WORKSPACE}/.github/parse_packages.js \ 29 | "${{ github.event.inputs.packages || '+benchmarks.txt' }}") 30 | echo "packages=$packages" >> $GITHUB_OUTPUT 31 | 32 | benchmark: 33 | needs: 34 | - setup 35 | runs-on: ubuntu-latest 36 | continue-on-error: true 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | package: ${{ fromJson(needs.setup.outputs.packages) }} 41 | steps: 42 | - name: Set up Node.js 43 | uses: actions/setup-node@v3 44 | with: 45 | node-version: 12 46 | 47 | - name: Checkout github package repo 48 | if: ${{ matrix.package.host == 'github.com' }} 49 | uses: actions/checkout@v3 50 | with: 51 | repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }} 52 | ref: ${{ matrix.package.sha }} 53 | path: "source" 54 | 55 | - name: Checkout gitlab package repo 56 | if: ${{ matrix.package.host == 'gitlab.com' }} 57 | run: | 58 | git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} source 59 | cd source 60 | git checkout ${{ matrix.package.sha }} 61 | 62 | - name: Determine package name 63 | id: pkg-name 64 | run: | 65 | # name of the package 66 | TESTPILOT_PACKAGE_NAME=$(cat source/${{ matrix.package.path }}/package.json | jq -r .name ) 67 | 68 | # some packages have a / in their names (looking at you, gitlab-js!) 69 | if [[ "$TESTPILOT_PACKAGE_NAME" == *"/"* ]]; then 70 | TESTPILOT_PACKAGE_NAME=${TESTPILOT_PACKAGE_NAME##*/} 71 | fi 72 | 73 | # path to the package within the repo checkout 74 | TESTPILOT_PACKAGE_PATH="$GITHUB_WORKSPACE/$TESTPILOT_PACKAGE_NAME/${{ matrix.package.path }}" 75 | # make sure there isn't already a directory with the same name 76 | if [ -d "$TESTPILOT_PACKAGE_PATH" ]; then 77 | echo "ERROR: $TESTPILOT_PACKAGE_PATH already exists" 78 | exit 1 79 | fi 80 | # rename checkout, since some packages examine its name (looking at you, bluebird!) 81 | mv source $TESTPILOT_PACKAGE_NAME 82 | echo "Package name: $TESTPILOT_PACKAGE_NAME, path: $TESTPILOT_PACKAGE_PATH" 83 | # export environment variables 84 | echo "TESTPILOT_PACKAGE_NAME=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_ENV 85 | echo "TESTPILOT_PACKAGE_PATH=$TESTPILOT_PACKAGE_PATH" >> $GITHUB_ENV 86 | echo "pkgName=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_OUTPUT 87 | 88 | - name: Install package, its dependencies, and test packages 89 | run: | 90 | cd $TESTPILOT_PACKAGE_PATH 91 | npm i || npm i --legacy-peer-deps 92 | # if matrix.package.dependencies is not empty, install them 93 | if ! 
[ -z "${{ matrix.package.dependencies }}" ]; then 94 | npm i ${{ matrix.package.dependencies }} 95 | fi 96 | npm run build || npm run prepack || echo 'Error with npm run build and npm run prepack' 97 | npm i --no-save mocha nyc 98 | 99 | - name: Measure coverage of default test suite 100 | run: | 101 | cd $TESTPILOT_PACKAGE_PATH 102 | ./node_modules/.bin/nyc npm test 103 | 104 | - name: Setup tmate session 105 | uses: mxschmitt/action-tmate@v3 106 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 107 | -------------------------------------------------------------------------------- /src/report.ts: -------------------------------------------------------------------------------- 1 | import { ICoverageSummary } from "./coverage"; 2 | import { Prompt } from "./promptCrafting"; 3 | 4 | export enum TestStatus { 5 | PASSED = "PASSED", 6 | FAILED = "FAILED", 7 | PENDING = "PENDING", 8 | OTHER = "OTHER", 9 | } 10 | 11 | export type TestOutcome = 12 | | { status: "PASSED"; coverageReport?: string; coverageData?: string } 13 | | { status: "PENDING" | "OTHER" } 14 | | { status: "FAILED"; err: ITestFailureInfo }; 15 | 16 | export namespace TestOutcome { 17 | export function PASSED( 18 | coverageReport?: string, 19 | coverageData?: string 20 | ): TestOutcome { 21 | return { status: "PASSED", coverageReport, coverageData }; 22 | } 23 | export const PENDING: TestOutcome = { status: "PENDING" }; 24 | export const OTHER: TestOutcome = { status: "OTHER" }; 25 | export function FAILED(err: ITestFailureInfo): TestOutcome { 26 | return { status: "FAILED", err }; 27 | } 28 | } 29 | 30 | export interface ITestFailureInfo { 31 | message: string; 32 | code?: string; 33 | stack?: string; 34 | } 35 | 36 | /** 37 | * Represents a test and all associated information 38 | */ 39 | export interface ITestInfo { 40 | /** The numeric ID of the test. */ 41 | id: number; 42 | /** The name of the test (constructed from the ID). */ 43 | testName: string; 44 | /** The outcome of the test. */ 45 | outcome: TestOutcome; 46 | /** The name of the file containing the test. */ 47 | testSource: string; 48 | /** The prompts that gave rise to this test. */ 49 | prompts: Prompt[]; 50 | /** The API method for which this test was generated. */ 51 | api: string; 52 | } 53 | 54 | /** 55 | * Represents the metadata associated with a generated test suite 56 | */ 57 | export interface IMetaData { 58 | /** The name of the package under test. */ 59 | packageName: string; 60 | /** Whether usage snippets were mined from documentation. */ 61 | useDocSnippets: boolean; 62 | /** Whether usage snippets were mined from code. */ 63 | useCodeSnippets: boolean; 64 | /** The maximum number of snippets to include in a prompt, or "all" if no limit was imposed. */ 65 | numSnippets: number | "all"; 66 | /** The maximum length of each snippet in lines. */ 67 | snippetLength: number; 68 | /** The number of completions to obtain for each prompt. 
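   * This corresponds to the `n` request option sent to the completion
   * model (cf. codex.ts, where it defaults to 5).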
   */
  numCompletions: number;
}

export type ReportForTest = {
  /** name of the test */
  testName: string;
  /** API method for which the test was generated */
  api: string;
  /** name of the file containing the test */
  testFile: string;
  /** IDs of the prompts that gave rise to the test */
  promptIds: number[];
  /** status of the test */
  status: TestStatus;
  /** error information if the test failed */
  err: ITestFailureInfo | {};
  /** statements covered by the test */
  coveredStatements: string[];
  /** duration of the test, if known */
  duration: number | undefined;
};

/**
 * Represents all test results, statistics, prompts, completions, and coverage information
 * associated with a generated test suite
 */
export interface ITestReport {
  metaData: IMetaData;
  /** total number of unique snippets available in the snippet map. */
  nrUniqueSnippets: number;
  stats: {
    /** total number of tests */
    nrTests: number;
    /** number of passing tests */
    nrPasses: number;
    /** number of failing tests */
    nrFailures: number;
    /** number of pending tests */
    nrPending: number;
    /** number of other tests */
    nrOther: number;
    /** time taken to explore package API */
    apiExplorationTime: number;
    /** time taken to extract doc comments */
    docCommentExtractionTime: number;
    /** time taken to extract snippets */
    snippetExtractionTime: number;
    /** cumulative response time for all Codex queries */
    codexQueryTime: number;
    /** end-to-end wall-clock time (in milliseconds) taken to generate the test suite */
    totalTime: number;
    /** number of tests containing at least one non-trivial assertion */
    nrNonTrivialTests?: number;
    /** number of passing tests containing at least one non-trivial assertion */
    nrNonTrivialPasses?: number;
  };
  tests: ReportForTest[];
  coverage: ICoverageSummary;
}

--------------------------------------------------------------------------------
/benchmark/performanceMeasurer.ts:
--------------------------------------------------------------------------------
import { performance, PerformanceObserver } from "perf_hooks";
import { CodexPostOptions } from "..";

export class PerformanceMeasurer {
  /** Time stamp when this measurer was instantiated. */
  private readonly start: number;

  /** Time to explore package API, in milliseconds (includes time to extract doc comments). */
  private apiExplorationTime: number | undefined = undefined;

  /** Time to extract doc comments, in milliseconds. */
  private docCommentExtractionTime: number | undefined = undefined;

  /** Time to extract snippets, in milliseconds. */
  private snippetExtractionTime: number | undefined = undefined;

  /** Runtimes for generated tests in milliseconds. */
  private readonly testDurations: Map<string, number> = new Map();

  /**
   * Response times for requests to the Codex model together with the
   * corresponding request options.
   */
  private readonly codexQueryTimes: [CodexPostOptions, number][] = [];

  /** An observer for performance measurements.
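   * Consumes `performance.measure()` entries created elsewhere; for example,
   * codex.ts marks "codex-query-start" and then measures
   * "codex-query:{...request options}", which the "codex-query:" branch
   * below picks up.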
*/ 27 | private readonly observer = new PerformanceObserver((entries) => { 28 | for (const entry of entries.getEntries()) { 29 | if (entry.name.startsWith("duration:")) { 30 | // for each test `test_i.js`, we get a performance measurement `duration:test_i.js` 31 | const testName = entry.name.substring("duration:".length); 32 | if (this.testDurations.has(testName)) { 33 | console.warn(`Multiple durations for test ${testName}`); 34 | } 35 | this.testDurations.set(testName, entry.duration); 36 | } else if (entry.name.startsWith("codex-query:")) { 37 | // for each Codex query, we get a performance measurement `codex-query:` 38 | const options = JSON.parse(entry.name.substring("codex-query:".length)); 39 | // remove `logit_bias` property; it's an internal workaround 40 | delete options.logit_bias; 41 | this.codexQueryTimes.push([options, entry.duration]); 42 | } else if (entry.name === "snippet-extraction") { 43 | this.snippetExtractionTime = entry.duration; 44 | } else if (entry.name === "doc-comment-extraction") { 45 | if (this.docCommentExtractionTime === undefined) { 46 | this.docCommentExtractionTime = entry.duration; 47 | } else { 48 | this.docCommentExtractionTime += entry.duration; 49 | } 50 | } else if (entry.name === "api-exploration") { 51 | this.apiExplorationTime = entry.duration; 52 | } 53 | } 54 | }); 55 | 56 | constructor() { 57 | this.start = performance.now(); 58 | this.observer.observe({ entryTypes: ["measure"] }); 59 | } 60 | 61 | /** 62 | * Get the time (in milliseconds) taken to explore package API, not 63 | * including time to extract doc comments. 64 | */ 65 | getApiExplorationTime(): number | undefined { 66 | if (this.apiExplorationTime && this.docCommentExtractionTime) { 67 | return Math.max( 68 | 0, 69 | this.apiExplorationTime - this.docCommentExtractionTime 70 | ); 71 | } 72 | return this.apiExplorationTime; 73 | } 74 | 75 | /** Get the time (in milliseconds) taken to extract doc comments. */ 76 | getDocCommentExtractionTime(): number | undefined { 77 | return this.docCommentExtractionTime; 78 | } 79 | 80 | /** Get the time (in milliseconds) taken to extract snippets. */ 81 | getSnippetExtractionTime(): number | undefined { 82 | return this.snippetExtractionTime; 83 | } 84 | 85 | /** Get the time (in milliseconds) taken to run the given test. */ 86 | getTestDuration(testName: string): number | undefined { 87 | return this.testDurations.get(testName); 88 | } 89 | 90 | /** 91 | * Get a list of response times (in milliseconds) for Codex queries 92 | * together with the corresponding request parameters. 93 | */ 94 | getCodexQueryTimes(): [CodexPostOptions, number][] { 95 | return this.codexQueryTimes; 96 | } 97 | 98 | /** Get the cumulative response time (in milliseconds) for all Codex queries. */ 99 | getTotalCodexQueryTime(): number { 100 | return this.codexQueryTimes.reduce( 101 | (sum, [, duration]) => sum + duration, 102 | 0 103 | ); 104 | } 105 | 106 | /** Get the total elapsed time (in milliseconds) since this measurer was instantiated. 
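   * Usage sketch: create the measurer before test generation begins and
   * call getTotalTime() once the run completes to obtain the end-to-end
   * wall-clock time recorded in the test report.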
*/ 107 | getTotalTime(): number { 108 | return performance.now() - this.start; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/generateTests.ts: -------------------------------------------------------------------------------- 1 | import { ICompletionModel } from "./completionModel"; 2 | import { APIFunction } from "./exploreAPI"; 3 | import { 4 | IPromptRefiner, 5 | Prompt, 6 | RetryWithError, 7 | SnippetIncluder, 8 | DocCommentIncluder, 9 | FunctionBodyIncluder, 10 | defaultPromptOptions, 11 | } from "./promptCrafting"; 12 | import { ITestInfo, TestOutcome, TestStatus } from "./report"; 13 | import { SnippetMap } from "./snippetHelper"; 14 | import { ITestResultCollector } from "./testResultCollector"; 15 | import { TestValidator } from "./testValidator"; 16 | 17 | /** 18 | * Context class collecting various bits of information needed for test 19 | * generation. 20 | */ 21 | export class TestGenerator { 22 | private refiners: IPromptRefiner[] = [ 23 | new SnippetIncluder(), 24 | new RetryWithError(), 25 | new DocCommentIncluder(), 26 | new FunctionBodyIncluder(), 27 | ]; 28 | 29 | constructor( 30 | private temperatures: number[], 31 | private snippetMap: SnippetMap, 32 | private model: ICompletionModel, 33 | private validator: TestValidator, 34 | private collector: ITestResultCollector 35 | ) {} 36 | 37 | /** 38 | * Generate tests for a given function and validate them. 39 | */ 40 | async generateAndValidateTests(fun: APIFunction) { 41 | for (const temperature of this.temperatures) { 42 | let generatedPassingTests = false; 43 | const generatedPrompts = new Map(); 44 | const snippets = this.snippetMap(fun.functionName) ?? []; 45 | const worklist = [new Prompt(fun, snippets, defaultPromptOptions())]; 46 | while (worklist.length > 0) { 47 | const prompt = worklist.pop()!; 48 | 49 | // check whether we've generated this prompt before; if so, record that 50 | // fact by updating provenance info and skip it 51 | const assembledPrompt = prompt.assemble(); 52 | const previousPrompt = generatedPrompts.get(assembledPrompt); 53 | if (previousPrompt) { 54 | previousPrompt.withProvenance(...prompt.provenance); 55 | continue; 56 | } 57 | generatedPrompts.set(assembledPrompt, prompt); 58 | 59 | const completions = await this.model.completions( 60 | prompt.assemble(), 61 | temperature 62 | ); 63 | for (const completion of completions) { 64 | const testInfo = this.validateCompletion( 65 | prompt, 66 | completion, 67 | temperature 68 | ); 69 | if (testInfo.outcome.status === TestStatus.PASSED) { 70 | generatedPassingTests = true; 71 | } 72 | 73 | this.refinePrompts(prompt, completion, testInfo, worklist); 74 | } 75 | this.collector.recordPromptInfo(prompt, temperature, completions); 76 | } 77 | if (generatedPassingTests) break; 78 | } 79 | } 80 | 81 | /** 82 | * Build a test for the given prompt and completion, validate it, and return 83 | * a test info object. 84 | */ 85 | public validateCompletion( 86 | prompt: Prompt, 87 | completion: string, 88 | temperature: number 89 | ) { 90 | const testSource = prompt.completeTest(completion); 91 | 92 | const testInfo = this.collector.recordTestInfo( 93 | testSource ?? 
completion,
      prompt,
      prompt.fun.accessPath
    );
    if (testInfo.prompts.length > 1) {
      // we have already validated this test
      return testInfo;
    }

    let outcome;
    if (completion === "") {
      outcome = TestOutcome.FAILED({ message: "Empty test" });
    } else if (testSource) {
      outcome = this.validator.validateTest(
        testInfo.testName,
        testInfo.testSource
      );
    } else {
      outcome = TestOutcome.FAILED({ message: "Invalid syntax" });
    }
    this.collector.recordTestResult(testInfo, temperature, outcome);
    return testInfo;
  }

  /**
   * Refine the prompt based on the test outcome, and add the refined prompts
   * to the worklist.
   */
  private refinePrompts(
    prompt: Prompt,
    completion: string,
    testInfo: ITestInfo,
    worklist: Prompt[]
  ) {
    for (const refiner of this.refiners) {
      for (const refinedPrompt of refiner.refine(
        prompt,
        completion,
        testInfo.outcome
      )) {
        const provenance = {
          originalPrompt: prompt,
          testId: testInfo.id,
          refiner: refiner.name,
        };
        worklist.push(refinedPrompt.withProvenance(provenance));
      }
    }
  }
}

--------------------------------------------------------------------------------
/src/snippetHelper.ts:
--------------------------------------------------------------------------------
import levenshtein from "levenshtein";

type Partition = Set<string>;

export type SnippetMap = (functionName: string) => string[] | undefined;

export class Snippets {
  /** The maximum number of snippets we can comfortably handle. */
  MAX_SNIPPETS: number;

  /** A cache recording Levenshtein distance between pairs of strings. */
  distanceCache: Map<string, number>;

  constructor() {
    this.MAX_SNIPPETS = 50;
    this.distanceCache = new Map();
  }

  /**
   * Create the partitions. Initially each snippet is in its own partition.
   * @param snippets The snippets to partition.
   * @returns The partitions.
   */
  createPartitions(snippets: Set<string>): Partition[] {
    return [...snippets].map((snippet) => new Set([snippet]));
  }

  /**
   * Compute the Levenshtein distance between two strings, utilizing a cache.
   */
  computeDistance(a: string, b: string): number {
    // construct key for cache; this isn't injective, but it's good enough for our purposes
    const key = `${a}|||${b}`;
    if (this.distanceCache.has(key)) {
      return this.distanceCache.get(key)!;
    } else {
      const distance = new levenshtein(a, b).distance;
      this.distanceCache.set(key, distance);
      return distance;
    }
  }

  /**
   * Determine the lowest Levenshtein distance between elements of two partitions.
   * @param partition1 The first partition to compare.
   * @param partition2 The second partition to compare.
   * @returns The lowest Levenshtein distance between elements of the two partitions.
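   *
   * For example, comparing {"ab"} with {"abc", "xyz"} yields 1, the
   * Levenshtein distance between "ab" and "abc".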
48 | */ 49 | comparePartitions(partition1: Partition, partition2: Partition): number { 50 | let lowestDistance = Number.MAX_VALUE; 51 | partition1.forEach((snippet1) => { 52 | partition2.forEach((snippet2) => { 53 | const distance = this.computeDistance(snippet1, snippet2); 54 | if (distance < lowestDistance) { 55 | lowestDistance = distance; 56 | } 57 | }); 58 | }); 59 | return lowestDistance; 60 | } 61 | 62 | /** 63 | * Merge the two partitions with the lowest Levenshtein distance between them. 64 | * @param partitions The partitions. 65 | * @returns The partitions after merging. 66 | */ 67 | mergeMostSimilarPartitions(partitions: Partition[]): Partition[] { 68 | let index1 = -1; 69 | let index2 = -1; 70 | let mostSimilarDistance = Number.MAX_VALUE; 71 | for (let i = 0; i < partitions.length; i++) { 72 | for (let j = i + 1; j < partitions.length; j++) { 73 | const distance = this.comparePartitions(partitions[i], partitions[j]); 74 | if (distance < mostSimilarDistance) { 75 | index1 = i; 76 | index2 = j; 77 | mostSimilarDistance = distance; 78 | } 79 | } 80 | } 81 | if (index1 !== -1 && index2 !== -1) { 82 | const mergedPartition = new Set([ 83 | ...partitions[index1], 84 | ...partitions[index2], 85 | ]); 86 | partitions.splice(Math.max(index1, index2), 1); // make sure to remove the element at the larger index first 87 | partitions.splice(Math.min(index1, index2), 1); 88 | partitions.push(mergedPartition); 89 | 90 | index1 = -1; 91 | index2 = -1; 92 | } else { 93 | throw new Error(); 94 | } 95 | return partitions; 96 | } 97 | 98 | /** 99 | * Select a set of representative snippets. This is done by grouping 100 | * the snippets into partitions so that the elements of each partition 101 | * are as similar as possible, and then selecting the smallest snippet 102 | * from each partition. 103 | * @param snippets The snippets to select representatives for. 104 | * @returns The selected snippets. 
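   * @param n The maximum number of snippets to select.
   *
   * For example, selectSnippets(new Set(["ab()", "ab();", "xyz()"]), 2) first
   * merges the two near-identical "ab" snippets into one partition and then
   * returns the shortest member of each partition: {"ab()", "xyz()"}.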
   */
  selectSnippets(snippets: Set<string>, n: number): Set<string> {
    // create partitions: initially, each snippet is in its own partition
    let partitions = this.createPartitions(snippets);

    // while we have too many partitions, merge the most similar ones
    while (partitions.length > n) {
      partitions = this.mergeMostSimilarPartitions(partitions);
    }

    // find shortest snippet in each partition and add it to the selected snippets
    const selectedSnippets = new Set<string>();
    for (let i = 0; i < partitions.length; i++) {
      let shortestSnippet = "";
      let shortestSnippetLength = Number.MAX_VALUE;
      partitions[i].forEach((snippet) => {
        if (snippet.length < shortestSnippetLength) {
          shortestSnippet = snippet;
          shortestSnippetLength = snippet.length;
        }
      });
      selectedSnippets.add(shortestSnippet);
    }
    return selectedSnippets;
  }
}

--------------------------------------------------------------------------------
/src/codex.ts:
--------------------------------------------------------------------------------
import axios from "axios";
import fs from "fs";
import { performance } from "perf_hooks";
import { ICompletionModel } from "./completionModel";
import { trimCompletion } from "./syntax";

const defaultPostOptions = {
  max_tokens: 100, // maximum number of tokens to return
  temperature: 0, // sampling temperature; higher values increase diversity
  n: 5, // number of completions to return
  top_p: 1, // no need to change this
};
export type PostOptions = Partial<typeof defaultPostOptions>;

function getEnv(name: string): string {
  const value = process.env[name];
  if (!value) {
    console.error(`Please set the ${name} environment variable.`);
    process.exit(1);
  }
  return value;
}

export class Codex implements ICompletionModel {
  private readonly apiEndpoint: string;
  private readonly authHeaders: string;

  constructor(
    private readonly isStarCoder: boolean,
    private readonly instanceOptions: PostOptions = {}
  ) {
    this.apiEndpoint = this.isStarCoder
      ? getEnv("STARCODER_API_ENDPOINT")
      : getEnv("TESTPILOT_LLM_API_ENDPOINT");
    this.authHeaders = this.isStarCoder
      ? "{}"
      : getEnv("TESTPILOT_LLM_AUTH_HEADERS");
    console.log(
      `Using ${this.isStarCoder ? "StarCoder" : "GPT"} API at ${
        this.apiEndpoint
      }`
    );
  }

  /**
   * Query Codex for completions with a given prompt.
   *
   * @param prompt The prompt to use for the completion.
   * @param requestPostOptions The options to use for the request.
   * @returns A promise that resolves to a set of completions.
   */
  public async query(
    prompt: string,
    requestPostOptions: PostOptions = {}
  ): Promise<Set<string>> {
    const headers = {
      "Content-Type": "application/json",
      ...JSON.parse(this.authHeaders),
    };
    const options = {
      ...defaultPostOptions,
      // options provided to constructor override default options
      ...this.instanceOptions,
      // options provided to this function override default and instance options
      ...requestPostOptions,
    };

    performance.mark("codex-query-start");

    const postOptions = this.isStarCoder
      ?
{
          inputs: prompt,
          parameters: {
            max_new_tokens: options.max_tokens,
            temperature: options.temperature || 0.01, // StarCoder doesn't allow 0
            n: options.n,
          },
        }
      : {
          prompt,
          ...options,
        };

    const res = await axios.post(this.apiEndpoint, postOptions, { headers });

    performance.measure(
      `codex-query:${JSON.stringify({
        ...options,
        promptLength: prompt.length,
      })}`,
      "codex-query-start"
    );
    if (res.status !== 200) {
      throw new Error(
        `Request failed with status ${res.status} and message ${res.statusText}`
      );
    }
    if (!res.data) {
      throw new Error("Response data is empty");
    }
    const json = res.data;
    if (json.error) {
      throw new Error(json.error);
    }
    let numContentFiltered = 0;
    const completions = new Set<string>();
    if (this.isStarCoder) {
      completions.add(json.generated_text);
    } else {
      for (const choice of json.choices || [{ text: "" }]) {
        if (choice.finish_reason === "content_filter") {
          numContentFiltered++;
        }
        completions.add(choice.text);
      }
    }
    if (numContentFiltered > 0) {
      console.warn(
        `${numContentFiltered} completions were truncated due to content filtering.`
      );
    }
    return completions;
  }

  /**
   * Get completions from Codex and postprocess them as needed; print a warning if it did not produce any.
   *
   * @param prompt the prompt to use
   */
  public async completions(
    prompt: string,
    temperature: number
  ): Promise<Set<string>> {
    try {
      let result = new Set<string>();
      for (const completion of await this.query(prompt, { temperature })) {
        result.add(trimCompletion(completion));
      }
      return result;
    } catch (err: any) {
      console.warn(`Failed to get completions: ${err.message}`);
      return new Set<string>();
    }
  }
}

if (require.main === module) {
  (async () => {
    const codex = new Codex(false);
    const prompt = fs.readFileSync(0, "utf8");
    const responses = await codex.query(prompt, { n: 1 });
    console.log([...responses][0]);
  })().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}

--------------------------------------------------------------------------------
/examples/momentjs_test_generation.md:
--------------------------------------------------------------------------------
# Example: Test Generation for a Moment.js Function

This example demonstrates the process of generating tests for the **`moment().add`** function in Moment.js using a custom test generation framework.

## **Importing Dependencies**

```typescript
import path from "path";
import {
  APIFunction,
  FunctionDescriptor,
  Codex,
  TestGenerator,
  MochaValidator,
  BaseTestResultCollector,
} from "./";
```

Imports the necessary libraries and modules, including the test generation and validation classes.

## **Defining the Function Descriptor**

```typescript
const functionDescriptor: FunctionDescriptor = {
  type: "function",
  signature: "(amount: number, unit: string)",
  isAsync: false,
  implementation: `
    // Pseudo-implementation for moment().add
  `,
  isConstructor: false,
  docComment:
    "Adds the specified amount of time to the moment object.
The unit can be years, months, weeks, days, hours, minutes, seconds, or milliseconds. This function modifies the original moment object and returns it for chaining.",
};
```

Describes the function under test, including its signature and brief documentation.

## **Initializing the Test Generator Components**

```typescript
const apiFunction = new APIFunction("moment().add", functionDescriptor, "moment");
const model = new Codex(false, {
  n: 5,
  max_tokens: 150,
  temperature: 0.7,
});
const momentPath = path.join(require.resolve("moment"), "../");
const validator = new MochaValidator("moment", momentPath);
const collector = new BaseTestResultCollector();
const temperatures = [0.7];
const snippetMap = new Map([
  [apiFunction.functionName, ["moment().add(10, 'days')", "moment().add(1, 'year').format('YYYY')"]],
]);
const generator = new TestGenerator(temperatures, (fn) => snippetMap.get(fn), model, validator, collector);
```

Initializes the object that sends prompts to the Codex-based completion API, and sets up the paths and validators needed for test generation. This example used the `https://api.openai.com/v1/engines/gpt-3.5-turbo-instruct/completions` endpoint.

## **Test Generation and Collection**

```typescript
console.log("Generating test for moment().add()");
await generator.generateAndValidateTests(apiFunction);
const testInfos = collector.getTestInfos();
console.log("Test generation complete. Test Details:");
testInfos.forEach((test) => {
  console.log(`Test ID: ${test.id}, Test Name: ${test.testName}, Outcome: ${test.outcome.status}`);
});
```

Generates the tests and logs the results to the console.

## **Note on Test File Management**

By default, the test files are temporarily stored in the **`node_modules/<package>/`** directory and are erased after testing. To change this behavior and save the test files, you can implement a custom version of **`MochaValidator`** that saves them to a directory of your choice, as sketched below:

```typescript
class CustomMochaValidator extends MochaValidator {
  constructor(packageName, packagePath, testDirectory) {
    super(packageName, packagePath);
    this.testDirectory = testDirectory; // Custom directory for saving test files
    // Ensure the directory exists
    if (!fs.existsSync(this.testDirectory)) {
      fs.mkdirSync(this.testDirectory, { recursive: true });
    }
  }

  validateTest(testName, testSource) {
    let testFile = path.join(this.testDirectory, testName + '.js');
    fs.writeFileSync(testFile, testSource);
    console.log(`Test saved to: ${testFile}`); // Log where the test is saved
    // Call original validateTest logic here if needed, or simulate a test outcome
    return { status: 'PASSED' }; // Simulate a passed test outcome
  }

  // Override the cleanup to prevent deletion
  cleanup() {
    console.log('Cleanup skipped, tests preserved.');
  }
}
```

> Note: the `CustomMochaValidator` implementation above is only a sketch; unlike the preceding code, it has not been tested.

## Running the script

The code shown in this example is at `/examples/testGenerationScript.ts`, but it will not run by default. To run the test generation script, follow the steps below:

1.
Copy `testGenerationScript.ts` to `src/`, making sure that the second import directory is `./` 112 | 113 | ```sh 114 | cp examples/testGenerationScript.ts src/ 115 | ``` 116 | 117 | 2. Install Moment.js with `npm` 118 | 119 | ```sh 120 | npm install moment 121 | ``` 122 | 123 | 3. Build the files again 124 | 125 | ```sh 126 | npm run build 127 | ``` 128 | 129 | 4. Finally, set the environment variables and run the script with `node`: 130 | 131 | ```sh 132 | export TESTPILOT_LLM_API_ENDPOINT='https://api.openai.com/v1/engines/gpt-3.5-turbo-instruct/completions' 133 | export TESTPILOT_LLM_AUTH_HEADERS='{"Authorization": "Bearer ", "OpenAI-Organization": ""}' 134 | node dist/testGenerationScript.js 135 | ``` 136 | -------------------------------------------------------------------------------- /test/test-generation.ts: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import dedent from "dedent"; 3 | import fs from "fs"; 4 | import path from "path"; 5 | import { MochaValidator } from "../src/mochaValidator"; 6 | import { TestStatus } from "../src/report"; 7 | 8 | describe("MochaValidator", function () { 9 | this.timeout(10000); 10 | 11 | function check(tests: string[], expectedOutcomes: TestStatus[]) { 12 | const testDir = fs.mkdtempSync(path.join(".", "test-")); 13 | const validator = new MochaValidator("", testDir); 14 | try { 15 | for (let i = 0; i < tests.length; i++) { 16 | const testName = `test_${i}.js`; 17 | const valid = validator.validateTest(testName, tests[i]); 18 | expect(valid.status).to.equal(expectedOutcomes[i]); 19 | } 20 | } finally { 21 | fs.rmdirSync(testDir, { recursive: true }); 22 | validator.cleanup(); 23 | } 24 | } 25 | 26 | it("should run tests and report pass", () => { 27 | let tests = [ 28 | dedent`let mocha = require('mocha'); 29 | let assert = require('assert'); 30 | describe('test', () => { 31 | it('test', () => { 32 | assert([1, 2, 3].length === 3); 33 | }); 34 | });`, 35 | dedent`let mocha = require('mocha'); 36 | let expect = require('chai').expect; 37 | describe('test', () => { 38 | it('test', () => { 39 | expect([1, 2, 3, 4, 5].slice(1, 3)).to.eql([2, 3]); 40 | }); 41 | });`, 42 | ]; 43 | check(tests, [TestStatus.PASSED, TestStatus.PASSED]); 44 | }); 45 | 46 | it("should run tests and report fail", () => { 47 | let tests = [ 48 | dedent`let mocha = require('mocha'); 49 | let assert = require('assert'); 50 | describe('test', () => { 51 | it('test', () => { 52 | assert([1, 2, 3].length === 2); 53 | }); 54 | });`, 55 | dedent`let mocha = require('mocha'); 56 | let expect = require('chai').expect; 57 | describe('test', () => { 58 | it('test', () => { 59 | expect([1, 2, 3, 4, 5].slice(1, 3)).to.eql([3, 4]); 60 | }); 61 | });`, 62 | ]; 63 | check(tests, [TestStatus.FAILED, TestStatus.FAILED]); 64 | }); 65 | 66 | it("should correctly classify a test reported as both passing and failing by Mocha", () => { 67 | let test = dedent` 68 | const fs = require('fs'); 69 | describe('test fs', function() { 70 | it('test fs.ReadStream.prototype.push', function(done) { 71 | let rs = fs.createReadStream(__filename); 72 | rs.push("hello world"); 73 | rs.on("data", () => done()); 74 | }) 75 | }) 76 | `; 77 | check([test], [TestStatus.FAILED]); 78 | }); 79 | 80 | it.skip("should correctly classify another test reported as both passing and failing by Mocha", () => { 81 | let test = dedent` 82 | describe('test fs', function() { 83 | it('test fs.ReadStream', function(done) { 84 | new 
require('fs').ReadStream('/i/absolutely/do/not/exist'); 85 | done(); 86 | }) 87 | }) 88 | `; 89 | check([test], [TestStatus.FAILED]); 90 | }); 91 | 92 | it("should be robust against Mocha crashing and not producing a report", () => { 93 | let test = "describe('totally broken test', function() {)"; 94 | check([test], [TestStatus.FAILED]); 95 | }); 96 | 97 | it("should be robust against non-terminating tests (this test takes about five seconds)", () => { 98 | let test = dedent` 99 | let assert = require('assert'); 100 | let glob = require('glob'); 101 | describe('test glob', function() { 102 | it('test glob.Glob.prototype.setMaxListeners', function(done) { 103 | glob.Glob.prototype.setMaxListeners(2); 104 | let p = glob.Glob("./**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/**/*/index.js", {nodir: true}, (err, files) => { 105 | console.log("end"); 106 | console.log(p); 107 | }) 108 | }) 109 | }) 110 | `; 111 | check([test], [TestStatus.FAILED]); 112 | }).timeout(6000); 113 | 114 | it("should not classify a test as failing simply because it prints an error message to stderr", () => { 115 | let test = dedent` 116 | let assert = require('assert'); 117 | describe('test', function() { 118 | it('test', function(done) { 119 | console.error("Error: hello world"); 120 | assert(true); 121 | done(); 122 | }) 123 | }) 124 | `; 125 | check([test], [TestStatus.PASSED]); 126 | }); 127 | }); 128 | -------------------------------------------------------------------------------- /test/syntax.ts: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import dedent from "dedent"; 3 | import { closeBrackets, commentOut, trimCompletion } from "../src/syntax"; 4 | 5 | describe("test closeBrackets", function () { 6 | it("should handle syntactically invalid code", function () { 7 | expect( 8 | closeBrackets(dedent` 9 | function f({) { 10 | return 1; 11 | } 12 | `) 13 | ).to.be.undefined; 14 | }); 15 | 16 | it("should handle code that closes more brackets than it opens", function () { 17 | expect( 18 | closeBrackets(dedent` 19 | function f() { 20 | return 1; 21 | }} 22 | `) 23 | ).to.be.undefined; 24 | }); 25 | 26 | it("should skip brackets in comments", function () { 27 | const complete = dedent` 28 | let mocha = require('mocha'); 29 | let assert = require('assert'); 30 | // testing { 31 | describe('test', () => { 32 | it('test', () => { // tests sth ( 33 | assert([1, 2, 3].length === 3); 34 | }); 35 | });`; 36 | let result = closeBrackets(complete); 37 | expect(result).to.not.be.undefined; 38 | expect(result!.source).to.equal(complete); 39 | }); 40 | 41 | let template = dedent` 42 | let mocha = require('mocha'); 43 | let assert = require('assert'); 44 | // testing ( 45 | describe('test', () => { 46 | it('test', () => { // tests sth { 47 | assert([1, 2, 3].length === 3);<1>})}<2>)<3>`; 48 | 49 | for (const i of [1, 2, 3]) { 50 | it(`should complete from <${i}>`, function () { 51 | let incomplete = template 52 | .slice(0, template.indexOf(`<${i}>`)) 53 | .replace(/<\d>/g, ""); 54 | let complete = template.replace(/<\d>/g, ""); 55 | let result = closeBrackets(incomplete); 56 | expect(result).to.not.be.undefined; 57 | expect(result!.source).to.equal(complete); 58 | }); 59 | } 60 | 61 | it("should handle square brackets", function () { 62 | expect( 63 | closeBrackets(dedent` 64 | let arr = [ 65 | [1, 2, 3], 66 | [4, 5, 6 67 | `)!.source 68 | 
).to.equal(dedent` 69 | let arr = [ 70 | [1, 2, 3], 71 | [4, 5, 6]] 72 | `); 73 | }); 74 | }); 75 | 76 | describe("test trimCompletion", function () { 77 | it("should trim off incomplete lines", function () { 78 | expect( 79 | trimCompletion(dedent` 80 | assert([1, 2, 3].length === 3); 81 | assert( 82 | `) 83 | ).to.equal(dedent` 84 | assert([1, 2, 3].length === 3); 85 | `); 86 | }); 87 | 88 | it("should not trim off complete statements", function () { 89 | expect( 90 | trimCompletion(dedent` 91 | assert([1, 2, 3].length === 3); 92 | assert([1, 2].length === 2); 93 | `) 94 | ).to.equal(dedent` 95 | assert([1, 2, 3].length === 3); 96 | assert([1, 2].length === 2); 97 | `); 98 | }); 99 | 100 | it("should not trim off complete statements, even if followed by whitespace", function () { 101 | expect(trimCompletion("assert([1, 2, 3].length === 3); ")).to.equal( 102 | "assert([1, 2, 3].length === 3);" 103 | ); 104 | }); 105 | 106 | it("should not trim off complete blocks", function () { 107 | expect( 108 | trimCompletion(dedent` 109 | if (true) { 110 | assert([1, 2, 3].length === 3); 111 | } 112 | `) 113 | ).to.equal(dedent` 114 | if (true) { 115 | assert([1, 2, 3].length === 3); 116 | } 117 | `); 118 | }); 119 | 120 | it("should correctly trim incomplete statements if there is only a single line", function () { 121 | expect( 122 | trimCompletion(dedent` 123 | assert( 124 | `) 125 | ).to.equal(""); 126 | }); 127 | 128 | it("should trim completions that close more brackets than they open", function () { 129 | expect( 130 | trimCompletion(dedent` 131 | assert([1, 2, 3].length === 3); 132 | }); 133 | it('should do something else', function () { 134 | assert([1, 2].length === 2) 135 | `) 136 | ).to.equal(dedent` 137 | assert([1, 2, 3].length === 3); 138 | `); 139 | }); 140 | 141 | it("should trim completions that close more parentheses than they open", function () { 142 | expect( 143 | trimCompletion(dedent` 144 | assert([1, 2, 3].length === 3)); 145 | `) 146 | ).to.equal(dedent` 147 | assert([1, 2, 3].length === 3) 148 | `); 149 | }); 150 | }); 151 | 152 | describe("test commentOut", function () { 153 | it("should comment out a single line", function () { 154 | expect(commentOut("line\n")).to.equal("// line\n"); 155 | }); 156 | 157 | it("should comment out multiple lines", function () { 158 | expect(commentOut("line 1\nline 2\n")).to.equal("// line 1\n// line 2\n"); 159 | }); 160 | 161 | it("should add a final newline if it is missing", function () { 162 | expect(commentOut("line")).to.equal("// line\n"); 163 | }); 164 | 165 | it("should return the empty string if the input is empty", function () { 166 | expect(commentOut("")).to.equal(""); 167 | }); 168 | }); 169 | -------------------------------------------------------------------------------- /test/docSnippets.ts: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import dedent from "dedent"; 3 | import deepEqualInAnyOrder from "deep-equal-in-any-order"; 4 | import * as docSnippetMiner from "../src/docSnippets"; 5 | 6 | const chai = require("chai"); 7 | chai.use(deepEqualInAnyOrder); 8 | 9 | describe("unit test findFencedCodeBlocks", () => { 10 | it("should not detect snippets in non-covered language fencing", () => { 11 | const extractedSnippets = docSnippetMiner.findFencedCodeBlocks( 12 | `${__dirname}/input/coffee-fencing.md` 13 | ); 14 | expect(extractedSnippets.size).equal(0); 15 | }); 16 | 17 | it("should detect snippets in general fencing", () => { 18 | const 
expectedSnippets = [ 19 | "```\n" + 20 | "const vol = Volume.fromJSON({\n" + 21 | " '/app/index.js': '...',\n" + 22 | " '/app/package.json': '...',\n" + 23 | "});\n" + 24 | "```", 25 | ]; 26 | const extractedSnippets = docSnippetMiner.findFencedCodeBlocks( 27 | `${__dirname}/input/non-lang-fencing.md` 28 | ); 29 | expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets); 30 | }); 31 | 32 | it("should detect snippet in js fencing", () => { 33 | const expectedSnippets = [ 34 | dedent` 35 | \`\`\`js 36 | const vol = Volume.fromJSON({ 37 | "/app/index.js": "...", 38 | "/app/package.json": "...", 39 | }); 40 | \`\`\` 41 | `, 42 | ]; 43 | const extractedSnippets = docSnippetMiner.findFencedCodeBlocks( 44 | `${__dirname}/input/js-fencing-1.md` 45 | ); 46 | expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets); 47 | }); 48 | 49 | it("should detect snippet in ts fencing", () => { 50 | const expectedSnippets = [ 51 | dedent` 52 | \`\`\`ts 53 | const vol = Volume.fromJSON({ 54 | "/app/index.js": "...", 55 | "/app/package.json": "...", 56 | }); 57 | \`\`\` 58 | `, 59 | ]; 60 | const extractedSnippets = docSnippetMiner.findFencedCodeBlocks( 61 | `${__dirname}/input/ts-fencing-1.md` 62 | ); 63 | expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets); 64 | }); 65 | 66 | it.skip("should detect snippet with formatted fencing", () => { 67 | const expectedSnippets = [ 68 | "```js\nconcat = require('pull-stream/sinks/concat')\n```", 69 | "```js\nconcat(cb)\n```", 70 | ]; 71 | const extractedSnippets = docSnippetMiner.findFencedCodeBlocks( 72 | `${__dirname}/input/pull-stream-concat.md` 73 | ); 74 | expect(new Set(expectedSnippets)).to.deep.equal(extractedSnippets); 75 | }); 76 | }); 77 | 78 | describe("unit tests for callsAPIMethod", () => { 79 | it("should find method call in js fencing", () => { 80 | const inputSnippet = 81 | "```js\n" + 82 | "const vol = Volume.fromJSON({\n" + 83 | " '/app/index.js': '...',\n" + 84 | " '/app/package.json': '...',\n" + 85 | "});\n" + 86 | "```"; 87 | 88 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "fromJSON")).to.be.true; 89 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "Volume")).to.be.false; 90 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "app")).to.be.false; 91 | }); 92 | 93 | it("should not partially match method names", () => { 94 | const inputSnippet = 95 | "```js\n" + 96 | "vol.writeFileSync('/script.sh', 'sudo rm -rf *')\n" + 97 | 'vol.toJSON(); // {"/script.sh": "sudo rm -rf *"}\n' + 98 | "fromTest();\n" + 99 | "toFile = 5;\n" + 100 | "```"; 101 | 102 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "toJSON")).to.be.true; 103 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "JSON")).to.be.false; 104 | 105 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "fromTest")).to.be.true; 106 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "from")).to.be.false; 107 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "Test")).to.be.false; 108 | 109 | expect(docSnippetMiner.callsAPIMethod(inputSnippet, "toFile")).to.be.false; 110 | }); 111 | }); 112 | 113 | describe("test snippet trimming to max length", () => { 114 | it("should not trim", () => { 115 | const inputSnippet = dedent` 116 | import { fs } from 'memfs'; 117 | 118 | fs.writeFileSync('/hello.txt', 'World!'); 119 | fs.readFileSync('/hello.txt', 'utf8'); // World! 
120 | `; 121 | 122 | expect(docSnippetMiner.trimSnippetToMaxLength(inputSnippet, 4)).to.equal( 123 | inputSnippet 124 | ); 125 | expect(docSnippetMiner.trimSnippetToMaxLength(inputSnippet, 6)).to.equal( 126 | inputSnippet 127 | ); 128 | }); 129 | 130 | it("it should trim to maxLength", () => { 131 | const inputSnippet = dedent` 132 | import { fs, vol } from 'memfs'; 133 | 134 | const json = { 135 | './README.md': '1', 136 | './src/index.js': '2', 137 | './node_modules/debug/index.js': '3', 138 | }; 139 | vol.fromJSON(json, '/app'); 140 | 141 | fs.readFileSync('/app/README.md', 'utf8'); // 1 142 | vol.readFileSync('/app/src/index.js', 'utf8'); // 2 143 | `; 144 | 145 | const expectedSnippet = dedent` 146 | import { fs, vol } from 'memfs'; 147 | 148 | const json = { 149 | './README.md': '1', 150 | './src/index.js': '2', 151 | './node_modules/debug/index.js': '3', 152 | `; 153 | 154 | expect(docSnippetMiner.trimSnippetToMaxLength(inputSnippet, 6)).to.equal( 155 | expectedSnippet 156 | ); 157 | }); 158 | }); 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Note: This version of TestPilot has been archived. Please refer to the new version at [https://github.com/neu-se/testpilot2](https://github.com/neu-se/testpilot2). 2 | 3 | # TestPilot 4 | 5 | TestPilot is a tool for automatically generating unit tests for npm packages 6 | written in JavaScript/TypeScript using a large language model (LLM). 7 | 8 | Note that TestPilot represents an early exploration in the use of LLMs for 9 | test generation, and has been made available in open source as a basis for 10 | research and exploration. For day-to-day use the test generation features 11 | in [Copilot Chat](https://docs.github.com/en/copilot/github-copilot-chat/about-github-copilot-chat) 12 | are likely to yield better results. 13 | 14 | ## Background 15 | 16 | TestPilot generates tests for a given function `f` by prompting the LLM with a 17 | skeleton of a test for `f`, including information about `f` embedded in code 18 | comments, such as its signature, the body of `f`, and examples usages of `f` 19 | automatically mined from project documentation. The model's response is then 20 | parsed and translated into a runnable unit test. Optionally, the test is run and 21 | if it fails the model is prompted again with additional information about the 22 | failed test, giving it a chance to refine the test. 23 | 24 | Unlike other systems for LLM-based test generation, TestPilot does not require 25 | any additional training or reinforcement learning, and no examples of functions 26 | and their associated tests are needed. 27 | 28 | A research paper describing TestPilot in detail is available on 29 | [arXiv](https://arxiv.org/abs/2302.06527) and [IEEExplore](https://ieeexplore.ieee.org/document/10329992). 30 | 31 | ## Requirements 32 | 33 | In general, to be able to run TestPilot you need access to a Codex-style LLM 34 | with completion API. Set the `TESTPILOT_LLM_API_ENDPOINT` environment variable to 35 | the URL of the LLM API endpoint you want to use, and 36 | `TESTPILOT_LLM_AUTH_HEADERS` to a JSON object containing the headers you need to 37 | authenticate with the API. 
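As an illustration of how these two variables are consumed, the sketch below shows a minimal completion request. This is not TestPilot's actual client code, and the request/response shapes (`prompt`, `n`, `max_tokens`, `temperature`, `choices[].text`) are assumptions based on Codex-style completion APIs:

```ts
// Minimal sketch (assumes Node 18+ for the global fetch); not TestPilot's real client.
const endpoint = process.env.TESTPILOT_LLM_API_ENDPOINT!;
const authHeaders = JSON.parse(process.env.TESTPILOT_LLM_AUTH_HEADERS!);

async function complete(prompt: string): Promise<string[]> {
  // POST the prompt to the completion endpoint, passing the auth headers verbatim
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...authHeaders },
    body: JSON.stringify({ prompt, n: 5, max_tokens: 100, temperature: 0.0 }),
  });
  const json = await res.json();
  // Codex-style APIs return an array of choices, each carrying a `text` completion
  return json.choices.map((c: { text: string }) => c.text);
}
```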
38 | 39 | Typical values for these variables might be: 40 | 41 | - `TESTPILOT_LLM_API_ENDPOINT='https://api.openai.com/v1/engines/code-cushman-001/completions'` 42 | - `TESTPILOT_LLM_AUTH_HEADERS='{"Authorization": "Bearer <your token>", "OpenAI-Organization": "<your organization>"}'` 43 | 44 | Note, however, that you can run TestPilot in reproduction mode without access to 45 | the LLM API, in which model responses are taken from the output of a previous run; 46 | see below for details. 47 | 48 | ## Installation 49 | 50 | You can install TestPilot from a pre-built package or from source. 51 | 52 | ### Installing from a pre-built package 53 | 54 | TestPilot is available as a pre-built npm package, though it is not currently 55 | published to the npm registry. You can download a tarball from the repository 56 | and install it in the usual way. Note that this distribution only contains the 57 | core part of TestPilot, not the benchmarking harness. 58 | 59 | ### Installing from source 60 | 61 | The `src/` directory contains the source code for TestPilot, which is written in 62 | TypeScript and gets compiled into the `dist/` directory. Tests are in `test/`; 63 | the `benchmark/` directory contains a benchmarking harness for running TestPilot 64 | on multiple npm packages; and `ql/` contains the CodeQL queries used to analyze 65 | the results. 66 | 67 | In the root directory of a checkout of this repository, run `npm run build` to 68 | install dependencies and build the package. 69 | 70 | You can also use `npm run build:watch` to automatically build anytime you make 71 | changes to the code. Note, however, that this will not automatically install 72 | dependencies, and also will not build the benchmarking harness. 73 | 74 | Use `npm run test` to run the tests. For convenience, this will also install 75 | dependencies and run a build. 76 | 77 | ## Benchmarking 78 | 79 | If you install TestPilot from source, you can use the benchmarking harness to 80 | run TestPilot on multiple packages and analyze the results. This is not 81 | currently available if you install TestPilot from a pre-built package. 82 | 83 | ### Running locally 84 | 85 | Basic usage is as follows: 86 | 87 | ```sh 88 | node benchmark/run.js --outputDir <outputDir> --package <package> 89 | ``` 90 | 91 | This generates tests for all functions exported by the package in 92 | `<package>`, validates them, and writes the results to `<outputDir>`. 93 | 94 | Note that this assumes that package dependencies are installed and any build 95 | steps have been run (e.g., using `npm i` and `npm run build`). TestPilot also 96 | relies on `mocha`, so if the package under test does not already depend on it, 97 | you must install it separately, for example using the command `npm i --no-save 98 | mocha`. 99 | 100 | ### Running on Actions 101 | 102 | The `run-experiment.yml` workflow runs an experiment on GitHub Actions, 103 | producing the final report as an artifact you can download. The `results-all` 104 | artifact contains the results of all packages, while the other artifacts contain 105 | the individual results of each package. 106 | 107 | ### Reproducing results 108 | 109 | The results of TestPilot are non-deterministic, so even if you run it on the 110 | same package on the same machine multiple times, you will get different results. 111 | However, the benchmarking harness records enough data to be able to replay a 112 | benchmark run in many cases. 
113 | 114 | To do this, use the `--api` and `--responses` options to reuse the API listings 115 | and responses from a previous run: 116 | 117 | ```sh 118 | node benchmark/run.js --outputDir <outputDir> --package <package> --api <api.json> --responses <prompts.json> 119 | ``` 120 | 121 | Note that by default replay will fail if any of the prompts are not found in the 122 | responses file. This typically happens if TestPilot is refining failing tests, 123 | since in this case the prompt to the model depends on the exact failure message, 124 | which can be system-specific (e.g., containing local file-system paths), or 125 | depend on the Node.js version or other factors. 126 | 127 | To work around these limitations, you can pass the `--strictResponses false` 128 | flag to treat missing prompts as getting no response from 129 | the model. This will not, in general, produce the same results as the initial 130 | run, but suffices in many cases. 131 | 132 | ### Analyzing results 133 | 134 | The CodeQL queries in `ql/queries` can be used to analyze the results of running 135 | an experiment. See `ql/CodeQL.md` for instructions on how to set up CodeQL and 136 | run the queries. 137 | 138 | ## License 139 | 140 | This project is licensed under the terms of the MIT open source license. Please refer to [MIT](./LICENSE.txt) for the full terms. 141 | 142 | ## Maintainers 143 | 144 | - Max Schaefer (@max-schaefer) 145 | - Frank Tip (@franktip) 146 | - Sarah Nadi (@snadi) 147 | 148 | ## Support 149 | 150 | TestPilot is a research prototype and is not officially supported. However, if 151 | you have questions or feedback, please file an issue and we will do our best to 152 | respond. 153 | 154 | ## Acknowledgement 155 | 156 | We thank Aryaz Eghbali (@aryaze) for his work on the initial version of 157 | TestPilot. 158 | -------------------------------------------------------------------------------- /src/mineSnippets.ts: -------------------------------------------------------------------------------- 1 | import * as cp from "child_process"; 2 | import * as fs from "fs"; 3 | import * as os from "os"; 4 | import * as path from "path"; 5 | import yargs from "yargs"; 6 | import { hideBin } from "yargs/helpers"; 7 | import AdmZip from "adm-zip"; 8 | import { Snippets } from "./snippetHelper"; 9 | 10 | const snippetHelper = new Snippets(); 11 | 12 | /** 13 | * Extract raw information about usage snippets for the given methods from the 14 | * given CodeQL database. 15 | * 16 | * @param database The path to the CodeQL database. 17 | * @param methods The methods to extract usage snippets for. 18 | * @returns A stream of result tuples `{id, method, file, line}`, where `id` is 19 | * the CodeQL ID of a call to `method`, and `file`:`line` belongs to 20 | * the intraprocedural slice of this call. 
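 *
 * For example (hypothetical data), a call to `toJSON` with CodeQL ID 42 whose
 * slice touches lines 3 and 5 of `lib/index.js` would be reported as the two
 * tuples `{id: 42, method: "toJSON", file: "lib/index.js", line: 3}` and
 * `{id: 42, method: "toJSON", file: "lib/index.js", line: 5}`.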
21 |  */ 22 | export function* getSnippetData(database: string, methods: string[]) { 23 | // create temporary CSV file to store relevant method names in 24 | const csvFile = `${os.tmpdir()}/targetMethod.csv`; 25 | const escapedMethodNames = methods.map( 26 | (method) => `"${method.replace(/"/g, '""')}"` 27 | ); 28 | fs.writeFileSync(csvFile, escapedMethodNames.join("\n") + "\n"); 29 | 30 | // run mining query 31 | const bqrsFile = `${os.tmpdir()}/results.bqrs`; 32 | cp.execFileSync( 33 | "codeql", 34 | [ 35 | "query", 36 | "run", 37 | "-d", 38 | database, 39 | "-o", 40 | bqrsFile, 41 | "--external", 42 | `targetFunction=${csvFile}`, 43 | path.join(__dirname, "../../ql/queries/SnippetMining.ql"), 44 | ], 45 | { stdio: "inherit" } 46 | ); 47 | 48 | // decode results into CSV format 49 | const outputFile = `${os.tmpdir()}/results.csv`; 50 | cp.execFileSync( 51 | "codeql", 52 | [ 53 | "bqrs", 54 | "decode", 55 | "--format", 56 | "csv", 57 | "--no-titles", 58 | "--entities", 59 | "id", 60 | "--output", 61 | outputFile, 62 | bqrsFile, 63 | ], 64 | { stdio: "inherit" } 65 | ); 66 | 67 | const results = fs.readFileSync(outputFile, "utf8"); 68 | for (const data of results.split("\n")) { 69 | let [id, method, file, line] = data.split(","); 70 | if (!id) { 71 | continue; 72 | } 73 | yield { 74 | id: +id, 75 | method: method.slice(1, -1), 76 | file: file.slice(1, -1), 77 | line: +line, 78 | }; 79 | } 80 | } 81 | 82 | type SnippetMap = [string, Map<string, number[]>][]; 83 | 84 | /** 85 | * Extract structured information about usage snippets for the given methods 86 | * from the given CodeQL database. 87 | * 88 | * @param database The path to the CodeQL database. 89 | * @param methods The methods to extract usage snippets for. 90 | * @returns A sparse array indexed by CodeQL IDs. For each ID it records the 91 | * name of the called method as well as a map from file names to 92 | * relevant line numbers in that file. 93 | */ 94 | export function getSnippetsInfo( 95 | database: string, 96 | methods: string[] 97 | ): SnippetMap { 98 | const snippets: SnippetMap = []; 99 | 100 | for (const { id, method, file, line } of getSnippetData(database, methods)) { 101 | if (!snippets[id]) { 102 | snippets[id] = [method, new Map()]; 103 | } 104 | const fileMap = snippets[id][1]; 105 | if (!fileMap.has(file)) { 106 | fileMap.set(file, []); 107 | } 108 | const lineNumbers = fileMap.get(file)!; 109 | lineNumbers.push(line); 110 | } 111 | 112 | return snippets; 113 | } 114 | 115 | /** 116 | * Extract usage snippets for the given methods from the given CodeQL database. 117 | * 118 | * @param database The path to the CodeQL database. 119 | * @param numSnippets The number of snippets to extract. 120 | * @param methods The methods to extract usage snippets for. 121 | * @param maxLength The maximum number of lines to include in each snippet. 122 | * @returns A map from method names to arrays of usage snippets. 
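 *
 * Illustrative only: `getSnippets(db, 2, ["toJSON"], 10)` might return a map
 * like `Map { "toJSON" => ["for toJSON\n  vol.toJSON();", ...] }`; each snippet
 * starts with a `for <method>` header line followed by the mined source lines.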
123 |  */ 124 | 125 | export function getSnippets( 126 | database: string, 127 | numSnippets: number, 128 | methods: string[], 129 | maxLength: number 130 | ): Map<string, string[]> { 131 | let results = new Map<string, Set<string>>(); 132 | 133 | // mine snippets 134 | const snippets = getSnippetsInfo(database, methods); 135 | 136 | // now output them 137 | const srcArchive = new AdmZip(path.join(database, "src.zip")); 138 | for (const i in snippets) { 139 | const [methodName, files] = snippets[i]; 140 | let currentSnippet = `for ${methodName}`; 141 | for (const [file, lineNumbers] of files.entries()) { 142 | const contents = srcArchive.readAsText(file.slice(1)); 143 | const lines = contents.split("\n"); 144 | 145 | // pull out relevant lines from the file and record 146 | // minimum indentation level 147 | let relevantLineNumbers = lineNumbers.sort((a, b) => a - b); 148 | if (maxLength !== -1) { 149 | relevantLineNumbers = relevantLineNumbers.slice(-maxLength); 150 | } 151 | const relevantLines = []; 152 | let minIndent = -1; 153 | for (const lineNumber of relevantLineNumbers) { 154 | const line = lines[lineNumber - 1] || ""; 155 | const indent = line.search(/\S/); 156 | if (minIndent === -1 || indent < minIndent) { 157 | minIndent = indent; 158 | } 159 | relevantLines.push(line); 160 | } 161 | if (minIndent === -1) { 162 | minIndent = 0; 163 | } 164 | 165 | // output relevant lines, outdenting them by the minimum indentation 166 | for (const line of relevantLines) { 167 | currentSnippet += `\n ${line}`; 168 | } 169 | } 170 | if (results.has(methodName)) { 171 | results.get(methodName)!.add(currentSnippet); 172 | } else { 173 | results.set(methodName, new Set([currentSnippet])); 174 | } 175 | } 176 | 177 | // select snippets that are dissimilar 178 | let finalSnippets = new Map(); 179 | for (let [method, snippets] of results) { 180 | // if we have too many snippets, throw some away (snippet selection doesn't scale beyond ~50 snippets) 181 | if (snippets.size > snippetHelper.MAX_SNIPPETS) { 182 | snippets = new Set([...snippets].slice(0, snippetHelper.MAX_SNIPPETS)); 183 | } 184 | let selectedSnippets = snippetHelper.selectSnippets(snippets, numSnippets); 185 | finalSnippets.set(method, Array.from(selectedSnippets)); 186 | snippetHelper.distanceCache.clear(); 187 | } 188 | return finalSnippets; 189 | } 190 | 191 | if (require.main === module) { 192 | (async () => { 193 | const parser = yargs(hideBin(process.argv)) 194 | .usage("$0 [-n <numSnippets>] [-l <maxLength>] <database> <method>...") 195 | .example( 196 | "$0 ~/databases/memfs toJSON", 197 | "extract three usage snippets for method toJSON from the memfs database" 198 | ) 199 | .option("n", { 200 | describe: "number of snippets to generate", 201 | default: 3, 202 | type: "number", 203 | }) 204 | .option("l", { 205 | alias: "length", 206 | describe: "maximum length of each snippet in lines; -1 means no limit", 207 | default: -1, 208 | type: "number", 209 | }) 210 | .demand(2); 211 | const argv = await parser.argv; 212 | const database = argv._[0] as string; 213 | const methods = argv._.slice(1) as string[]; 214 | const numSnippets = argv.n; 215 | const maxLength = argv.l; 216 | const allSnippets = getSnippets(database, numSnippets, methods, maxLength); 217 | for (const [method, snippets] of allSnippets) { 218 | console.log(`${method}:`); 219 | console.log(snippets.join("\n")); 220 | } 221 | })().catch((err) => { 222 | console.error(err); 223 | process.exit(1); 224 | }); 225 | } 226 | -------------------------------------------------------------------------------- /benchmark/editDistance.ts: 
-------------------------------------------------------------------------------- 1 | import * as fs from "fs"; 2 | import levenshtein from "levenshtein"; 3 | import fg from "fast-glob"; 4 | import yargs from "yargs"; 5 | import { hideBin } from "yargs/helpers"; 6 | 7 | export interface Test { 8 | fileName: string; // in what file was the test found 9 | index: number; // the index of the test in the file 10 | contents: string; // the contents of the test 11 | } 12 | 13 | export interface SimilarityReportEntry { 14 | generatedTestName: string; 15 | generatedTestCode: string; 16 | mostSimilarTest: Test; 17 | similarity: number; 18 | } 19 | 20 | export interface SimilarityReport { 21 | numGeneratedTests: number; 22 | numExistingTests: number; 23 | similarities: SimilarityReportEntry[]; 24 | maxSimilarity: number; 25 | } 26 | 27 | const testLoc: { [key: string]: string } = { 28 | glob: "test", 29 | "fs-extra": "lib/**/__tests__", 30 | "graceful-fs": "test", 31 | jsonfile: "test", 32 | bluebird: "test", 33 | q: "spec", 34 | rsvp: "test", 35 | memfs: "src/__tests__", 36 | "node-dir": "test", 37 | "zip-a-folder": "test", 38 | "js-sdsl": "test", 39 | "quill-delta": "test", 40 | "complex.js": "tests", 41 | "pull-stream": "test", 42 | "countries-and-timezones": "test", 43 | "simple-statistics": "test", 44 | plural: "test.js", 45 | dirty: "test", 46 | "geo-point": "src/geo-point.spec.ts", 47 | uneval: "test.js", 48 | omnitool: "test", 49 | core: "test", 50 | "image-downloader": "test", 51 | "crawler-url-parser": "test", 52 | "gitlab-js": "test", 53 | }; 54 | 55 | /** 56 | * Parse a file and return all tests in it 57 | * @param fileName the name of the file 58 | * @param contents the contents of the file 59 | * @returns the set of tests in the file 60 | **/ 61 | export function parseTests(fileName: string, contents: string): Set<Test> { 62 | const tests = new Set<Test>(); 63 | 64 | const callToIt = /\b(it|test)\s*\(\s*['`"].*['`"],/g; // pattern specifying where a test starts, including its "it" description 65 | 66 | // find all index positions where this regexp matches and then figure out where it ends by counting parentheses and curly braces 67 | let match; 68 | while ((match = callToIt.exec(contents))) { 69 | const index = match.index; 70 | 71 | // find index of open curly brace defining test body, ignoring any open curly braces in the test description 72 | const indexToStartSearch = index + match[0].length; 73 | const openCurlyBraceIndex = contents.indexOf("{", indexToStartSearch); 74 | 75 | if (openCurlyBraceIndex === -1) { 76 | console.warn( 77 | "WARNING: No open curly brace found for test starting at index " + 78 | index + 79 | " in file " + 80 | fileName + 81 | ". Skipping test." 
82 | ); 83 | continue; 84 | } 85 | 86 | // find index of matching closing curly brace 87 | let openCurlyBraces = 1; 88 | let closeCurlyBraceIndex = openCurlyBraceIndex; 89 | for (let i = openCurlyBraceIndex + 1; i < contents.length; i++) { 90 | if (contents[i] === "{") { 91 | openCurlyBraces++; 92 | } else if (contents[i] === "}") { 93 | openCurlyBraces--; 94 | if (openCurlyBraces === 0) { 95 | closeCurlyBraceIndex = i; 96 | break; 97 | } 98 | } 99 | } 100 | // find index of matching closing parenthesis 101 | for (let i = closeCurlyBraceIndex + 1; i < contents.length; i++) { 102 | if (contents[i] === ")") { 103 | closeCurlyBraceIndex = i; 104 | break; 105 | } 106 | } 107 | 108 | const testCode = contents.substring(index, closeCurlyBraceIndex + 1); 109 | 110 | tests.add({ fileName: fileName, index: tests.size, contents: testCode }); 111 | } 112 | 113 | return tests; 114 | } 115 | 116 | /** 117 | * find all tests in a directory and its subdirectories 118 | * @param pkgName the package name, used to look up where its existing tests live 119 | * @returns the set of tests found 120 | */ 121 | export function findTests( 122 | pkgName: string, 123 | testDir: string, 124 | isGenerated: boolean = false 125 | ): Set<Test> { 126 | var testFilePatterns = "tests/*.js"; 127 | 128 | if (!isGenerated) { 129 | testFilePatterns = testLoc[pkgName]; 130 | 131 | testFilePatterns = 132 | testFilePatterns.endsWith(".ts") || testFilePatterns.endsWith(".js") 133 | ? testFilePatterns 134 | : testFilePatterns + "/**/*.(js|ts)"; 135 | } 136 | 137 | const tests = new Set<Test>(); 138 | const testFiles = fg.sync(`${testDir}/${testFilePatterns}`, { dot: true }); 139 | 140 | testFiles.forEach((f) => { 141 | const contents = fs.readFileSync(`${f}`, "utf8"); 142 | const fileTests = parseTests(f, contents); 143 | fileTests.forEach((t) => tests.add(t)); 144 | }); 145 | return tests; 146 | } 147 | 148 | /** 149 | * Generate a report on the similarity of tests in two directories 150 | * @param existingTestsDir directory containing the existing tests 151 | * @param generatedTestsDir directory containing the generated tests 152 | */ 153 | export function generateReport( 154 | pkgName: string, 155 | existingTestsDir: string, 156 | generatedTestsDir: string 157 | ): SimilarityReport { 158 | const existingTests = findTests(pkgName, existingTestsDir); 159 | const generatedTests = findTests(pkgName, generatedTestsDir, true); 160 | 161 | console.log( 162 | `Found ${existingTests.size} existing tests and ${generatedTests.size} generated tests.` 163 | ); 164 | 165 | const report = {} as SimilarityReport; 166 | report.numExistingTests = existingTests.size; 167 | report.numGeneratedTests = generatedTests.size; 168 | report.similarities = []; 169 | 170 | var overallMaxSimilarity = 0; 171 | 172 | // for each test in the generated tests, find the maximum similarity to an existing test 173 | generatedTests.forEach((generatedTest) => { 174 | let maxSimilarity = 0; 175 | let mostSimilarTest = { 176 | fileName: "NOT_FOUND", 177 | index: -1, 178 | contents: "NOT_FOUND", 179 | }; 180 | existingTests.forEach((existingTest) => { 181 | const similarity = 182 | 1 - 183 | new levenshtein(generatedTest.contents, existingTest.contents) 184 | .distance / 185 | Math.max(generatedTest.contents.length, existingTest.contents.length); 186 | if (similarity > maxSimilarity) { 187 | maxSimilarity = similarity; 188 | mostSimilarTest = existingTest; 189 | } 190 | }); 191 | //console.log(`generated test ${generatedTest.fileName} has maximal similarity 
${maxSimilarity} to existing test#${mostSimilarTest.index} in ${mostSimilarTest.fileName}`); 192 | report.similarities.push({ 193 | generatedTestName: generatedTest.fileName, 194 | generatedTestCode: generatedTest.contents, 195 | mostSimilarTest: mostSimilarTest, 196 | similarity: maxSimilarity, 197 | }); 198 | 199 | if (maxSimilarity > overallMaxSimilarity) { 200 | overallMaxSimilarity = maxSimilarity; 201 | } 202 | }); 203 | 204 | report.maxSimilarity = overallMaxSimilarity; 205 | return report; 206 | } 207 | 208 | if (require.main === module) { 209 | (async () => { 210 | // example usage: node benchmark/editDistance.js --pkgName countries-and-timezones --generatedTestsDir 'results/countries-and-timezones/tests' --existingTestsDir 'benchmarks/countries-and-timezones' 211 | const parser = yargs(hideBin(process.argv)) 212 | .strict() 213 | .options({ 214 | generatedTestsDir: { 215 | type: "string", 216 | demandOption: true, 217 | description: "directory where the generated tests are", 218 | }, 219 | existingTestsDir: { 220 | type: "string", 221 | demandOption: true, 222 | description: "directory where the existing tests are", 223 | }, 224 | pkgName: { 225 | type: "string", 226 | demandOption: true, 227 | description: "name of the package", 228 | }, 229 | }); 230 | 231 | const argv = await parser.argv; 232 | 233 | const report = generateReport( 234 | argv.pkgName, 235 | argv.existingTestsDir, 236 | argv.generatedTestsDir 237 | ); 238 | 239 | const json = JSON.stringify(report, null, 2); 240 | fs.writeFileSync("similarityReport.json", json, "utf8"); 241 | })().catch((e) => { 242 | console.error(e); 243 | process.exit(1); 244 | }); 245 | } 246 | -------------------------------------------------------------------------------- /src/mochaValidator.ts: -------------------------------------------------------------------------------- 1 | import path from "path"; 2 | import fs from "fs"; 3 | import os from "os"; 4 | import child_process from "child_process"; 5 | import { spawnSync } from "child_process"; 6 | import { TestValidator } from "./testValidator"; 7 | import { ITestFailureInfo, TestOutcome } from "./report"; 8 | import { ICoverageSummary, emptyCoverageSummary } from "./coverage"; 9 | import { performance } from "perf_hooks"; 10 | 11 | /** 12 | * A bare-bones type definition for a Mocha test result, only modelling the 13 | * fields we need. 14 | */ 15 | interface IMochaTestResult { 16 | err: { 17 | message?: string; 18 | }; 19 | } 20 | 21 | /** 22 | * A bare-bones type definition for a Mocha test report, only modelling the 23 | * fields we need. 
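 *
 * For instance, a run with a single failing test might parse to the following
 * (illustrative, abridged) shape:
 * `{ passes: [], failures: [{ err: { message: "expected 3 to equal 2" } }], pending: [] }`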
24 |  */ 25 | interface IMochaReport { 26 | passes: IMochaTestResult[]; 27 | failures: IMochaTestResult[]; 28 | pending: IMochaTestResult[]; 29 | } 30 | 31 | export class MochaValidator extends TestValidator { 32 | private readonly testDir: string; 33 | private readonly coverageDirs: string[] = []; 34 | 35 | constructor(private packageName: string, private packagePath: string) { 36 | super(); 37 | this.testDir = fs.mkdtempSync(path.join(packagePath, "test-")); 38 | } 39 | 40 | private scrubTestDirFromError(error: ITestFailureInfo): ITestFailureInfo { 41 | if (!error || typeof error !== "object") { 42 | console.warn(`Unexpected error type: ${typeof error}`); 43 | return error; 44 | } else if (typeof error.message !== "string") { 45 | console.warn(`Unexpected error.message type: ${typeof error.message}`); 46 | return error; 47 | } 48 | error.message = error.message.replace( 49 | new RegExp(this.testDir, "g"), 50 | "/path/to/test" 51 | ); 52 | return error; 53 | } 54 | 55 | public validateTest(testName: string, testSource: string): TestOutcome { 56 | const requirePattern = new RegExp( 57 | `require\\('${this.packageName}'\\)`, 58 | "g" 59 | ); 60 | let testFile = path.join(this.testDir, testName); 61 | if (fs.existsSync(testFile)) { 62 | throw new Error(`Test file ${testFile} already exists`); 63 | } 64 | fs.writeFileSync( 65 | testFile, 66 | testSource.replace(requirePattern, `require('..')`) 67 | ); 68 | 69 | const packagePath = path.resolve(this.testDir, ".."); 70 | 71 | // temporary directory to store output from mocha and nyc 72 | const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mocha-validator")); 73 | // directory to store nyc profile and coverage data 74 | const coverageDir = path.join(tmpDir, "coverage"); 75 | // coverage report, produced by nyc 76 | const coverageReport = path.join(coverageDir, "coverage-final.json"); 77 | // test report, produced by mocha 78 | const reportFile = path.join(tmpDir, "report.json"); 79 | 80 | performance.mark(`start:${testName}`); 81 | const res = spawnSync( 82 | path.join(__dirname, "..", "node_modules", ".bin", "nyc"), 83 | [ 84 | `--cwd=${packagePath}`, 85 | `--exclude=${path.basename(this.testDir)}`, 86 | "--reporter=json", 87 | `--report-dir=${coverageDir}`, 88 | `--temp-dir=${coverageDir}`, 89 | path.join(__dirname, "..", "node_modules", ".bin", "mocha"), 90 | "--full-trace", 91 | "--exit", 92 | "--allow-uncaught=false", 93 | "--reporter=json", 94 | "--reporter-option", 95 | `output=${reportFile}`, 96 | "--", 97 | testFile, 98 | ], 99 | { 100 | timeout: 5000, 101 | killSignal: "SIGKILL", 102 | } 103 | ); 104 | performance.measure(`duration:${testName}`, `start:${testName}`); 105 | const stderr = res.stderr.toString(); 106 | const report = MochaValidator.tryParseReport(reportFile); 107 | 108 | // parse test results; this is a bit complicated since Mocha sometimes reports asynchronous tests 109 | // as both passed and failed; we want to make sure to count them as failed 110 | let outcome: TestOutcome = TestOutcome.OTHER; 111 | if ( 112 | res.status != 0 || 113 | stderr.includes("AssertionError") || 114 | !report || 115 | report.failures.length > 0 116 | ) { 117 | // we need to construct an ITestFailureInfo object 118 | // first, try to get it from the report 119 | if ( 120 | report && 121 | report.failures.length > 0 && 122 | report.failures[0].err.message 123 | ) { 124 | outcome = TestOutcome.FAILED( 125 | this.scrubTestDirFromError(report.failures[0].err as ITestFailureInfo) 126 | ); 127 | } else { 128 | // if that fails, try to get it from 
stderr 129 | const match = stderr.match(/(AssertionError: .*)/); 130 | if (match) { 131 | outcome = TestOutcome.FAILED( 132 | this.scrubTestDirFromError({ message: match[1] }) 133 | ); 134 | } else { 135 | // if that fails, just use the whole stderr or (if that's empty) the exit code 136 | outcome = TestOutcome.FAILED( 137 | this.scrubTestDirFromError({ 138 | message: stderr || `Mocha exited with code ${res.status}`, 139 | }) 140 | ); 141 | } 142 | } 143 | } else { 144 | // further sanity check: there should be exactly one result (either passed or pending) 145 | const numResults = report.passes.length + report.pending.length; 146 | if (numResults != 1) { 147 | throw new Error(`Expected 1 test result, got ${numResults}`); 148 | } 149 | 150 | if (report.passes.length > 0) { 151 | outcome = TestOutcome.PASSED(coverageReport, coverageDir); 152 | this.coverageDirs.push(coverageDir); 153 | } else { 154 | outcome = TestOutcome.PENDING; 155 | } 156 | } 157 | 158 | // no need to keep coverage data for invalid tests 159 | if (outcome.status != "PASSED") { 160 | fs.rmdirSync(coverageDir, { recursive: true }); 161 | } 162 | return outcome; 163 | } 164 | 165 | private static tryParseReport(reportFile: string): IMochaReport | undefined { 166 | try { 167 | return JSON.parse(fs.readFileSync(reportFile, "utf8")); 168 | } catch (e: any) { 169 | console.warn(`Error parsing test report: ${e}`); 170 | return undefined; 171 | } 172 | } 173 | 174 | public computeCoverageSummary(): ICoverageSummary { 175 | if (this.coverageDirs.length == 0) { 176 | return emptyCoverageSummary(); 177 | } 178 | 179 | const testDir = fs.mkdtempSync(path.join(this.packagePath, "test-")); 180 | try { 181 | // create/clean .nyc_output directory 182 | const nycOutput = path.join(this.packagePath, ".nyc_output"); 183 | if (fs.existsSync(nycOutput)) { 184 | fs.rmdirSync(nycOutput, { recursive: true }); 185 | } 186 | fs.mkdirSync(nycOutput); 187 | 188 | // copy all .json files from coverageDirs to nycOutput 189 | for (const coverageDir of this.coverageDirs) { 190 | MochaValidator.copyCoverageData(coverageDir, nycOutput); 191 | } 192 | 193 | // create nyc report 194 | child_process.spawnSync( 195 | path.join(__dirname, "..", "node_modules", ".bin", "nyc"), 196 | [ 197 | `--report-dir=${path.join(testDir, "coverage")}`, 198 | "--reporter=json-summary", 199 | "report", 200 | ], 201 | { 202 | cwd: this.packagePath, 203 | stdio: "inherit", 204 | } 205 | ); 206 | 207 | const coverageSummaryFileName = path.join( 208 | testDir, 209 | "coverage", 210 | "coverage-summary.json" 211 | ); 212 | if (fs.existsSync(coverageSummaryFileName)) { 213 | return JSON.parse(fs.readFileSync(coverageSummaryFileName, "utf8")); 214 | } else { 215 | throw new Error( 216 | `Failed to generate coverage summary: ${coverageSummaryFileName} does not exist.` 217 | ); 218 | } 219 | } finally { 220 | fs.rmdirSync(testDir, { recursive: true }); 221 | } 222 | } 223 | 224 | /** 225 | * Copy all .json files from `src` to `dest` (which must exist). 
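 * Note that files named `coverage-final.json` are skipped, so only the raw
 * per-test nyc data files are copied.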
226 | */ 227 | public static copyCoverageData(src: string, dest: string) { 228 | for (const file of fs.readdirSync(src)) { 229 | if (file.endsWith(".json") && file !== "coverage-final.json") { 230 | fs.copyFileSync(path.join(src, file), path.join(dest, file)); 231 | } 232 | } 233 | } 234 | 235 | public cleanup(): void { 236 | for (const coverageDir of this.coverageDirs) { 237 | fs.rmdirSync(coverageDir, { recursive: true }); 238 | } 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /benchmark/generate_report.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import { 3 | CoverageStats, 4 | FailureStats, 5 | parseReports, 6 | RefinerStats, 7 | SimilarityStats, 8 | } from "./parse_reports"; 9 | 10 | function percentage(p: number | string) { 11 | if (typeof p === "number") { 12 | return `${p.toFixed(2)}%`; 13 | } else { 14 | return p; 15 | } 16 | } 17 | 18 | type DiffCoverageStats = { 19 | [packageName: string]: { [key: keyof CoverageStats]: string | number }; 20 | }; 21 | 22 | function printCoverageReport( 23 | title: string, 24 | stats: CoverageStats | DiffCoverageStats 25 | ) { 26 | console.log(` 27 | # ${title} 28 | Project | # Snippets Available | # Tests | # Passing Tests | Statement coverage | # Non-trivial tests | # Non-trivial passing tests | Statement coverage by non-trivial tests 29 | --- | --: | --: | --: | --: | --: | --: | --:`); 30 | for (const { 31 | proj, 32 | nrUniqueSnippets, 33 | numTests, 34 | numPassing, 35 | coverage, 36 | nonTrivialTests, 37 | nonTrivialPassing, 38 | nonTrivialCoverage, 39 | } of Object.values(stats)) { 40 | console.log( 41 | `${proj} | ${nrUniqueSnippets} | ${numTests} | ${numPassing} | ${percentage( 42 | coverage 43 | )} | ${nonTrivialTests} | ${nonTrivialPassing} | ${percentage( 44 | nonTrivialCoverage 45 | )}` 46 | ); 47 | } 48 | } 49 | 50 | function printFailureReport( 51 | title: string, 52 | stats: FailureStats, 53 | showPercentages = true 54 | ) { 55 | console.log(` 56 | # ${title} 57 | Project | # FailedTests | # AssertionErrors | # FileSysErrors | # CorrectnessErrors | # Timeout | # Other 58 | --- | --: | --: | --: | --: | --: | --:|`); 59 | for (const { 60 | proj, 61 | numFailing, 62 | numAssertionErrors, 63 | numFileSysErrors, 64 | numCorrectnessErrors, 65 | numTimeoutErrors, 66 | numOther, 67 | } of Object.values(stats)) { 68 | console.log( 69 | `${proj} | ${numFailing} | ${formatNum( 70 | numAssertionErrors, 71 | numFailing, 72 | showPercentages 73 | )} | ${formatNum( 74 | numFileSysErrors, 75 | numFailing, 76 | showPercentages 77 | )} | ${formatNum( 78 | numCorrectnessErrors, 79 | numFailing, 80 | showPercentages 81 | )} | ${formatNum( 82 | numTimeoutErrors, 83 | numFailing, 84 | showPercentages 85 | )} | ${formatNum(numOther, numFailing, showPercentages)}` 86 | ); 87 | } 88 | } 89 | 90 | function printRefinerReport(title: string, stats: RefinerStats) { 91 | const refinerNames = Array.from(stats.refinerNames).sort(); 92 | console.log(` 93 | # ${title} 94 | Project | ${refinerNames.join(" | ")} 95 | --- | ${"--: |".repeat(refinerNames.length)}`); 96 | for (const { proj, refinersData } of Object.values(stats.stats)) { 97 | if (!proj) continue; 98 | console.log( 99 | `${proj} | ${refinerNames 100 | .map((name) => 101 | name in refinersData ? 
refinersData[name].coverage + "%" : "--" 102 | ) 103 | .join(" | ")}` 104 | ); 105 | } 106 | } 107 | 108 | function printSimilarityReport(title: string, stats: SimilarityStats) { 109 | console.log(` 110 | # ${title} 111 | Project | numGeneratedTests | numExistingTests | maxSimilarity 112 | --- | --: | --: | --:`); 113 | 114 | for (const { proj, similarityReport } of Object.values(stats)) { 115 | console.log( 116 | `${proj} | ${similarityReport.numGeneratedTests} | ${similarityReport.numExistingTests} | ${similarityReport.maxSimilarity}` 117 | ); 118 | } 119 | } 120 | 121 | function formatNum( 122 | number: number, 123 | denominator: number, 124 | showPercentages = true 125 | ) { 126 | if (denominator == 0) return "--"; 127 | if (showPercentages) 128 | return `${number} (${((number / denominator) * 100).toFixed(2)}%)`; 129 | else return `${number}`; 130 | } 131 | 132 | function coverageDiff(cov1: number | "unknown", cov2: number | "unknown") { 133 | if (cov1 === "unknown" || cov2 === "unknown") { 134 | return "unknown"; 135 | } else { 136 | return (cov1 - cov2).toFixed(2); 137 | } 138 | } 139 | 140 | function compareCovToBaseline(baselineCovStats: CoverageStats) { 141 | const diffStats: DiffCoverageStats = {}; 142 | for (const [packageName, projStats] of Object.entries(coverageStats)) { 143 | const baseline = baselineCovStats[packageName]; 144 | 145 | // print diff if the same config is in the baseline, otherwise, skip diff for this config 146 | if (baseline) { 147 | const nonTrivialTestDiff = 148 | projStats.nonTrivialTests - baseline.nonTrivialTests; 149 | const nonTrivialPassingDiff = 150 | projStats.nonTrivialPassing - baseline.nonTrivialPassing; 151 | diffStats[packageName] = { 152 | proj: projStats.proj, 153 | nrUniqueSnippets: ppDiff( 154 | projStats.nrUniqueSnippets - baseline.nrUniqueSnippets 155 | ), 156 | numTests: ppDiff(projStats.numTests - baseline.numTests), 157 | numPassing: ppDiff(projStats.numPassing - baseline.numPassing), 158 | coverage: ppDiff( 159 | coverageDiff(projStats.stmtCoverage, baseline.stmtCoverage) 160 | ), 161 | nonTrivialTests: ppDiff(nonTrivialTestDiff), 162 | nonTrivialPassing: ppDiff(nonTrivialPassingDiff), 163 | nonTrivialCoverage: ppDiff( 164 | coverageDiff( 165 | projStats.nonTrivialCoverage, 166 | baseline.nonTrivialCoverage 167 | ) 168 | ), 169 | }; 170 | } 171 | } 172 | printCoverageReport("Coverage Comparison to baseline", diffStats); 173 | } 174 | 175 | function compareFailuresToBaseline(baselineFailureStats: any) { 176 | const diffStats: any = {}; 177 | for (const [packageName, projStats] of Object.entries(failureStats)) { 178 | const baseline = baselineFailureStats[packageName]; 179 | 180 | //print diff if the same config is in the baseline, otherwise, skip diff for this config 181 | if (baseline) { 182 | diffStats[packageName] = { 183 | proj: projStats.proj, 184 | numFailing: ppDiff(projStats.numFailing - baseline.numFailing, true), 185 | numAssertionErrors: ppDiff( 186 | projStats.numAssertionErrors - baseline.numAssertionErrors 187 | ), 188 | numFileSysErrors: ppDiff( 189 | projStats.numFileSysErrors - baseline.numFileSysErrors 190 | ), 191 | numCorrectnessErrors: ppDiff( 192 | projStats.numCorrectnessErrors - baseline.numCorrectnessErrors 193 | ), 194 | numTimeoutErrors: ppDiff( 195 | projStats.numTimeoutErrors - baseline.numTimeoutErrors 196 | ), 197 | numOther: ppDiff(projStats.numOther - baseline.numOther), 198 | }; 199 | } 200 | } 201 | printFailureReport("Failure Comparison to baseline", diffStats, false); 202 | } 203 | 204 | function 
ppDiff(d: number | string, lowerIsBetter = false) { 205 | let s; 206 | if (d > 0) { 207 | s = `+${d}`; 208 | } else if (d == 0) { 209 | s = "±0"; 210 | } else { 211 | s = String(d); 212 | } 213 | if (lowerIsBetter ? d < 0 : d > 0) { 214 | return `**${s}**`; 215 | } else { 216 | return s; 217 | } 218 | } 219 | 220 | if (process.argv.length < 3 || process.argv.length > 5) { 221 | console.error( 222 | "Usage: node generate_report.js [<config.json>] <artifactDir> [<baselineArtifactDir>]" 223 | ); 224 | process.exit(1); 225 | } 226 | const hasConfig = fs.lstatSync(process.argv[2]).isFile(); 227 | const config = hasConfig 228 | ? JSON.parse(fs.readFileSync(process.argv[2], "utf8")) 229 | : {}; 230 | const artifactDir = hasConfig ? process.argv[3] : process.argv[2]; 231 | const baselineArtifactDir = hasConfig ? process.argv[4] : process.argv[3]; 232 | 233 | console.log(` 234 | # Parameters 235 | - snippets from: ${config.snippetsFrom} 236 | - snippet length: ${config.snippetLength} 237 | - numSnippets: ${config.numSnippets} 238 | - temperatures: ${config.temperatures} 239 | - number of completions: ${config.numCompletions}`); 240 | 241 | const { coverageStats, failureStats, refinersStats, similarityStats } = 242 | parseReports(artifactDir); 243 | 244 | printCoverageReport("Coverage report", coverageStats); 245 | printFailureReport("Failure report", failureStats); 246 | printRefinerReport("Coverage when excluding refiners", refinersStats); 247 | printSimilarityReport( 248 | "Similarity of generated tests to existing tests", 249 | similarityStats 250 | ); 251 | 252 | if (baselineArtifactDir) { 253 | const baselineResults = parseReports(baselineArtifactDir); 254 | const baselineCovStats = baselineResults.coverageStats; 255 | const baselineFailureStats = baselineResults.failureStats; 256 | compareCovToBaseline(baselineCovStats); 257 | compareFailuresToBaseline(baselineFailureStats); 258 | } 259 | -------------------------------------------------------------------------------- /benchmark/run.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { performance } from "perf_hooks"; 4 | import { 5 | APIFunction, 6 | Codex, 7 | exploreAPI, 8 | FunctionDescriptor, 9 | getDocSnippets, 10 | getSnippets, 11 | ICompletionModel, 12 | MochaValidator, 13 | MockCompletionModel, 14 | TestGenerator, 15 | TestValidator, 16 | } from ".."; 17 | import yargs from "yargs"; 18 | import { hideBin } from "yargs/helpers"; 19 | import { PerformanceMeasurer } from "./performanceMeasurer"; 20 | import { TestResultCollector } from "./testResultCollector"; 21 | require("console-stamp")(console); 22 | 23 | /** 24 | * Run an end-to-end experiment. 25 | * Given a package, generate tests for its methods, run them, and generate a report. 26 | * @param functions The list of functions in the API. 27 | * @param temperatures The sampling temperatures to try when obtaining completions. 28 | * @param snippetMap The snippets for package methods. 29 | * @param model The completion model to use. 30 | * @param validator The validator used to run the generated tests. 31 | * @param collector The collector that records the test results. 
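 * @param timeLimit The maximum time (in milliseconds) to run the experiment;
 *        for example (illustrative), passing `3600 * 1000` gives the run one hour.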
32 | */ 33 | export async function runExperiment( 34 | functions: APIFunction[], 35 | temperatures: number[], 36 | snippetMap: Map, 37 | model: ICompletionModel, 38 | validator: TestValidator, 39 | collector: TestResultCollector, 40 | timeLimit: number 41 | ): Promise { 42 | const deadline = performance.now() + timeLimit; 43 | const generator = new TestGenerator( 44 | temperatures, 45 | (fn) => snippetMap.get(fn), 46 | model, 47 | validator, 48 | collector 49 | ); 50 | 51 | // initialize the workList with all functions 52 | let workList = functions.map((f) => ({ fun: f, nrTimesExtended: 0 })); 53 | 54 | while (workList.length > 0) { 55 | if (performance.now() > deadline) { 56 | console.log( 57 | `Time limit reached, ${workList.length} worklist items ignored.` 58 | ); 59 | break; 60 | } 61 | 62 | const { fun } = workList.shift()!; 63 | await generator.generateAndValidateTests(fun); 64 | } 65 | 66 | collector.recordCoverageInfo(validator.computeCoverageSummary()); 67 | } 68 | 69 | if (require.main === module) { 70 | (async () => { 71 | const parser = yargs(hideBin(process.argv)) 72 | .strict() 73 | .options({ 74 | outputDir: { 75 | type: "string", 76 | demandOption: true, 77 | description: "directory where output files will be placed", 78 | }, 79 | package: { 80 | type: "string", 81 | demandOption: true, 82 | description: "package source", 83 | }, 84 | api: { 85 | type: "string", 86 | description: 87 | "JSON file with API to generate tests for (usually api.json from a previous run)", 88 | }, 89 | snippets: { 90 | type: "string", 91 | choices: ["code", "doc", "both", "none"], 92 | default: "doc", 93 | description: "where to collect usage snippets from", 94 | }, 95 | database: { 96 | type: "string", 97 | description: 98 | "CodeQL database; only required if collecting snippets from code", 99 | }, 100 | responses: { 101 | type: "string", 102 | description: 103 | "file with simulated model responses (usually prompts.json from a previous run)", 104 | }, 105 | timeLimit: { 106 | type: "number", 107 | default: 5 * 60 * 60, 108 | description: "time limit in seconds (default is five hours)", 109 | }, 110 | numSnippets: { 111 | default: "all", 112 | description: 113 | 'number of snippets to include in the prompt, or "all" to include all snippets', 114 | }, 115 | snippetLength: { 116 | type: "number", 117 | default: 20, 118 | description: "maximum length of each snippet in lines", 119 | }, 120 | temperatures: { 121 | type: "string", 122 | default: "0.0", 123 | description: 124 | "whitespace-separated list of sampling temperatures to try when obtaining completions", 125 | }, 126 | numCompletions: { 127 | type: "number", 128 | default: 5, 129 | description: "number of completions to generate for each prompt", 130 | }, 131 | strictResponses: { 132 | type: "boolean", 133 | default: true, 134 | description: 135 | "whether to require that all prompts are found when running with --responses; does not have any effect otherwise", 136 | }, 137 | model: { 138 | type: "string", 139 | choices: ["gpt", "starcoder"], 140 | default: "gpt", 141 | description: "LLM api to use", 142 | }, 143 | }); 144 | const argv = await parser.argv; 145 | 146 | var model: ICompletionModel; 147 | if (!argv.responses) { 148 | if (argv.strictResponses) { 149 | console.warn( 150 | "Warning: --strictResponses has no effect when not using --responses" 151 | ); 152 | } 153 | model = new Codex(argv.model === "starcoder", { n: argv.numCompletions }); 154 | } else { 155 | model = MockCompletionModel.fromFile( 156 | argv.responses, 157 | 
argv.strictResponses 158 | ); 159 | } 160 | 161 | const packagePath = argv.package; 162 | const packageName = JSON.parse( 163 | fs.readFileSync(path.join(packagePath, "package.json"), "utf8") 164 | ).name; 165 | const perf = new PerformanceMeasurer(); 166 | console.log(`Running experiment for ${packageName}`); 167 | 168 | let api: APIFunction[]; 169 | if (argv.api) { 170 | console.log(`Loading API from ${argv.api}`); 171 | const rawApi: { 172 | accessPath: string; 173 | descriptor: FunctionDescriptor; 174 | }[] = JSON.parse(fs.readFileSync(argv.api, "utf8")); 175 | api = rawApi.map( 176 | ({ accessPath, descriptor }) => 177 | new APIFunction(accessPath, descriptor, packageName) 178 | ); 179 | } else { 180 | console.log("Exploring API"); 181 | api = Array.from(exploreAPI(packagePath).getFunctions(packageName)); 182 | } 183 | 184 | let numSnippets: number | "all" = 185 | argv.numSnippets === "all" ? argv.numSnippets : +argv.numSnippets; 186 | if (numSnippets !== "all" && !(numSnippets >= 0)) { 187 | throw new Error(`Invalid value for --numSnippets: ${argv.numSnippets}`); 188 | } 189 | 190 | performance.mark("snippet-extraction-start"); 191 | let allSnippets = new Map(); 192 | if (numSnippets !== 0) { 193 | console.log("Extracting snippets"); 194 | const functionNames = api.map((f) => f.functionName); 195 | if (argv.snippets == "code") { 196 | if (!argv.database) { 197 | throw new Error("--database is required if --snippets is code"); 198 | } 199 | if (numSnippets === "all") { 200 | throw new Error( 201 | "--numSnippets=all is not supported when collecting snippets from code" 202 | ); 203 | } 204 | allSnippets = getSnippets( 205 | argv.database, 206 | numSnippets, 207 | functionNames, 208 | argv.snippetLength 209 | ); 210 | } else if (argv.snippets == "doc") { 211 | if (argv.database) { 212 | console.warn("--database is ignored if --snippets is doc"); 213 | } 214 | allSnippets = getDocSnippets( 215 | packagePath, 216 | numSnippets, 217 | functionNames, 218 | argv.snippetLength 219 | ); 220 | } else if (argv.snippets == "both") { 221 | if (!argv.database) { 222 | throw new Error("--database is required if --snippets is code"); 223 | } 224 | if (numSnippets === "all") { 225 | throw new Error( 226 | "--numSnippets=all is not supported when collecting snippets from code" 227 | ); 228 | } 229 | const snippets = getSnippets( 230 | argv.database, 231 | numSnippets, 232 | functionNames, 233 | argv.snippetLength 234 | ); 235 | const docSnippets = getDocSnippets( 236 | packagePath, 237 | numSnippets, 238 | functionNames, 239 | argv.snippetLength 240 | ); 241 | for (const [key, value] of snippets.entries()) { 242 | allSnippets.set(key, [...value, ...(docSnippets.get(key) || [])]); 243 | } 244 | } else { 245 | if (argv.database) { 246 | console.warn("--database is ignored if --snippets is none"); 247 | } 248 | } 249 | } 250 | performance.measure("snippet-extraction", "snippet-extraction-start"); 251 | 252 | console.log("Generating tests"); 253 | const collector = new TestResultCollector( 254 | packageName, 255 | packagePath, 256 | argv.outputDir, 257 | api, 258 | allSnippets, 259 | perf, 260 | argv.snippets, 261 | numSnippets, 262 | argv.snippetLength, 263 | argv.numCompletions 264 | ); 265 | const validator = new MochaValidator(packageName, packagePath); 266 | try { 267 | await runExperiment( 268 | api, 269 | argv.temperatures.split(/\s+/).map(parseFloat), 270 | allSnippets, 271 | model, 272 | validator, 273 | collector, 274 | argv.timeLimit * 1000 275 | ); 276 | collector.report(); 277 | const report = 
collector.getReport(); 278 | const coverage = report.coverage?.total.statements.pct ?? 0; 279 | console.log(`${coverage}% statement coverage`); 280 | } finally { 281 | validator.cleanup(); 282 | } 283 | })().catch((e) => { 284 | console.error(e); 285 | process.exit(1); 286 | }); 287 | } 288 | -------------------------------------------------------------------------------- /src/promptCrafting.ts: -------------------------------------------------------------------------------- 1 | import dedent from "dedent"; 2 | import { APIFunction, sanitizePackageName } from "./exploreAPI"; 3 | import { TestOutcome, TestStatus } from "./report"; 4 | import { closeBrackets, commentOut, trimAndCombineDocComment } from "./syntax"; 5 | 6 | /** 7 | * A strategy object for refining a prompt based on the outcome of a test 8 | * generated from it. 9 | */ 10 | export interface IPromptRefiner { 11 | /** A human-readable name for identifying this refiner. */ 12 | get name(): string; 13 | 14 | /** 15 | * Refine the `original` prompt based on the `outcome` of a test generated 16 | * from it and the given `body`. 17 | */ 18 | refine(original: Prompt, body: string, outcome: TestOutcome): Prompt[]; 19 | } 20 | 21 | /** 22 | * Options for controlling prompt generation. 23 | */ 24 | type PromptOptions = { 25 | /** Whether to include usage snippets in the prompt. */ 26 | includeSnippets: boolean; 27 | /** Whether to include the function's doc comment in the prompt. */ 28 | includeDocComment: boolean; 29 | /** Whether to include the function's body in the prompt. */ 30 | includeFunctionBody: boolean; 31 | }; 32 | 33 | export function defaultPromptOptions(): PromptOptions { 34 | return { 35 | includeSnippets: false, 36 | includeDocComment: false, 37 | includeFunctionBody: false, 38 | }; 39 | } 40 | 41 | /** 42 | * Structured representation of a prompt we send to the model. 43 | * 44 | * In general, our prompts look like this: 45 | * 46 | * ```js 47 | * let mocha = require('mocha'); // -+ 48 | * let assert = require('assert'); // | Imports 49 | * let pkg = require('pkg'); // -+ 50 | * 51 | * // usage #1 // -+ 52 | * ... // | 53 | * // usage #2 // | Usage snippets 54 | * ... // -+ 55 | * 56 | * // this does... // -+ 57 | * // @param foo // | 58 | * // @returns bar // | Doc comment 59 | * ... // -+ 60 | * 61 | * // fn(args) // Signature of the function we're testing 62 | * // function fn(args) { // -+ 63 | * // ... // | Function body (optional) 64 | * // } // -+ 65 | * 66 | * describe('test pkg', function() { // Test suite header 67 | * it('test fn', function(done) { // Test case header 68 | * ``` 69 | * 70 | * The structured representation keeps track of these parts and provides methods 71 | * to assemble them into a textual prompt and complete them into a test case. 
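 *
 * As a rough sketch of the intended flow (the variables `fun`, `snippets`,
 * and `completion` below are illustrative placeholders, not names defined in
 * this module):
 *
 * ```js
 * const prompt = new Prompt(fun, snippets, defaultPromptOptions());
 * const promptText = prompt.assemble();         // text sent to the model
 * const test = prompt.completeTest(completion); // completion turned into a runnable test
 * ```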
72 | */ 73 | export class Prompt { 74 | private readonly imports: string; 75 | private readonly signature: string; 76 | private readonly docComment: string; 77 | private readonly functionBody: string; 78 | private readonly suiteHeader: string; 79 | protected readonly testHeader: string; 80 | public readonly provenance: PromptProvenance[] = []; 81 | 82 | constructor( 83 | public readonly fun: APIFunction, 84 | public readonly usageSnippets: string[], 85 | public readonly options: PromptOptions 86 | ) { 87 | const sanitizedPackageName = sanitizePackageName(fun.packageName); 88 | this.imports = dedent` 89 | let mocha = require('mocha'); 90 | let assert = require('assert'); 91 | let ${sanitizedPackageName} = require('${fun.packageName}');\n`; 92 | 93 | this.signature = commentOut(fun.signature); 94 | 95 | if (options.includeFunctionBody) { 96 | this.functionBody = commentOut(fun.descriptor.implementation); 97 | } else { 98 | this.functionBody = ""; 99 | } 100 | 101 | this.suiteHeader = `describe('test ${sanitizedPackageName}', function() {\n`; 102 | this.testHeader = ` it('test ${fun.accessPath}', function(done) {\n`; 103 | 104 | if (options.includeDocComment) { 105 | this.docComment = trimAndCombineDocComment( 106 | fun.descriptor.docComment ?? "" 107 | ); 108 | } else { 109 | this.docComment = ""; 110 | } 111 | } 112 | 113 | /** 114 | * Assemble the usage snippets into a single string. 115 | */ 116 | private assembleUsageSnippets(): string { 117 | if (!this.options.includeSnippets) { 118 | return ""; 119 | } else { 120 | return this.usageSnippets 121 | .map((snippet, index) => { 122 | const lines = snippet.split("\n"); 123 | const commentedLines = lines.map((line) => `// ${line}\n`); 124 | return `// usage #${index + 1}\n` + commentedLines.join(""); 125 | }) 126 | .join(""); 127 | } 128 | } 129 | 130 | /** 131 | * Assemble a prompt to send to the model from the structured 132 | * representation. 133 | */ 134 | public assemble(): string { 135 | return ( 136 | this.imports + 137 | this.assembleUsageSnippets() + 138 | this.docComment + 139 | this.signature + 140 | this.functionBody + 141 | this.suiteHeader + 142 | this.testHeader 143 | ); 144 | } 145 | 146 | /** 147 | * Given a test body suggested by the model, assemble a complete, 148 | * syntactically correct test. 149 | */ 150 | public completeTest( 151 | body: string, 152 | stubOutHeaders: boolean = true 153 | ): string | undefined { 154 | let fixed = closeBrackets( 155 | this.imports + 156 | (stubOutHeaders 157 | ? // stub out suite header and test header so we don't double-count identical tests 158 | "describe('test suite', function() {\n" + 159 | " it('test case', function(done) {\n" 160 | : this.suiteHeader + this.testHeader) + 161 | // add the body, making sure the first line is indented correctly 162 | body.replace(/^(?=\S)/, " ".repeat(8)) + 163 | "\n" 164 | ); 165 | // beautify closing brackets 166 | return fixed?.source.replace(/\}\)\}\)$/, " })\n})"); 167 | } 168 | 169 | public withProvenance(...provenanceInfos: PromptProvenance[]): Prompt { 170 | this.provenance.push(...provenanceInfos); 171 | return this; 172 | } 173 | 174 | public functionHasDocComment(): boolean { 175 | return this.fun.descriptor.docComment !== undefined; 176 | } 177 | } 178 | 179 | /** 180 | * A record of how a prompt was generated, including information about which 181 | * `originalPrompt` it was generated from, information about the test that gave 182 | * rise to the prompt refinement, and the name of the refiner. 
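 *
 * For example, a prompt derived by the `RetryWithError` refiner records the
 * prompt it was refined from, the id of the failing test that triggered the
 * refinement, and the refiner name `"RetryWithError"`.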
183 | */ 184 | export type PromptProvenance = { 185 | originalPrompt: Prompt; 186 | testId: number; 187 | refiner: string; 188 | }; 189 | 190 | /** 191 | * A prompt refiner that adds usage snippets to the prompt. 192 | */ 193 | export class SnippetIncluder implements IPromptRefiner { 194 | public get name(): string { 195 | return "SnippetIncluder"; 196 | } 197 | 198 | public refine( 199 | original: Prompt, 200 | completion: string, 201 | outcome: TestOutcome 202 | ): Prompt[] { 203 | if ( 204 | !original.options.includeSnippets && 205 | original.usageSnippets.length > 0 206 | ) { 207 | return [ 208 | new Prompt(original.fun, original.usageSnippets, { 209 | ...original.options, 210 | includeSnippets: true, 211 | }), 212 | ]; 213 | } 214 | return []; 215 | } 216 | } 217 | 218 | /** 219 | * A prompt refiner that adds a function's doc comments to the prompt. 220 | */ 221 | export class DocCommentIncluder implements IPromptRefiner { 222 | public get name(): string { 223 | return "DocCommentIncluder"; 224 | } 225 | 226 | public refine( 227 | original: Prompt, 228 | completion: string, 229 | outcome: TestOutcome 230 | ): Prompt[] { 231 | if ( 232 | !original.options.includeDocComment && 233 | original.functionHasDocComment() 234 | ) { 235 | return [ 236 | new Prompt(original.fun, original.usageSnippets, { 237 | ...original.options, 238 | includeDocComment: true, 239 | }), 240 | ]; 241 | } 242 | return []; 243 | } 244 | } 245 | 246 | export class RetryPrompt extends Prompt { 247 | constructor( 248 | prev: Prompt, 249 | private body: string, 250 | private readonly err: string 251 | ) { 252 | super(prev.fun, prev.usageSnippets, prev.options); 253 | } 254 | 255 | public assemble() { 256 | const rawFailingTest = super.assemble() + this.body + "\n"; 257 | const completedFailingTest = closeBrackets(rawFailingTest); 258 | let failingTest; 259 | if (completedFailingTest) { 260 | failingTest = completedFailingTest.source.replace( 261 | /\}\)\}\)$/, 262 | " })\n" 263 | ); 264 | } else { 265 | failingTest = rawFailingTest + " })\n"; 266 | } 267 | 268 | return ( 269 | failingTest + 270 | " // the test above fails with the following error:\n" + 271 | ` // ${this.err}\n` + 272 | " // fixed test:\n" + 273 | this.testHeader 274 | ); 275 | } 276 | } 277 | 278 | /** 279 | * A prompt refiner that, for a failed test, adds the error message to the 280 | * prompt and tries again. 281 | */ 282 | export class RetryWithError implements IPromptRefiner { 283 | public get name(): string { 284 | return "RetryWithError"; 285 | } 286 | 287 | public refine( 288 | original: Prompt, 289 | completion: string, 290 | outcome: TestOutcome 291 | ): Prompt[] { 292 | if ( 293 | !(original instanceof RetryPrompt) && 294 | outcome.status === TestStatus.FAILED 295 | ) { 296 | return [new RetryPrompt(original, completion, outcome.err.message)]; 297 | } 298 | return []; 299 | } 300 | } 301 | 302 | /** 303 | * A prompt refiner that includes the body of the function in the prompt. 
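 *
 * Like the other includer refiners, it fires at most once per prompt: if the
 * function body is non-empty and not yet included, it returns a single copy
 * of the prompt with `includeFunctionBody` enabled, and nothing otherwise.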
304 | */ 305 | export class FunctionBodyIncluder implements IPromptRefiner { 306 | public get name(): string { 307 | return "FunctionBodyIncluder"; 308 | } 309 | 310 | public refine( 311 | original: Prompt, 312 | completion: string, 313 | outcome: TestOutcome 314 | ): Prompt[] { 315 | if ( 316 | !original.options.includeFunctionBody && 317 | original.fun.descriptor.implementation !== "" 318 | ) { 319 | return [ 320 | new Prompt(original.fun, original.usageSnippets, { 321 | ...original.options, 322 | includeFunctionBody: true, 323 | }), 324 | ]; 325 | } 326 | return []; 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /benchmark/testResultCollector.ts: -------------------------------------------------------------------------------- 1 | import * as fs from "fs"; 2 | import * as path from "path"; 3 | import { 4 | APIFunction, 5 | BaseTestResultCollector, 6 | IMetaData, 7 | ITestInfo, 8 | ITestReport, 9 | MochaValidator, 10 | ReportForTest, 11 | TestOutcome, 12 | TestStatus, 13 | } from ".."; 14 | import { PerformanceMeasurer } from "./performanceMeasurer"; 15 | import { 16 | createUniqueStmtId, 17 | getCoveredStmtsForFile, 18 | } from "./testCollectorHelper"; 19 | 20 | /** 21 | * A full-featured test-result collector that can be used to persist information 22 | * to disk. 23 | */ 24 | export class TestResultCollector extends BaseTestResultCollector { 25 | private readonly metaData: IMetaData; 26 | 27 | /** 28 | * Constructor; registers the meta-data associated with a test run. 29 | * 30 | * @param outputDir: the directory in which to write the report and other files 31 | * @param snippetsTypeAsString: the type of snippets used to generate the tests (code, doc, both, or none) 32 | * @param numSnippets: number of snippets to include in a prompt (default 3) 33 | * @param snippetLength: maximum length of each snippet in lines (default 20) 34 | * (Note: the sampling temperature is recorded per test result rather than passed to this constructor.) 35 | * @param numCompletions: number of completions to obtain for each prompt (default 5) 36 | */ 37 | constructor( 38 | packageName: string, 39 | private readonly packagePath: string, 40 | private readonly outputDir: string, 41 | private readonly api: APIFunction[], 42 | private readonly snippetMap: Map<string, string[]>, 43 | private readonly perf: PerformanceMeasurer, 44 | snippetsTypeAsString: string, 45 | numSnippets: number | "all", 46 | snippetLength: number, 47 | numCompletions: number 48 | ) { 49 | super(); 50 | this.metaData = { 51 | packageName, 52 | useDocSnippets: 53 | snippetsTypeAsString === "doc" || snippetsTypeAsString === "both", 54 | useCodeSnippets: 55 | snippetsTypeAsString === "code" || snippetsTypeAsString === "both", 56 | numSnippets, 57 | snippetLength, 58 | numCompletions, 59 | }; 60 | this.createOutputDir(); 61 | } 62 | 63 | private getTestsWithStatus(status: TestStatus) { 64 | return [...this.tests.values()].filter( 65 | (test) => test.outcome.status === status 66 | ); 67 | } 68 | 69 | public getNrPasses() { 70 | return this.getTestsWithStatus(TestStatus.PASSED).length; 71 | } 72 | 73 | public getNrFailures() { 74 | return this.getTestsWithStatus(TestStatus.FAILED).length; 75 | } 76 | 77 | public getNrPending() { 78 | return this.getTestsWithStatus(TestStatus.PENDING).length; 79 | } 80 | 81 | public getNrOther() { 82 | return this.getTestsWithStatus(TestStatus.OTHER).length; 83 | } 84 | 85 | public getReport(): ITestReport { 86 | return { 87 | metaData: this.metaData, 88 | nrUniqueSnippets: 
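// deduplicated count of snippets across all functions; see computeNrUniqueSnippets() below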
this.computeNrUniqueSnippets(), 89 | stats: { 90 | nrTests: this.tests.size, 91 | nrPasses: this.getNrPasses(), 92 | nrFailures: this.getNrFailures(), 93 | nrPending: this.getNrPending(), 94 | nrOther: this.getNrOther(), 95 | apiExplorationTime: this.perf.getApiExplorationTime()!, 96 | docCommentExtractionTime: this.perf.getDocCommentExtractionTime()!, 97 | snippetExtractionTime: this.perf.getSnippetExtractionTime()!, 98 | codexQueryTime: this.perf.getTotalCodexQueryTime(), 99 | totalTime: this.perf.getTotalTime(), 100 | }, 101 | tests: [...this.tests.values()].map(this.getReportForTest, this), 102 | coverage: this.coverageSummary, 103 | }; 104 | } 105 | 106 | private getReportForTest(test: ITestInfo): ReportForTest { 107 | const promptIds = test.prompts.map( 108 | (prompt) => this.prompts.get(prompt)!.id 109 | ); 110 | const err = 111 | test.outcome.status === TestStatus.FAILED ? test.outcome.err : {}; 112 | const coveredStatements = this.getCoveredStatements(test.outcome); 113 | return { 114 | testName: test.testName, 115 | api: test.api, 116 | testFile: test.testName, 117 | promptIds: promptIds, 118 | status: test.outcome.status as TestStatus, 119 | err: err, 120 | coveredStatements: coveredStatements, 121 | duration: this.perf.getTestDuration(test.testName), 122 | }; 123 | } 124 | 125 | /** 126 | * Get the list of statements covered by the test with the given outcome. 127 | * 128 | * Tests that do not pass or that do not have a coverage summary are not 129 | * considered to cover any statements. For passing tests, covered statements are 130 | * represented in the form 131 | * '<file>@<startLine>:<startCol>-<endLine>:<endCol>'. 132 | */ 133 | private getCoveredStatements(outcome: TestOutcome) { 134 | if ( 135 | outcome.status !== TestStatus.PASSED || 136 | outcome.coverageReport === undefined 137 | ) { 138 | return []; 139 | } 140 | const coveredStatements = []; 141 | const coverage = JSON.parse( 142 | fs.readFileSync(outcome.coverageReport, "utf8") 143 | ); 144 | for (const file of Object.keys(coverage)) { 145 | const relpath = path.relative(this.packagePath, coverage[file].path); 146 | coveredStatements.push( 147 | ...getCoveredStmtsForFile(coverage[file], relpath) 148 | ); 149 | } 150 | return coveredStatements; 151 | } 152 | 153 | /** 154 | * Compute the number of unique snippets available in the snippet map. 155 | * @returns the number of unique snippets 156 | */ 157 | private computeNrUniqueSnippets(): number { 158 | const uniqueSnippets = new Set<string>(); 159 | for (const snippetGroup of this.snippetMap.values()) { 160 | for (const snippet of snippetGroup) { 161 | uniqueSnippets.add(snippet); 162 | } 163 | } 164 | return uniqueSnippets.size; 165 | } 166 | 167 | /** 168 | * For passing tests, prepend a checkmark and make the text green. 169 | * For failing tests, prepend an 'x' and make the text red. 170 | * For other tests, prepend a '?' and make the text purple. 
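 * For example (colors elided): "✓test_1.js", "✗test_2.js", "❓test_3.js";
 * the test names shown here are illustrative.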
171 | */ 172 | private getTestLabel(test: ITestInfo): string { 173 | const testName = test.testName; 174 | if (test.outcome.status === TestStatus.PASSED) { 175 | return "\u001b[32m" + "\u2713" + testName + "\u001b[0m"; 176 | } else if (test.outcome.status === TestStatus.FAILED) { 177 | return "\u001b[31m" + "\u2717" + testName + "\u001b[0m"; 178 | } else { 179 | return "\u001b[35m" + "\u2753" + testName + "\u001b[0m"; 180 | } 181 | } 182 | 183 | /** 184 | * print summary of test results for each API method 185 | */ 186 | private reportAPICoverage() { 187 | console.log("API coverage:"); 188 | const testsPerAPI = new Map<string, Set<ITestInfo>>(); 189 | for (const test of this.tests.values()) { 190 | const api = test.api; 191 | if (!testsPerAPI.has(api)) { 192 | testsPerAPI.set(api, new Set()); 193 | } 194 | testsPerAPI.get(api)!.add(test); 195 | } 196 | for (const [api, tests] of testsPerAPI.entries()) { 197 | const testLabels = [...tests].map((test) => this.getTestLabel(test)); 198 | console.log(` ${api}: ${[...testLabels.values()].join(", ")}`); 199 | } 200 | } 201 | 202 | public report() { 203 | // write report to 'report.json' in the specified output directory 204 | const report = this.getReport(); 205 | fs.writeFileSync( 206 | path.join(this.outputDir, "report.json"), 207 | JSON.stringify(report, null, 2) 208 | ); 209 | 210 | // write out tests to 'tests' directory 211 | const testOutputDir = path.join(this.outputDir, "tests"); 212 | const coverageDataDir = path.join(this.outputDir, "coverageData"); 213 | for (const { testName, testSource, outcome } of this.tests.values()) { 214 | fs.writeFileSync(path.join(testOutputDir, testName), testSource); 215 | 216 | // copy coverage data if available 217 | if (outcome.status === "PASSED" && outcome.coverageData) { 218 | const destDir = path.join( 219 | coverageDataDir, 220 | path.basename(testName, ".js") 221 | ); 222 | fs.mkdirSync(destDir, { recursive: true }); 223 | MochaValidator.copyCoverageData(outcome.coverageData, destDir); 224 | } 225 | } 226 | 227 | // write out prompts to 'prompts' directory, and summary of prompts to 'prompts.json' 228 | const promptOutputDir = path.join(this.outputDir, "prompts"); 229 | for (const promptInfo of this.prompts.values()) { 230 | fs.writeFileSync( 231 | path.join(promptOutputDir, promptInfo.file), 232 | promptInfo.prompt.assemble() 233 | ); 234 | } 235 | let prompts = { 236 | metaData: this.metaData, 237 | prompts: [...this.prompts.values()].map( 238 | ({ prompt, id, file, temperature, completions }) => { 239 | const tests = [...this.tests.values()] 240 | .filter((test) => test.prompts.includes(prompt)) 241 | .map((test) => test.testName); 242 | const provenance = prompt.provenance.map((p) => ({ 243 | originalPrompt: this.prompts.get(p.originalPrompt)!.id, 244 | test: p.testId, 245 | refiner: p.refiner, 246 | })); 247 | return { 248 | id, 249 | file, 250 | temperature, 251 | completions: [...completions.values()], 252 | tests, 253 | provenance, 254 | }; 255 | } 256 | ), 257 | }; 258 | fs.writeFileSync( 259 | path.join(this.outputDir, "prompts.json"), 260 | JSON.stringify(prompts, null, 2) 261 | ); 262 | 263 | // write API info to 'api.json' 264 | fs.writeFileSync( 265 | path.join(this.outputDir, "api.json"), 266 | JSON.stringify(this.api, null, 2) 267 | ); 268 | 269 | // write snippetMap to 'snippetMap.json' 270 | fs.writeFileSync( 271 | path.join(this.outputDir, "snippetMap.json"), 272 | JSON.stringify([...this.snippetMap], null, 2) 273 | ); 274 | 275 | // write Codex query times to 'codexQueryTimes.json' 276 | 
fs.writeFileSync( 277 | path.join(this.outputDir, "codexQueryTimes.json"), 278 | JSON.stringify(this.perf.getCodexQueryTimes(), null, 2) 279 | ); 280 | 281 | // print summary statistics 282 | console.log( 283 | `${this.getNrPasses()} passed, ${this.getNrFailures()} failed, ${this.getNrPending()} pending, ${this.getNrOther()} other` 284 | ); 285 | 286 | // print API coverage 287 | this.reportAPICoverage(); 288 | } 289 | 290 | /** 291 | * Create directory for output files if it does not exist. If it does exist, delete it and its contents and create a new one. 292 | */ 293 | private createOutputDir() { 294 | if (fs.existsSync(this.outputDir)) { 295 | fs.rmdirSync(this.outputDir, { recursive: true }); 296 | } 297 | fs.mkdirSync(this.outputDir, { recursive: true }); 298 | fs.mkdirSync(path.join(this.outputDir, "tests")); 299 | fs.mkdirSync(path.join(this.outputDir, "prompts")); 300 | fs.mkdirSync(path.join(this.outputDir, "coverageData")); 301 | } 302 | 303 | public recordTestResult( 304 | test: ITestInfo, 305 | temperature: number, 306 | outcome: TestOutcome 307 | ) { 308 | super.recordTestResult(test, temperature, outcome); 309 | console.log( 310 | `${test.testName} (for ${test.api} at temperature ${temperature}, ${test.prompts[0].usageSnippets.length} snippets available): ${outcome.status}` 311 | ); 312 | } 313 | } 314 | -------------------------------------------------------------------------------- /.github/workflows/run-experiment.yml: -------------------------------------------------------------------------------- 1 | name: Run TestPilot experiment 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | packages: 7 | description: "Packages to generate tests for" 8 | default: "+benchmarks.txt" 9 | snippetsFrom: 10 | description: "Code snippets source" 11 | default: "doc" 12 | numSnippets: 13 | description: 'Maximum number of snippets to include in each prompt, or "all"' 14 | default: "all" 15 | snippetLength: 16 | description: "Maximum length of each snippet in lines" 17 | default: "20" 18 | temperatures: 19 | description: "Sampling temperatures to try when obtaining completions (whitespace-separated)" 20 | default: "0.0" 21 | numCompletions: 22 | description: "Number of completions to generate for each prompt" 23 | default: "5" 24 | model: 25 | description: "Which LLM API to use" 26 | type: "choice" 27 | options: 28 | - "gpt" 29 | - "starcoder" 30 | default: "gpt" 31 | compareTo: 32 | description: "Run number of previous run to compare to (leave empty to skip comparison)" 33 | default: "" 34 | skipSlowBenchmarks: 35 | description: "Skip slow benchmarks" 36 | type: boolean 37 | default: false 38 | debug_enabled: 39 | type: boolean 40 | description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)" 41 | default: false 42 | # Run every weekday at 2:00 AM UTC 43 | # schedule: 44 | # - cron: '0 2 * * 1-5' 45 | 46 | jobs: 47 | setup: 48 | runs-on: ubuntu-latest 49 | outputs: 50 | packages: "${{ steps.parse_packages.outputs.packages }}" 51 | snippetsFrom: "${{ github.event.inputs.snippetsFrom || 'doc' }}" 52 | snippetLength: "${{ github.event.inputs.snippetLength || '20' }}" 53 | temperatures: "${{ github.event.inputs.temperatures || '0.0' }}" 54 | numSnippets: "${{ github.event.inputs.numSnippets || 'all' }}" 55 | numCompletions: "${{ github.event.inputs.numCompletions || '5' }}" 56 | model: "${{ github.event.inputs.model || 'gpt' }}" 57 | steps: 58 | - uses: actions/checkout@v3 59 | 60 | - uses: actions/setup-node@v3 61 | with: 62 | 
node-version: 12 63 | 64 | - id: parse_packages 65 | run: | 66 | packages=$(node ${GITHUB_WORKSPACE}/.github/parse_packages.js \ 67 | ${{ github.event.inputs.skipSlowBenchmarks == 'true' && '--skip-slow-benchmarks' || '' }} \ 68 | "${{ github.event.inputs.packages || '+benchmarks.txt' }}") 69 | echo "packages=$packages" >> $GITHUB_OUTPUT 70 | 71 | benchmark: 72 | needs: 73 | - setup 74 | runs-on: ubuntu-latest 75 | continue-on-error: true 76 | strategy: 77 | fail-fast: false 78 | matrix: 79 | package: ${{ fromJson(needs.setup.outputs.packages) }} 80 | steps: 81 | - uses: actions/checkout@v3 82 | with: 83 | path: testpilot 84 | 85 | - name: Check out CodeQL repo 86 | uses: actions/checkout@v3 87 | with: 88 | repository: github/codeql 89 | ref: codeql-cli/v2.10.0 90 | path: codeql-repo 91 | 92 | - name: Install CodeQL 2.10.0 93 | run: | 94 | wget -q https://github.com/github/codeql-cli-binaries/releases/download/v2.10.0/codeql-linux64.zip 95 | unzip codeql-linux64.zip 96 | echo "$GITHUB_WORKSPACE/codeql" >> $GITHUB_PATH 97 | env: 98 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 99 | 100 | - name: Set up Node.js 101 | uses: actions/setup-node@v3 102 | with: 103 | node-version: 12 104 | 105 | - name: Set up TestPilot 106 | run: | 107 | cd testpilot 108 | npm run build 109 | 110 | - name: Checkout github package repo 111 | if: ${{ matrix.package.host == 'github.com' }} 112 | uses: actions/checkout@v3 113 | with: 114 | repository: ${{ format('{0}/{1}', matrix.package.owner, matrix.package.repo) }} 115 | ref: ${{ matrix.package.sha }} 116 | path: "source" 117 | 118 | - name: Checkout gitlab package repo 119 | if: ${{ matrix.package.host == 'gitlab.com' }} 120 | run: | 121 | git clone ${{ format('https://gitlab.com/{0}/{1}', matrix.package.owner, matrix.package.repo) }} source 122 | cd source 123 | git checkout ${{ matrix.package.sha }} 124 | 125 | - name: Determine package name 126 | id: pkg-name 127 | run: | 128 | # name of the package 129 | TESTPILOT_PACKAGE_NAME=$(cat source/${{ matrix.package.path }}/package.json | jq -r .name ) 130 | 131 | # some packages have a / in their names (looking at you, gitlab-js!) 132 | if [[ "$TESTPILOT_PACKAGE_NAME" == *"/"* ]]; then 133 | TESTPILOT_PACKAGE_NAME=${TESTPILOT_PACKAGE_NAME##*/} 134 | fi 135 | 136 | # path to the package within the repo checkout 137 | TESTPILOT_PACKAGE_PATH="$GITHUB_WORKSPACE/$TESTPILOT_PACKAGE_NAME/${{ matrix.package.path }}" 138 | # make sure there isn't already a directory with the same name 139 | if [ -d "$TESTPILOT_PACKAGE_PATH" ]; then 140 | echo "ERROR: $TESTPILOT_PACKAGE_PATH already exists" 141 | exit 1 142 | fi 143 | # rename checkout, since some packages examine its name (looking at you, bluebird!) 144 | mv source $TESTPILOT_PACKAGE_NAME 145 | echo "Package name: $TESTPILOT_PACKAGE_NAME, path: $TESTPILOT_PACKAGE_PATH" 146 | # export environment variables 147 | echo "TESTPILOT_PACKAGE_NAME=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_ENV 148 | echo "TESTPILOT_PACKAGE_PATH=$TESTPILOT_PACKAGE_PATH" >> $GITHUB_ENV 149 | echo "pkgName=$TESTPILOT_PACKAGE_NAME" >> $GITHUB_OUTPUT 150 | 151 | - name: Install package, its dependencies, and test packages 152 | run: | 153 | cd $TESTPILOT_PACKAGE_PATH 154 | npm i || npm i --legacy-peer-deps 155 | # if matrix.package.dependencies is not empty, install them 156 | if ! 
[ -z "${{ matrix.package.dependencies }}" ]; then 157 | npm i ${{ matrix.package.dependencies }} 158 | fi 159 | npm run build || npm run prepack || echo 'Error with npm run build and npm run prepack' 160 | npm i --no-save mocha 161 | 162 | - name: Create CodeQL database 163 | if: ${{ needs.setup.outputs.snippetsFrom == 'code' || needs.setup.outputs.snippetsFrom == 'both' }} 164 | run: | 165 | codeql database create --language=javascript "--source-root=$TESTPILOT_PACKAGE_PATH" -- ./db 166 | 167 | - name: Generate tests 168 | env: 169 | TESTPILOT_LLM_API_ENDPOINT: "https://model-6.openai.azure.com/openai/deployments/turbo/completions?api-version=2022-12-01" 170 | TESTPILOT_LLM_AUTH_HEADERS: '{ "api-key": "${{ secrets.GPT35_API_KEY }}" }' 171 | STARCODER_API_ENDPOINT: '${{ secrets.STARCODER_API_ENDPOINT3 }}' 172 | run: | 173 | cd testpilot 174 | outputdir="results/$TESTPILOT_PACKAGE_NAME" 175 | mkdir -p $outputdir 176 | echo "Computing package statistics" 177 | node benchmark/package_stats.js "$TESTPILOT_PACKAGE_PATH" > stats.json 178 | echo "Generating tests for $TESTPILOT_PACKAGE_NAME" 179 | node --max-old-space-size=6144 benchmark/run.js \ 180 | --outputDir $outputdir \ 181 | --database ../db \ 182 | --package "$TESTPILOT_PACKAGE_PATH" \ 183 | --snippets ${{ needs.setup.outputs.snippetsFrom }} \ 184 | --numSnippets ${{ needs.setup.outputs.numSnippets }} \ 185 | --snippetLength ${{ needs.setup.outputs.snippetLength }} \ 186 | --temperatures "${{ needs.setup.outputs.temperatures }}" \ 187 | --numCompletions ${{ needs.setup.outputs.numCompletions }} \ 188 | --model ${{ needs.setup.outputs.model }} 189 | mv stats.json $outputdir 190 | 191 | - name: Calculate edit distance of generated tests 192 | run: | 193 | cd testpilot 194 | outputdir="results/$TESTPILOT_PACKAGE_NAME" 195 | node benchmark/editDistance.js --generatedTestsDir $outputdir --existingTestsDir $TESTPILOT_PACKAGE_PATH --pkgName $TESTPILOT_PACKAGE_NAME 196 | mv similarityReport.json $outputdir 197 | 198 | - name: Add non-trivial coverage data 199 | run: | 200 | cd testpilot 201 | ./.github/non_trivial_coverage.sh "results/$TESTPILOT_PACKAGE_NAME" 202 | 203 | - name: Zip up results 204 | run: | 205 | cd testpilot 206 | zip -r results.zip results 207 | 208 | - name: Upload artifacts 209 | uses: actions/upload-artifact@v3 210 | with: 211 | name: results-${{ steps.pkg-name.outputs.pkgName }} 212 | path: "testpilot/results.zip" 213 | 214 | - name: Setup tmate session 215 | uses: mxschmitt/action-tmate@v3 216 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 217 | 218 | combine_output: 219 | name: Combine output from all benchmarks 220 | needs: 221 | - setup 222 | - benchmark 223 | runs-on: ubuntu-latest 224 | steps: 225 | - name: Download output zips 226 | uses: actions/download-artifact@v4.1.7 227 | 228 | - name: Setup tmate session 229 | uses: mxschmitt/action-tmate@v3 230 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 231 | 232 | - name: Combine output zips 233 | run: | 234 | mkdir results 235 | for zip in results-*/results.zip 236 | do 237 | unzip -oq $zip 238 | done 239 | zip -r results.zip results 240 | - name: Upload combined output files 241 | uses: actions/upload-artifact@v2 242 | with: 243 | name: results-all 244 | path: results.zip 245 | 246 | generate-report: 247 | needs: 248 | - setup 249 | - benchmark 250 | - combine_output 251 | runs-on: ubuntu-latest 252 | steps: 253 | - uses: actions/checkout@v3 254 | 255 | - name: Set up Node.js 256 | uses: actions/setup-node@v3 
257 | with: 258 | node-version: 12 259 | 260 | - name: Set up TestPilot 261 | run: | 262 | npm run build 263 | 264 | - name: Download artifacts for this run 265 | uses: actions/download-artifact@v4.1.7 266 | with: 267 | name: results-all 268 | path: results 269 | 270 | - name: Download artifacts for comparison run 271 | if: ${{ github.event.inputs.compareTo != '' }} 272 | uses: dawidd6/action-download-artifact@v2 273 | with: 274 | run_number: ${{ github.event.inputs.compareTo }} 275 | name: results-all 276 | path: baseline 277 | 278 | - name: Setup tmate session 279 | uses: mxschmitt/action-tmate@v3 280 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 281 | 282 | - name: Generate report 283 | run: | 284 | cd results 285 | unzip results.zip 286 | cd .. 287 | 288 | echo '${{ toJson(needs.setup.outputs) }}' > config.json 289 | if [ -d baseline ]; then 290 | cd baseline 291 | unzip results.zip 292 | cd .. 293 | baseline_artifact=baseline/results 294 | else 295 | baseline_artifact='' 296 | fi 297 | node ${GITHUB_WORKSPACE}/benchmark/generate_report.js config.json results/results $baseline_artifact > $GITHUB_STEP_SUMMARY 298 | -------------------------------------------------------------------------------- /ql/queries/AssertionQuality.qll: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes and predicates for working with TestPilot-generated reports. 3 | */ 4 | 5 | import javascript 6 | 7 | /** 8 | * A report.json file, representing all data collected for a particular 9 | * benchmark. 10 | */ 11 | class ReportJson extends JsonObject { 12 | ReportJson() { 13 | this.isTopLevel() and 14 | this.getFile().getBaseName() = "report.json" 15 | } 16 | 17 | /** Gets the `tests/` folder next to this file. */ 18 | Folder getTestFolder() { result = this.getFile().getParentContainer().getFolder("tests") } 19 | 20 | GeneratedTest getTest(string name) { 21 | result.getReport() = this and 22 | result.getBaseName() = name 23 | } 24 | 25 | GeneratedTest getTestById(int id) { 26 | exists(string strid | 27 | result = this.getTest("test_" + strid + ".js") and 28 | id = strid.toInt() 29 | ) 30 | } 31 | 32 | /** Gets the metadata in this report. */ 33 | JsonObject getMetadata() { result = this.getPropValue("metaData") } 34 | 35 | /** Gets the package name for this benchmark run. */ 36 | string getPackageName() { result = this.getMetadata().getPropStringValue("packageName") } 37 | 38 | /** Gets a prompt in this report. */ 39 | Prompt getAPrompt() { result.getReport() = this } 40 | 41 | /** Gets a test in this report. */ 42 | GeneratedTest getATest() { result.getReport() = this } 43 | 44 | /** Gets a non-trivial test in this report. */ 45 | GeneratedTest getANonTrivialTest() { 46 | result.getReport() = this and 47 | result.isNonTrivial() 48 | } 49 | 50 | /** Gets the total number of statements in the project, as recorded in this report's coverage data. */ 51 | int getNumberOfStatements() { 52 | result = 53 | getPropValue("coverage") 54 | .getPropValue("total") 55 | .getPropValue("statements") 56 | .getPropValue("total") 57 | .getIntValue() 58 | } 59 | 60 | /** Gets the total number of tests in this report. */ 61 | int getNumberOfTests() { result = count(GeneratedTest test | test.getReport() = this) } 62 | 63 | /** Holds if test `testName` in this report covers statement `stmtId`. 
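 *
 * `stmtId` is expected to follow the format written out by the benchmark's
 * test-result collector, i.e. roughly "<file>@<startLine>:<startCol>-<endLine>:<endCol>"
 * (e.g. "lib/index.js@3:0-3:42"; the concrete value shown here is illustrative).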
*/ 64 | predicate testCoversStmt(string testName, string stmtId) { 65 | exists(JsonObject test | 66 | test = this.getPropValue("tests").getElementValue(_) and 67 | test.getPropStringValue("testName") = testName and 68 | stmtId = test.getPropValue("coveredStatements").getElementValue(_).getStringValue() 69 | ) 70 | } 71 | 72 | /** Gets the status of test `testName`. */ 73 | string getTestStatus(string testName) { 74 | exists(JsonObject test | 75 | test = this.getPropValue("tests").getElementValue(_) and 76 | test.getPropStringValue("testName") = testName and 77 | result = test.getPropStringValue("status") 78 | ) 79 | } 80 | 81 | /** Gets the error message of test `testName`, if any. */ 82 | string getTestErrMsg(string testName) { 83 | exists(JsonObject test | 84 | test = this.getPropValue("tests").getElementValue(_) and 85 | test.getPropStringValue("testName") = testName and 86 | result = test.getPropValue("err").getPropValue("message").getStringValue() 87 | ) 88 | } 89 | 90 | /** Gets the error stack trace of test `testName`, if any. */ 91 | string getTestErrStack(string testName) { 92 | exists(JsonObject test | 93 | test = this.getPropValue("tests").getElementValue(_) and 94 | test.getPropStringValue("testName") = testName and 95 | result = test.getPropValue("err").getPropValue("stack").getStringValue() 96 | ) 97 | } 98 | 99 | /** Gets the error code of test `testName`, if any. */ 100 | string getTestErrCode(string testName) { 101 | exists(JsonObject test | 102 | test = this.getPropValue("tests").getElementValue(_) and 103 | test.getPropStringValue("testName") = testName and 104 | result = test.getPropValue("err").getPropValue("code").getStringValue() 105 | ) 106 | } 107 | 108 | override string toString() { result = getPackageName() } 109 | } 110 | 111 | /** A TestPilot-generated test stored in the report. */ 112 | class GeneratedTest extends File { 113 | ReportJson report; 114 | 115 | GeneratedTest() { this.getParentContainer() = report.getTestFolder() } 116 | 117 | /** Gets the report to which this test belongs. */ 118 | ReportJson getReport() { result = report } 119 | 120 | /** Gets the name of the package for which this test was generated. */ 121 | string getPackageName() { result = report.getPackageName() } 122 | 123 | /** 124 | * Holds if this test is non-trivial, i.e., it contains an assertion 125 | * that semantically depends on the package under test. 126 | */ 127 | predicate isNonTrivial() { 128 | exists(AssertionInGeneratedTest a | a.getFile() = this and a.isNonTrivial()) 129 | } 130 | 131 | /** Holds if this test covers the given statement. */ 132 | predicate coversStmt(string stmtId) { report.testCoversStmt(this.getBaseName(), stmtId) } 133 | 134 | string getStatus() { result = report.getTestStatus(this.getBaseName()) } 135 | 136 | /** Holds if this test passes. */ 137 | predicate passes() { this.getStatus() = "PASSED" } 138 | 139 | /** Holds if this test fails. */ 140 | predicate fails() { this.getStatus() = "FAILED" } 141 | 142 | /** Holds if this test fails with the given error message. 
*/ 143 | predicate failsWith(string msg) { 144 | this.fails() and msg = report.getTestErrMsg(this.getBaseName()) 145 | } 146 | 147 | private predicate failsDueToInternal(ErrorCategory errorCategory) { 148 | errorCategory = "AssertionError" and 149 | report.getTestErrStack(this.getBaseName()).matches("%AssertionError%") 150 | or 151 | errorCategory = "FileSystemError" and 152 | report.getTestErrCode(this.getBaseName()) in [ 153 | "EEXIST", "EISDIR", "ENOENT", "ENOTEMPTY", "EACCES" 154 | ] 155 | or 156 | errorCategory = "CorrectnessError" and 157 | report 158 | .getTestErrStack(this.getBaseName()) 159 | .matches([ 160 | "%ReferenceError%", "%TypeError%", "%done() invoked with non-Error%", 161 | "%Maximum call stack size exceeded%", 162 | ]) 163 | or 164 | errorCategory = "CorrectnessError" and 165 | report.getTestErrMsg(this.getBaseName()).matches("%Invalid syntax%") 166 | or 167 | errorCategory = "TimeoutError" and 168 | report.getTestErrCode(this.getBaseName()) = "ERR_MOCHA_TIMEOUT" 169 | } 170 | 171 | predicate failsDueTo(ErrorCategory errorCategory) { 172 | this.failsDueToInternal(errorCategory) 173 | or 174 | this.fails() and 175 | not this.failsDueToInternal(_) and 176 | errorCategory = "OtherError" 177 | } 178 | } 179 | 180 | /** 181 | * An assertion in a TestPilot-generated test. 182 | */ 183 | class AssertionInGeneratedTest extends DataFlow::Node { 184 | GeneratedTest test; 185 | 186 | AssertionInGeneratedTest() { 187 | this = API::moduleImport("assert").getASuccessor*().getACall() and 188 | test = this.getFile() 189 | } 190 | 191 | /** 192 | * Gets a node in the (intra-procedural) backwards slice of this assertion. 193 | */ 194 | DataFlow::Node getANodeInBackwardsSlice() { 195 | result = this 196 | or 197 | // follow data flow 198 | DataFlow::localFlowStep(result, this.getANodeInBackwardsSlice()) 199 | or 200 | // follow taint flow 201 | TaintTracking::sharedTaintStep(result, this.getANodeInBackwardsSlice()) 202 | or 203 | // follow syntactic nesting: if an expression is in the backwards slice, 204 | // then so are all its subexpressions 205 | result.asExpr().getParent+() = this.getANodeInBackwardsSlice().asExpr() 206 | or 207 | // heuristic to approximate flow through callbacks: for `foo(bar, cb)` we 208 | // add both `foo` and `bar` to the backwards slice of any node in the callback 209 | // function `cb` to approximate inter-procedural data and control dependencies 210 | exists(DataFlow::InvokeNode call | 211 | call.getABoundCallbackParameter(_, _) = this.getANodeInBackwardsSlice() 212 | or 213 | exists(Function cb | cb = call.getAnArgument().getAFunctionValue().getFunction() | 214 | cb = this.getANodeInBackwardsSlice().getContainer() 215 | ) 216 | | 217 | result = call.getAnArgument() or 218 | result = call.getCalleeNode() 219 | ) 220 | or 221 | // heuristic to approximate side effects: for `foo(bar)` we assume that 222 | // `foo` may update any property of `bar`, and so we include `foo` in the 223 | // backwards slice of any other uses of `bar` 224 | exists(DataFlow::InvokeNode call, DataFlow::SsaDefinitionNode v | 225 | call.getAnArgument().getAPredecessor() = v and 226 | v = this.getANodeInBackwardsSlice() and 227 | result = call.getCalleeNode() 228 | ) 229 | } 230 | 231 | /** 232 | * Holds if this assertion is non-trivial, i.e., it semantically depends on 233 | * the package under test. 
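 *
 * For example, `assert.equal(pkg.f(1), 2)` is non-trivial because its
 * backwards slice contains the `require('pkg')` of the package under test,
 * whereas `assert.ok(true)` is trivial.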
234 | */ 235 | predicate isNonTrivial() { 236 | exists(Require req | req = this.getANodeInBackwardsSlice().asExpr() | 237 | req.getImportedPath().getValue() = test.getPackageName() 238 | ) 239 | } 240 | } 241 | 242 | class PromptJson extends JsonObject { 243 | ReportJson report; 244 | 245 | PromptJson() { 246 | this.isTopLevel() and 247 | this.getFile().getBaseName() = "prompts.json" and 248 | this.getFile().getParentContainer() = report.getFile().getParentContainer() 249 | } 250 | 251 | /** Gets the report to which this prompt belongs. */ 252 | ReportJson getReport() { result = report } 253 | } 254 | 255 | class Prompt extends JsonObject { 256 | PromptJson prompts; 257 | 258 | Prompt() { this = prompts.getPropValue("prompts").(JsonArray).getElementValue(_) } 259 | 260 | ReportJson getReport() { result = prompts.getReport() } 261 | 262 | GeneratedTest getATest(boolean passes, boolean nontrivial) { 263 | exists(string testName | 264 | testName = this.getPropValue("tests").(JsonArray).getElementStringValue(_) and 265 | result = getReport().getTest(testName) 266 | ) and 267 | (if result.passes() then passes = true else passes = false) and 268 | (if result.isNonTrivial() then nontrivial = true else nontrivial = false) 269 | } 270 | 271 | int getId() { result = this.getPropValue("id").getIntValue() } 272 | 273 | private JsonObject getProvenanceInfo() { 274 | result = this.getPropValue("provenance") or 275 | result = this.getPropValue("provenance").(JsonArray).getElementValue(_) 276 | } 277 | 278 | predicate isRefinedFrom(Prompt originalPrompt, GeneratedTest test, string refiner) { 279 | exists(JsonObject provenance | provenance = getProvenanceInfo() | 280 | refiner = provenance.getPropStringValue("refiner") and 281 | test = this.getReport().getTestById(provenance.getPropValue("test").getIntValue()) and 282 | originalPrompt.getId() = provenance.getPropValue("originalPrompt").getIntValue() and 283 | originalPrompt.getReport() = this.getReport() 284 | ) 285 | } 286 | 287 | predicate isRefinedFrom(Prompt originalPrompt, string refiner) { 288 | this.isRefinedFrom(originalPrompt, _, refiner) 289 | } 290 | 291 | string getAProvenance() { 292 | not this.isRefinedFrom(_, _) and 293 | result = "" 294 | or 295 | exists(Prompt originalPrompt, string refiner | this.isRefinedFrom(originalPrompt, refiner) | 296 | result = originalPrompt.getAProvenance() + "," + refiner 297 | ) 298 | } 299 | 300 | /** Holds if this prompt can be generated without the given refiner. */ 301 | predicate doesNotNeed(Refiner refiner) { 302 | exists(string provenance | provenance = this.getAProvenance() | 303 | not provenance.regexpMatch(".*\\b\\Q" + refiner + "\\E\\b.*") 304 | ) 305 | } 306 | 307 | override string toString() { result = prompts.getReport() + ":prompt" + this.getId() } 308 | } 309 | 310 | class Refiner extends string { 311 | Refiner() { any(Prompt p).isRefinedFrom(_, this) } 312 | } 313 | 314 | /** A symbolic representation of a cause for test failure. */ 315 | class ErrorCategory extends string { 316 | ErrorCategory() { 317 | this = "AssertionError" or 318 | this = "FileSystemError" or 319 | this = "CorrectnessError" or 320 | this = "TimeoutError" or 321 | this = "OtherError" 322 | } 323 | } 324 | --------------------------------------------------------------------------------