├── .github
│   └── workflows
│       ├── barbosa23.yml
│       ├── barbosa23flaky.yml
│       ├── end2end.yml
│       ├── end2endCustomContainers.yml
│       └── smoketest.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── Tutorial.md
├── build.sh
├── configs
│   ├── QL_output_config.json
│   ├── README.md
│   ├── build_only_config.json
│   ├── custom_install_only.json
│   ├── default_filter_config.json
│   ├── verbose.json
│   └── verbose_only.json
├── get_rel_project_reqs.js
├── input_list_scripts
│   ├── README.md
│   ├── get_dep_repos.sh
│   ├── get_package_deps.js
│   ├── get_package_repo_link.py
│   └── package.json
├── output_proc_scripts
│   ├── README.md
│   ├── count_tests_run.py
│   └── get_json_results.py
├── qlpack.yml
├── runDocker.sh
├── runParallelGitRepos.sh
├── runParallelGitReposDocker.sh
├── run_verbose_for_repo_and_config.sh
├── src
│   ├── TestInfo.py
│   ├── diagnose_github_repo.py
│   ├── diagnose_npm_package.py
│   ├── get_repo_links.py
│   ├── middlewares.py
│   ├── output_parsing
│   │   └── test_output_proc.py
│   ├── runQuery.sh
│   └── test_JS_repo_lib.py
└── tests
    ├── Dockerfile
    ├── README.md
    ├── build.sh
    ├── memfs__results_expected.json
    ├── prepTestDocker.sh
    ├── runTestDocker.sh
    └── runTests.sh

/.github/workflows/barbosa23.yml:
--------------------------------------------------------------------------------
1 | name: Test on all of Barbosa23 JS (well, first 250ish) 2 | 3 | on: 4 | workflow_dispatch 5 | 6 | env: 7 | PROJECTS_JSON: | 8 | { projects: [ 9 | {"project_url": "adriantoine/enzyme-to-json", "project_sha": "7d90cdf5f1878815a46b3a53f4e1e1b63418b38f"}, 10 | {"project_url": "agenda/agenda", "project_sha": "41a2b3793400073f564c37f7d2d0ec2d7e237bf2"}, 11 | {"project_url": "airbnb/mocha-wrap", "project_sha": "e6bf4f6cff6d40425b2af323186cc1e69d05a270"}, 12 | {"project_url": "allenmyao/canvas-graph-creator", "project_sha": "fadcd223a82ff665ee34685a1845d8087b997ee3"}, 13 | {"project_url": "americanexpress/one-app-cli", "project_sha": "23a992558cc32cdc8a51c11e4fe80c2e2924aaf9"}, 14 | {"project_url": "amireh/happypack", "project_sha": "e45926e9754f42098d882ff129269b15907ef00e"}, 15 | {"project_url": "andreypopov/node-red-contrib-deconz", "project_sha": "7a7cdb10e4c9430a10dfe28fc9295abeaf107af5"}, 16 | {"project_url": "andyholmes/gnome-shell-extension-gsconnect", "project_sha": "370493b76ab4ee7f30ba154b1e5b554a02413703"}, 17 | {"project_url": "angular-translate/angular-translate", "project_sha": "1114534c064eddfb77fc4243b0deb61c37f5f41f"}, 18 | {"project_url": "angular-ui/ui-sortable", "project_sha": "e763b5765eea87743c8463ddf045a53015193c20"}, 19 | {"project_url": "apache/cordova-lib", "project_sha": "797286963eb526a2f5ad673291ff5733d6fb275b"}, 20 | {"project_url": "apache/incubator-ponymail-foal", "project_sha": "f5addb5824e0c4d08474b22840ce556deade48f6"}, 21 | {"project_url": "apiaryio/dredd", "project_sha": "5ab7b162afbbd8881cd716f27627dc2d05213eb7"}, 22 | {"project_url": "apiaryio/dredd-transactions", "project_sha": "57477169b82a2980cb279c80a9caae5825754826"}, 23 | {"project_url": "appium/appium", "project_sha": "2d124323c5973ef9d3e190f7401e67106886ffd4"}, 24 | {"project_url": "appium/appium-desktop", "project_sha": "12a988aa08b9822e97056a09486c9bebb3aad8fe"}, 25 | {"project_url": "atom-community/atom", "project_sha": "0f7c5c14eaad9643bdc16cf80579b457baa2dd8a"}, 26 | {"project_url": "atom/atom", "project_sha": "1c3bd35ce238dc0491def9e1780d04748d8e18af"}, 27 | {"project_url": "atom/find-and-replace", "project_sha": "7871ad213e2c09f99e003c8f97cd7d4b7f9f2d82"}, 28 | {"project_url": "aurelia/cli", "project_sha": "82091bbeebcc4b08c9929e37a8cd91c5b5025791"}, 29 | {"project_url":
"Automattic/kue", "project_sha": "c5647b1a8890319169fa4ce2cf4ed4122c1c704a"}, 30 | {"project_url": "avajs/ava", "project_sha": "568fe40c987dd6c593dfbcf4144d1d1627955d46"}, 31 | {"project_url": "axa-ch-webhub-cloud/pattern-library", "project_sha": "04d7e0f227f85d7b39eb0a6bfa9911076027e924"}, 32 | {"project_url": "axa-ch/patterns-library", "project_sha": "04d7e0f227f85d7b39eb0a6bfa9911076027e924"}, 33 | {"project_url": "azachar/protractor-screenshoter-plugin", "project_sha": "989f8e0b52b986f7ddb07831b5b92dca6dceeb07"}, 34 | {"project_url": "Azure/azure-iot-sdk-node", "project_sha": "450c672001eb96d99587eaeae5fe75ab0912e5d6"}, 35 | {"project_url": "babel/babel-eslint", "project_sha": "b5b9a09edbac4350e4e51033a4608dd95dad1f67"}, 36 | {"project_url": "badges/shields", "project_sha": "14892e3943a4677332618d8b9f584766f7940ee7"}, 37 | {"project_url": "bbc/simorgh", "project_sha": "4c7e7d1ecc525dd62fb14bd98035a5e739c14290"}, 38 | {"project_url": "bcgov/name-examination", "project_sha": "b55fc1127e0db98dc4fe780ad80831f4b1a2872e"}, 39 | {"project_url": "bcoin-org/bcoin", "project_sha": "b0058696cc10c8f9b17190b31fd2cd907d85d047"}, 40 | {"project_url": "beakerbrowser/beaker", "project_sha": "764bdefeeed9558dbf10aec77df262a896f57236"}, 41 | {"project_url": "bee-queue/bee-queue", "project_sha": "f6d901308f3b6433f2531edc4a9ac354aab434e5"}, 42 | {"project_url": "bkimminich/juice-shop", "project_sha": "b156c969d7bc8f24544f162f482c6285f58b4285"}, 43 | {"project_url": "blocknative/assist", "project_sha": "3fb619e3994752eacbddba4078d2bf0cbc7e2c9c"}, 44 | {"project_url": "bmazzarol/atom-plsql-linter", "project_sha": "02f6a1d48c4b5dbaa375dfb13d52703fc14b90a0"}, 45 | {"project_url": "BookMyComics-Developers/BookMyComics", "project_sha": "1efe6adb3490d7f62e7b31e3d75ac15b3b981875"}, 46 | {"project_url": "brave/brave-browser", "project_sha": "870d381ff8b08cb70d2b9fdea4b320d17bfe68f7"}, 47 | {"project_url": "brion/ogv.js", "project_sha": "5ce404a6aa8f53b7cef220916b89e613ac58fd17"}, 48 | {"project_url": "busterjs/buster", "project_sha": "5e20f3e23aeb7ea996be7a669e520c054b8f1035"}, 49 | {"project_url": "CalebMorris/react-moment-proptypes", "project_sha": "89a61c17250ea7b71d55d2855f6739ae4071529a"}, 50 | {"project_url": "CartoDB/cartodb", "project_sha": "9518ec6917e4091a56dc7b9d5fbf089bcb003271"}, 51 | {"project_url": "cerner/terra-core", "project_sha": "15458289ff022f302144932e047a6669b6c461a5"}, 52 | {"project_url": "cerner/terra-dev-site", "project_sha": "80a0e471548f553b7e58e30a2a0b6e8c0e7682fc"}, 53 | {"project_url": "cloudfoundry-attic/cf-abacus", "project_sha": "68aad9e2d497335d3a2e0da736bb9f01fe54dfb3"}, 54 | {"project_url": "cncf/landscapeapp", "project_sha": "62fa27892cd9e9095567c0c7e5d84fd514149cd9"}, 55 | {"project_url": "codeceptjs/CodeceptJS", "project_sha": "3fb39ae1d4f9b00438b1398cefba0dc677260aeb"}, 56 | {"project_url": "codetheweb/tuyapi", "project_sha": "905670c7cf7a8ad5756ea08eeca115178121423b"}, 57 | {"project_url": "covidwatchorg/portal", "project_sha": "95e36eeb777fca76318b5b0680c82f43f502fee3"}, 58 | {"project_url": "cryptee/web-client", "project_sha": "10f96daff7214a0e5afb71e56eed7256e59e17b0"}, 59 | {"project_url": "ctrlplusb/react-universally", "project_sha": "83d533a9c780716d18f034f7fb52dbd3a1c4051b"}, 60 | {"project_url": "cypress-io/cypress-example-recipes", "project_sha": "292325e6638bb4626861bc2f6df99d26ab8e7bff"}, 61 | {"project_url": "DataDog/dd-trace-js", "project_sha": "71a5288dea5df31c6a492ce22ff8169552548d47"}, 62 | {"project_url": "DeComX/pantheon", "project_sha": "deepforge-dev - 
deepforge"}, 63 | {"project_url": "deepforge-dev/deepforge", "project_sha": "f9cb1ff12644f64c01ca4d71ca66e6e22506b084"}, 64 | {"project_url": "dhis2/ui", "project_sha": "625c9c9391cdc6f625c927d20a39eef37f550a4a"}, 65 | {"project_url": "digidem/mapeo-core", "project_sha": "fd782a55cebb5f54a45f2f042287218c849b5f35"}, 66 | {"project_url": "dmitriz/min-karma", "project_sha": "8f1bcd25315d34a304d0d358166b9cb95a8a7871"}, 67 | {"project_url": "Dogfalo/materialize", "project_sha": "824e78248b3de81e383445e76ffb04cc3264fe7d"}, 68 | {"project_url": "domvm/domvm", "project_sha": "67de1a0cdf1879ad87926dafde0b8961f660c906"}, 69 | {"project_url": "duckduckgo/tracker-radar-collector", "project_sha": "3e9f49e46e5051e9f3d26bcd3be054447af887e4"}, 70 | {"project_url": "dukecon/dukecon_pwa", "project_sha": "127e8425ccff201a394448864407403c4e80d691"}, 71 | {"project_url": "dustinspecker/generator-ng-poly", "project_sha": "53f0beec9ad9a33a9f6b47649ca34a4d6bae95f8"}, 72 | {"project_url": "EFForg/privacybadger", "project_sha": "6f81b217e5717c46867cfec9e9b378da9354a84a"}, 73 | {"project_url": "elastic/apm-agent-nodejs", "project_sha": "9f13472d69523109d69315c6bb212957e46809cb"}, 74 | {"project_url": "elifesciences/elife-xpub", "project_sha": "bccea1e199bd213eef8ad03fca33d66727e34ccd"}, 75 | {"project_url": "ember-app-scheduler/ember-app-scheduler", "project_sha": "fb0b4e0075cf8847664e5459cd59bf74a0a1d379"}, 76 | {"project_url": "ember-batcher/ember-batcher", "project_sha": "231fb12ae51fde5e42704fa0e1daece8dd371532"}, 77 | {"project_url": "ember-cli/ember-cli", "project_sha": "b851c0edcae99701335e3e90efe0c225951c4f0b"}, 78 | {"project_url": "emberjs/ember.js", "project_sha": "3fa9068831b1e3cf8e594647a880adc0809861f3"}, 79 | {"project_url": "eobrain/bajel", "project_sha": "ecbfe18a990e97f677e522a7240617df29d47cd6"}, 80 | {"project_url": "eslint/eslint", "project_sha": "9e3d77cba65d0e38e07996e57961fb04f30d9303"}, 81 | {"project_url": "ether/etherpad-lite", "project_sha": "7656c6b9f195a79bb07bd3b77b55de1393ab71f4"}, 82 | {"project_url": "ethereum/web3.js", "project_sha": "f8a2533c2b09ce0a62f8414f2f6eed83ab78ca1f"}, 83 | {"project_url": "ExpressGateway/express-gateway", "project_sha": "a294cac39c98d66f5750c424a24e0bb8ce351c1c"}, 84 | {"project_url": "facebook/metro", "project_sha": "c6a94bc170cf95a6bb21b5638929ec3311a9a5b7"}, 85 | {"project_url": "facebook/prepack", "project_sha": "5beedbe85bd5b9d2de1264abafbb3b76f8584297"}, 86 | {"project_url": "facebook/react-native", "project_sha": "af99a6890b84713d002fbbd872f10fe2e6304861"}, 87 | {"project_url": "fastify/fastify", "project_sha": "d1ad6c17ce9731f1bc28377318b010966ca339cd"}, 88 | {"project_url": "flow-typed/flow-typed", "project_sha": "0e28de5e8a69def522d61f06ddffb624d465bceb"}, 89 | {"project_url": "FlowCrypt/flowcrypt-browser", "project_sha": "92d0188c66572d2c14ef4ed24602b8a58445630c"}, 90 | {"project_url": "FormidableLabs/nodejs-dashboard", "project_sha": "885fc96fec262b668da9282f57374966f7512b76"}, 91 | {"project_url": "freeboardgames/FreeBoardGames.org", "project_sha": "b11dbaa3715d71605bced4c8f04a40a79bd7cfef"}, 92 | {"project_url": "freedomjs/freedom", "project_sha": "9638e840aec9598c4d60383ed22444c525aefbf5"}, 93 | {"project_url": "freedomjs/freedom-for-chrome", "project_sha": "0154d345e99ac781460a790a31772c4352cb41b6"}, 94 | {"project_url": "freedomjs/freedom-for-firefox", "project_sha": "3a2922f378a9dbbb58f302b0216b56ec23cf17b3"}, 95 | {"project_url": "getgauge/taiko", "project_sha": "532c62c69da79852ef3cf8abd2325d2fff903a15"}, 96 | {"project_url": 
"GioBonvi/GoogleContactsEventsNotifier", "project_sha": "7e657a9e606f449fef22feae68d448d11083122b"}, 97 | {"project_url": "google/shaka-player", "project_sha": "a543b80648f429524c522295b0f4f60039c2e0ea"}, 98 | {"project_url": "googleads/videojs-ima", "project_sha": "11ecbefa37fbdbd6877fece63c38c11338b9e913"}, 99 | {"project_url": "GoogleChrome/lighthouse", "project_sha": "b981a38e7b3becc512f0a7985b1d2a64320da235"}, 100 | {"project_url": "GoogleChrome/workbox", "project_sha": "ee62b5b5b9ed321af457a2d962b2a34196a80263"}, 101 | {"project_url": "hack4impact-uiuc/life-after-hate", "project_sha": "9cad8555b52ff6bd98c7d15fae456e2f8b7a2a8a"}, 102 | {"project_url": "hapijs/lab", "project_sha": "aaaebb95108d3fdcb264a56e836c3459380844b1"}, 103 | {"project_url": "hapijs/nes", "project_sha": "977750a158e0b0105c719e0e2d4bd354154bf0a8"}, 104 | {"project_url": "hapijs/subtext", "project_sha": "ae0a2dd48ab8c6e2b8ebdebbc31baddb6b4c49b7"}, 105 | {"project_url": "hapipal/hpal", "project_sha": "4661f17ac8bdb1d3915695b2f819ff2336730131"}, 106 | {"project_url": "hapipal/schwifty", "project_sha": "088088572e7aac82b77a78d9c8ed05e7f1d5e957"}, 107 | {"project_url": "Haufe-Lexware/wicked.haufe.io", "project_sha": "1efadeabae7b7ccb4b17473e9aa5d0af60796adb"}, 108 | {"project_url": "hden/node-serf", "project_sha": "d176dede5c87e0285c383f7bbda3848584d6a2ad"}, 109 | {"project_url": "HSLdevcom/transitlog-ui", "project_sha": "316a7843c2a8e6d66db7f4c9181f775f95f926ed"}, 110 | {"project_url": "html-next/vertical-collection", "project_sha": "fd928512a33d44155a724ed65c5ba21cf7950d86"}, 111 | {"project_url": "Human-Connection/Human-Connection", "project_sha": "72a8f3d7f567442ca5e191672abfb47ea1b825a6"}, 112 | {"project_url": "hyperledger/cactus", "project_sha": "334612d251c56811a844b3308dc1561dcd6fc460"}, 113 | {"project_url": "IBM-Cloud/gp-js-client", "project_sha": "8ac9e9b0ebee3264d446d68ff487ef995173bff0"}, 114 | {"project_url": "ikydd/blackat", "project_sha": "26a8ba8dac8be027978b5fc046131936aadb76ec"}, 115 | {"project_url": "IMA-WorldHealth/bhima", "project_sha": "f76ac0085b2566d249cdd6ab135950faf0e10da3"}, 116 | {"project_url": "ing-bank/lion", "project_sha": "02e61285ddc83e4cb2ec7d2acc6d6a6620a94924"}, 117 | {"project_url": "iodide-project/iodide", "project_sha": "f9dd78a725ce1a2aa96784a46b527b740605431b"}, 118 | {"project_url": "ipfs-inactive/js-ipfs-http-client", "project_sha": "995abb41b83c8345b16cba67151e9ccb9cbea4de"}, 119 | {"project_url": "israelroldan/grunt-ssh", "project_sha": "7175b5548291bb2105a33a45d772573cb888430d"}, 120 | {"project_url": "istanbuljs/nyc", "project_sha": "ab7c53b2f340b458789a746dff2abd3e2e4790c3"}, 121 | {"project_url": "jaggedsoft/node-binance-api", "project_sha": "950d773a5f2c3a61c6e29b53e3af57594921a239"}, 122 | {"project_url": "jamesshore/quixote", "project_sha": "6b5c07b4d202d44e0ee6ecd99c22df4547558c17"}, 123 | {"project_url": "jamhall/s3rver", "project_sha": "f834192dbb07da4548b48c95066bae50cfaac819"}, 124 | {"project_url": "JeroenDeDauw/Maps", "project_sha": "f9bec919e77d671c4e96f9aa16d0452d17f700c7"}, 125 | {"project_url": "jivid/akobi", "project_sha": "ccd8d4de55b2066db9c11f9f00ffeed36ea33673"}, 126 | {"project_url": "jorgebucaran/hyperapp", "project_sha": "c3717e3ff78b6fa8663575d34d330d68929a0974"}, 127 | {"project_url": "jrcasso/mean-demo", "project_sha": "31f3e21420fd5ef13cc7555a56e3106a31dd4a36"}, 128 | {"project_url": "json-schema-faker/json-schema-faker", "project_sha": "9bbe0e895cc9ebce939d5f358385f151d72c739c"}, 129 | {"project_url": "jwplayer/jwplayer", "project_sha": 
"30353cd1e1f3017a96ef2854ef758fb4f479cd7a"}, 130 | {"project_url": "kaliber5/ember-bootstrap", "project_sha": "c92d1898b715da0ebd534a813a4ce592d1ed115c"}, 131 | {"project_url": "kategengler/ember-cli-code-coverage", "project_sha": "46dc079ab518bddc325fb305790d58adf2c28aae"}, 132 | {"project_url": "keystonejs/keystone", "project_sha": "67f0f2ce7fa58288cf06d198e4b1a5c51d265bcf"}, 133 | {"project_url": "kgiszewski/Archetype", "project_sha": "2e0bce99b9f386aa24a56be02fca8cd7388b39bd"}, 134 | {"project_url": "kiwicom/smart-faq", "project_sha": "2131be6290020a11dc6ad236eb82c5bde75945d8"}, 135 | {"project_url": "Lambda-School-Labs/labs-spa-starter", "project_sha": "2d1bbf41db2a97574c62cc3d6745cc0b2e644ead"}, 136 | {"project_url": "lekoder/consul-kv-object", "project_sha": "5cf3c44f416d28d11c567c9caab86b27e3e0f0a0"}, 137 | {"project_url": "liferay/senna.js", "project_sha": "fd89ca02de0ad57e7697c5088f4e490f8d181958"}, 138 | {"project_url": "linkeddata/dokieli", "project_sha": "52f9c3cc8519d45339996f2a926bae18c37bf5d8"}, 139 | {"project_url": "LLK/scratch-vm", "project_sha": "e4bb21f1817a2b7bbca9be19da6eba529291ed0c"}, 140 | {"project_url": "magda-io/magda", "project_sha": "754ec4cf2aff491549007cd82f676da4c3759061"}, 141 | {"project_url": "magento/pwa-studio", "project_sha": "836aa40608465ccc28066d4fbdddee3a6a560b75"}, 142 | {"project_url": "marcos8896/nutrition-care-node-api", "project_sha": "20b08a443d4d7714dc8ea137b3ffcce51f5524c0"}, 143 | {"project_url": "marionettejs/backbone.marionette", "project_sha": "85936fc518dd7bb0934faf231123172e3eee0169"}, 144 | {"project_url": "marklogic-community/marklogic-samplestack", "project_sha": "5449924fe9abd1712d3ef20ca2f25f2e291578e0"}, 145 | {"project_url": "material-components/material-components-web", "project_sha": "a9ff9866f237fbeebe94e655ae578b68ce675a04"}, 146 | {"project_url": "mbland/custom-links", "project_sha": "3e58bb2b4ea335451489d9b81226a414d7352c3f"}, 147 | {"project_url": "mcollina/autocannon", "project_sha": "ba3a2124fa68be6f263e860001be419d71de39d9"}, 148 | {"project_url": "meteor/meteor", "project_sha": "dc38e4325dcd88fb3c6d1be1639680c6ff6f5e80"}, 149 | {"project_url": "microsoft/ChakraCore", "project_sha": "c3ead3f8a6e0bb8e32e043adc091c68cba5935e9"}, 150 | {"project_url": "mikakaraila/node-red-contrib-opcua", "project_sha": "aec7272f4f7554a7473daf19136e6fa8c9dfc681"}, 151 | {"project_url": "milieuinfo/webcomponent-vl-ui-wizard", "project_sha": "efecc0c4f3659ac1348ae456604534d42e6b90b7"}, 152 | {"project_url": "mishoo/UglifyJS", "project_sha": "f0ca9cfbe65efc919149e7cd74cedd186d6413ee"}, 153 | {"project_url": "mitodl/open-discussions", "project_sha": "462c242eab04f68552e80a6f416c18c4b0b57cb0"}, 154 | {"project_url": "mocha-parallel/mocha-parallel-tests", "project_sha": "d1b2e88fa6bad71d0a5d7487809fcb4be4030b9d"}, 155 | {"project_url": "mohsen1/yawn-yaml", "project_sha": "aab6ee95ead9da9b7f1b1bbfb7325b2e90d7d3f5"}, 156 | {"project_url": "moorara/microservices-demo", "project_sha": "bc16c5eeb6091392e62d0c260d2acfe48aef4b06"}, 157 | {"project_url": "mozilla/blok", "project_sha": "faac2281c48cd226b4fb8c4e22de588a02328c31"}, 158 | {"project_url": "mui-org/material-ui", "project_sha": "6e8b99d133025c9e785a778a183fa81383998a42"}, 159 | {"project_url": "n5ro/aframe-extras", "project_sha": "5c20172a159aba54e7b6f7f243a864f76905448e"}, 160 | {"project_url": "nasa-gibs/worldview", "project_sha": "c4769a03394676dd4ec7126cc14a7c67dc7e4eaf"}, 161 | {"project_url": "NativeScript/nativescript-cli", "project_sha": "eb918011d6f0be9a8ccb6b569628e3960fd4f8b9"}, 
162 | {"project_url": "nccgroup/tracy", "project_sha": "6ce4714a3b3b407503cecd8c9842132fe4dc37e4"}, 163 | {"project_url": "neffo/earth-view-wallpaper-gnome-extension", "project_sha": "016c982dccd9e7b454b84e9f50b4accc1b4348d6"}, 164 | {"project_url": "NetsBlox/NetsBlox", "project_sha": "419ca83482c562a0cfa5af1d2dd9907b7387f7ef"}, 165 | {"project_url": "nightwatchjs/nightwatch", "project_sha": "4b09cb57c8a9fb29d6b6795e59c64b4942bddf67"}, 166 | {"project_url": "noble/bleno", "project_sha": "72028bc995d55cb9dcf223f9b0ffce563d091212"}, 167 | {"project_url": "nock/nock", "project_sha": "8a38f41a28b36fef50d5723daa94cf21a6490fc5"}, 168 | {"project_url": "node-alarm-dot-com/homebridge-node-alarm-dot-com", "project_sha": "26516177a2324aa53b0cfbb8af52fb1354be78be"}, 169 | {"project_url": "nodejs/citgm", "project_sha": "460c3a008f1c33bda2e136631d0162479419ed36"}, 170 | {"project_url": "nodejs/node-chakracore", "project_sha": "770c8dcd1bc3e0fce2d4497b4eec3fe49d829d43"}, 171 | {"project_url": "nodejs/undici", "project_sha": "c415fbbb59e2b898c5db6a681265cf3da865d02c"}, 172 | {"project_url": "npm/cli", "project_sha": "29622c1349b38173924058a1fb0ede9edf8a5f6f"}, 173 | {"project_url": "NSWSESMembers/availability-poc", "project_sha": "7ebc17b6005a3c1573e6c68bd5411b0657c98f71"}, 174 | {"project_url": "nwjs-community/nw-builder", "project_sha": "a1d4fb5148255e2b6fa5bce4a2167c9be8cc71d6"}, 175 | {"project_url": "observablehq/plot", "project_sha": "4d3cd1586e7412b95687157d12c792fde84a2229"}, 176 | {"project_url": "ocadotechnology/rapid-router", "project_sha": "38adf70a3e76a05fa814a7d3c0e1c61e4ba125c2"}, 177 | {"project_url": "ONSdigital/eq-author-app", "project_sha": "8bb1621cd4973281730a38378765b1718b08ca54"}, 178 | {"project_url": "Ontotext-AD/graphdb.js", "project_sha": "d0880dabf966e82def44537a720bf620d6d29f5e"}, 179 | {"project_url": "open-wc/open-wc", "project_sha": "57ddb3ccfff6b00468d3a7ebabbc15cfe966f7a9"}, 180 | {"project_url": "OpenEnergyPlatform/oeplatform", "project_sha": "1ce978f8faade3effe4cf7d3eec7522e990df910"}, 181 | {"project_url": "openseadragon/openseadragon", "project_sha": "ebab356c207e626b6622f88ffcb0cd28b918f85d"}, 182 | {"project_url": "openstyles/stylus", "project_sha": "50a0a115d1c6587d221f3253feeb4cb88b6f5336"}, 183 | {"project_url": "Opentrons/opentrons", "project_sha": "f8f7e699d512f59e1a2f4a9969428744e86a6a22"}, 184 | {"project_url": "OpenZeppelin/openzeppelin-contracts", "project_sha": "604025400f9be5c32581bb6ab03a46bbc09c5562"}, 185 | {"project_url": "OriginProtocol/origin", "project_sha": "57c55c023188e3a53cb9ee3dfafe0bf3210e0cf8"}, 186 | {"project_url": "owncloud/contacts", "project_sha": "efb06fef530dbf1812cbb98d651ec87680de97a1"}, 187 | {"project_url": "palantir/eclipse-typescript", "project_sha": "007579ba58d2979a5989caf04733a9d5dfcc56de"}, 188 | {"project_url": "particle-iot/particle-cli", "project_sha": "07dfa4e7d928d9641be368881b2216c6fb017c6c"}, 189 | {"project_url": "perfsonar/toolkit", "project_sha": "d4c8906acdf7d8be49cf37b59939748945e526d9"}, 190 | {"project_url": "pingyhq/pingy-cli", "project_sha": "53721434b698f53ba195c4824ca8d1f87ea8b60c"}, 191 | {"project_url": "poanetwork/tokenbridge", "project_sha": "961b12b9f3545830a04044e109762277efcea6ef"}, 192 | {"project_url": "postmanlabs/newman", "project_sha": "89941554304362d0cfec2914d134f738348b27c5"}, 193 | {"project_url": "postmanlabs/postman-runtime", "project_sha": "7855b3ae5858734bfb6f0c5985592d8b2957f4d1"}, 194 | {"project_url": "pouchdb/pouchdb", "project_sha": "546c8bb696872f86816574d02d47131ace0b4d18"}, 195 
| {"project_url": "PowerlineApp/powerline-mobile", "project_sha": "2030817dc80a07f3cfc2129bd830ce33ab50373d"}, 196 | {"project_url": "premasagar/sqwidget", "project_sha": "7edc6d21997bb18da7daa59068926a082028d6f0"}, 197 | {"project_url": "PrismJS/prism", "project_sha": "59e5a3471377057de1f401ba38337aca27b80e03"}, 198 | {"project_url": "probcomp/metaprob", "project_sha": "43c4bea80772ed8b2baa51cd5ac6c593a34a3a8b"}, 199 | {"project_url": "ProjectMirador/mirador", "project_sha": "3c121dbe99bae4eab910cb2df00e93904bc123ea"}, 200 | {"project_url": "Quicksaver/Tab-Groups", "project_sha": "29ea6517e73eb5d58b2f0b9fc2d65d589d910e8a"}, 201 | {"project_url": "regl-project/regl", "project_sha": "3d90d57d473b5dee6680dc97897f4a9fba465501"}, 202 | {"project_url": "reportportal/service-ui", "project_sha": "049abcb8fc70ee131625914e9da4a748e23d2230"}, 203 | {"project_url": "restify/node-restify", "project_sha": "89e7ac81a4cc885d153df6f07d5cf35ed75fd4d0"}, 204 | {"project_url": "rtfeldman/node-test-runner", "project_sha": "16cd4b9c8e5dab3ce297039f5d72d372bdd63de9"}, 205 | {"project_url": "ruiquelhas/blaine", "project_sha": "a69cdad6e59ebb19493018eacb7b7602f2225ce1"}, 206 | {"project_url": "ruiquelhas/copperfield", "project_sha": "fe5629ed8f5edea740ca4917dfac6a779e644b45"}, 207 | {"project_url": "ruiquelhas/electron-recipes", "project_sha": "9bcfc2520ad383c1e5bebe9c427214cab1d0a0da"}, 208 | {"project_url": "ruiquelhas/fischbacher", "project_sha": "35eb4dcf0225a8899e13a3ab63c3e878d9d434ca"}, 209 | {"project_url": "ruiquelhas/henning", "project_sha": "ac75e0b1cebdbb123eccb05277bc5c663f8e6696"}, 210 | {"project_url": "ruiquelhas/houdin", "project_sha": "4a700f66748b3a57a1c1ab6ee7bbe425ce20c526"}, 211 | {"project_url": "ruiquelhas/lafayette", "project_sha": "038578c360b22ff846daa7b3e6e0aeb712b145b2"}, 212 | {"project_url": "ruiquelhas/thurston", "project_sha": "071f9ee5265f64f47335b428a498df22895e549c"}, 213 | {"project_url": "sampotts/plyr", "project_sha": "0c9759455cbfcce888c66925c3b457ce06cee31e"}, 214 | {"project_url": "scalableminds/webknossos", "project_sha": "b91b15ff4180b2288c40ad9e3a86678258dcd5c9"}, 215 | {"project_url": "scality/Arsenal", "project_sha": "96cbaeb821d8045cbe8eabd00092290e13e46784"}, 216 | {"project_url": "SeleniumBuilder/se-builder", "project_sha": "8230ad58a526d3eb905d32a780daeaea1fb56a55"}, 217 | {"project_url": "serverless/serverless", "project_sha": "17d64e6c94b88a5daf36f28a4fa192c231052cfb"}, 218 | {"project_url": "SGrondin/bottleneck", "project_sha": "b83528333ba4d27cf70b81cc2be12e09d7ff692f"}, 219 | {"project_url": "signalapp/Signal-Desktop", "project_sha": "bd14b74e638dce03928e08ffbe2a83a6c047406e"}, 220 | {"project_url": "sindresorhus/npm-name", "project_sha": "7aef07b69ed35f584e0a8bf6cece96750becaf00"}, 221 | {"project_url": "sindresorhus/serialize-error", "project_sha": "a212a8c3902fa1ff1fdef8f7625dd0cc6d6e89a1"}, 222 | {"project_url": "skarfacegc/FlowTrack2", "project_sha": "990a2566f30b8dd84a61ea1ff6f58076016a7796"}, 223 | {"project_url": "solid/node-solid-server", "project_sha": "bbb8d78df7e8908e20e3052ae6655722aa6fa6de"}, 224 | {"project_url": "SolidarityEconomyAssociation/sea-map", "project_sha": "17fa76b9b4070354c31faae81ba0162b8f27bf1b"}, 225 | {"project_url": "soscripted/sox", "project_sha": "4be396373c06bb8340d740089018e364729bec70"}, 226 | {"project_url": "sourcecred/sourcecred", "project_sha": "3da222ebe44c110f265063cfa99316ed5c1fa0b3"}, 227 | {"project_url": "spark-notebook/spark-notebook", "project_sha": "69174f3923d0564d2078c0e0c70125245157d5b5"}, 228 | 
{"project_url": "stanford-oval/thingengine-core", "project_sha": "b69f7b0166d256428a08ba2dac3fc3ca8dddf611"}, 229 | {"project_url": "stealjs/steal-tools", "project_sha": "05f60d58e3ee56dbb8428c83121fdb6ee2b1825c"}, 230 | {"project_url": "stellar/js-stellar-sdk", "project_sha": "52947e81e487edf179a6003efb40a1425a4f7ff2"}, 231 | {"project_url": "stimulusreflex/stimulus_reflex", "project_sha": "52aa993165a656eccbe2cefaca9f5388509d014d"}, 232 | {"project_url": "streamr-dev/network", "project_sha": "4cdabba71db0a6c531c63368d1a78361fff01dce"}, 233 | {"project_url": "strongloop/loopback", "project_sha": "13371fd2a138a6f39db77e5a455b3170e5d4a0f5"}, 234 | {"project_url": "studentinsights/studentinsights", "project_sha": "4bb09f97eb9c0473a9ac6ee076171de12855e721"}, 235 | {"project_url": "sumup-oss/circuit-ui", "project_sha": "00ceacbd82b6cd3a71592ea9d2da5b95892f965b"}, 236 | {"project_url": "superscriptjs/superscript", "project_sha": "5e3e1b51654a54518dfada17c0cd9dc146c8e48a"}, 237 | {"project_url": "sveltejs/kit", "project_sha": "c4476c6d106b41dd8e6badbbdd0128b78be49d5c"}, 238 | {"project_url": "tarantool/graphql.0", "project_sha": "05f39946299cb2f35a97be326b992aace0205eaf"}, 239 | {"project_url": "testem/testem", "project_sha": "42fe29451b187bd1cd1e546228fa1bfbe11084f3"}, 240 | {"project_url": "thaliproject/jxcore", "project_sha": "d3ccd242a592416b6537dfea8ce539bd6208dd54"}, 241 | {"project_url": "thejoshwolfe/snakefall", "project_sha": "62bdfe3718f86ef85fc8c11e600bf621fa2a586c"}, 242 | {"project_url": "themgoncalves/react-loadable-ssr-addon", "project_sha": "2036a6f12e9048d8a6e3eb0a8097455fa0fe1ebc"}, 243 | {"project_url": "TheScienceMuseum/collectionsonline", "project_sha": "ef486c650bce9f2dccf25b7188dbe986d4b63c3c"}, 244 | {"project_url": "tmijs/tmi.js", "project_sha": "3904ae743a12b984aa1a175740e8b5bae08a03e4"}, 245 | {"project_url": "transloadit/uppy", "project_sha": "f07697e7f45e471ca16bac8751fa7221d9445605"}, 246 | {"project_url": "tristanHessell/mess-around", "project_sha": "19cdf7aa58eaf165a88ac7a3954fc7a33e5685bc"}, 247 | {"project_url": "trufflesuite/truffle", "project_sha": "0f17cf9680ac0dc7aa6a314ad3b78ad569daa896"}, 248 | {"project_url": "TryGhost/Ghost", "project_sha": "4da658e72ad42cf251e4fb100ca651a7d4dca79e"}, 249 | {"project_url": "tubbo/openrct2-benchwarmer", "project_sha": "504d75bfaf1b158dbe23e4bbfb926502189a0ff6"}, 250 | {"project_url": "tulios/kafkajs", "project_sha": "ff3b1117f316d527ae170b550bc0f772614338e9"}, 251 | {"project_url": "TypeStrong/ts-loader", "project_sha": "cf5326d9b5f1b804ff8d817f88fb127bc45ad9d1"}, 252 | {"project_url": "uber/baseweb", "project_sha": "65c791a6b5ac50722f34e2a7b1282b08c539f58a"}, 253 | {"project_url": "usdigitalresponse/neighbor-express", "project_sha": "130d9edd9ac09f2a8aa947b0d21f054d4dfc0462"}, 254 | {"project_url": "vega/vega", "project_sha": "b45cf431cd6c0d0c0e1567f087f9b3b55bc236fa"}, 255 | {"project_url": "video-dev/hls.js", "project_sha": "59d421479b5002993a5f3b36d4505adff3209fb5"}, 256 | {"project_url": "visgl/luma.gl", "project_sha": "044c0ef5f767cd56974e30475a30dd3f24305983"}, 257 | {"project_url": "w3c/aria-practices", "project_sha": "4adb78ea96b22db559577aa6ed64c9059596ab4a"}, 258 | {"project_url": "waiterio/api", "project_sha": "9948b542f5da1957c3f656d959c4f5957d364eb1"}, 259 | {"project_url": "web-animations/web-animations-js-legacy", "project_sha": "6a1c45473f9ba2db1ccad34f879bca829f77264d"}, 260 | {"project_url": "webdriverio/cucumber-boilerplate", "project_sha": "f91d34ff0bf9112d02830dc474f1a97ff6e8d9d3"}, 261 | 
{"project_url": "webex/webex-js-sdk", "project_sha": "cc743f187c646290dab21322431cbf8f1ce771a2"}, 262 | {"project_url": "webpack/webpack", "project_sha": "16143f5fa835ad8c7181b8aeedc52f9cdd0fd39d"}, 263 | {"project_url": "webpack/webpack-cli", "project_sha": "4e1c45ad8de888dea13247855c78848632475653"}, 264 | ]} 265 | 266 | jobs: 267 | build-matrix: 268 | runs-on: ubuntu-latest 269 | outputs: 270 | matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }} 271 | steps: 272 | - id: set-matrix 273 | run: | 274 | echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT 275 | echo $PROJECTS_JSON >> $GITHUB_OUTPUT 276 | echo "__EOF__" >> $GITHUB_OUTPUT 277 | execute: 278 | needs: [build-matrix] 279 | strategy: 280 | matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} 281 | fail-fast: false 282 | uses: ./.github/workflows/end2end.yml 283 | with: 284 | project_url: ${{ matrix.projects.project_url }} 285 | project_sha: ${{ matrix.projects.project_sha }} 286 | -------------------------------------------------------------------------------- /.github/workflows/barbosa23flaky.yml: -------------------------------------------------------------------------------- 1 | name: Test on Barbosa23 JS projects with >=5 flaky tests 2 | 3 | on: 4 | push 5 | 6 | env: 7 | PROJECTS_JSON: | 8 | { projects: [ 9 | {"project_url": "appium/appium", "project_sha": "2d124323c5973ef9d3e190f7401e67106886ffd4"}, 10 | {"project_url": "badges/shields", "project_sha": "14892e3943a4677332618d8b9f584766f7940ee7"}, 11 | {"project_url": "facebook/react-native", "project_sha": "af99a6890b84713d002fbbd872f10fe2e6304861"}, 12 | {"project_url": "FlowCrypt/flowcrypt-browser", "project_sha": "92d0188c66572d2c14ef4ed24602b8a58445630c"}, 13 | {"project_url": "meteor/meteor", "project_sha": "dc38e4325dcd88fb3c6d1be1639680c6ff6f5e80"}, 14 | {"project_url": "yui/yui3", "project_sha": "25264e3629b1c07fb779d203c4a25c0879ec862c"} 15 | ]} 16 | 17 | jobs: 18 | build-matrix: 19 | runs-on: ubuntu-latest 20 | outputs: 21 | matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }} 22 | steps: 23 | - id: set-matrix 24 | run: | 25 | echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT 26 | echo $PROJECTS_JSON >> $GITHUB_OUTPUT 27 | echo "__EOF__" >> $GITHUB_OUTPUT 28 | execute: 29 | needs: [build-matrix] 30 | strategy: 31 | matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} 32 | fail-fast: false 33 | uses: ./.github/workflows/end2endCustomContainers.yml 34 | with: 35 | project_url: ${{ matrix.projects.project_url }} 36 | project_sha: ${{ matrix.projects.project_sha }} 37 | -------------------------------------------------------------------------------- /.github/workflows/end2end.yml: -------------------------------------------------------------------------------- 1 | name: Test NPMFilter End to End on a Project 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | project_url: 7 | description: 'GitHub suffix of project to test (username/project)' 8 | required: true 9 | type: string 10 | project_sha: 11 | description: 'SHA of project to test' 12 | required: true 13 | type: string 14 | workflow_call: 15 | inputs: 16 | project_url: 17 | description: 'GitHub suffix of project to test (username/project)' 18 | required: true 19 | type: string 20 | project_sha: 21 | description: 'SHA of project to test' 22 | required: true 23 | type: string 24 | jobs: 25 | execute: 26 | runs-on: self-hosted 27 | 28 | steps: 29 | - name: Checkout code 30 | uses: actions/checkout@v3 31 | - name: Build NPMFilter container 32 | run: docker build -t 
npmfilter . 33 | - name: Run NPMFilter 34 | id: run-npm-filter 35 | env: 36 | SHA: ${{ inputs.project_sha }} 37 | URL: ${{ inputs.project_url }} 38 | DOCKER_IMAGE: npmfilter:latest 39 | run: | 40 | IFS="/" read -r -a projectArray <<< "$URL" 41 | OrgName=${projectArray[0]} 42 | ProjectName=${projectArray[1]} 43 | LogDir=${URL//\//-} 44 | echo "LogDir=$LogDir" >> $GITHUB_OUTPUT 45 | 46 | echo "Running NPMFilter on $OrgName/$ProjectName@$SHA" 47 | 48 | mkdir -p docker_configs/ 49 | cat >docker_configs/debug_filter_config.json < tests-overview.csv 90 | 91 | # Check if tests were found 92 | TestData=$(cat tests-overview.csv) 93 | IFS="," read -r -a testCount <<< $(python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/) 94 | TestsRun=${testCount[0]} 95 | if [ $TestsRun -le 2 ]; then 96 | echo "ERROR: No tests found." 97 | exit -1 98 | else 99 | echo "OK: ${TestsRun} tests found!" 100 | fi 101 | 102 | echo "LOGNAME=results-${OrgName}-${ProjectName}-${SHA}" >> "$GITHUB_OUTPUT" 103 | - name: Upload output 104 | uses: actions/upload-artifact@v2 105 | with: 106 | name: ${{ steps.run-npm-filter.outputs.LOGNAME }} 107 | path: npm_filter_docker_results -------------------------------------------------------------------------------- /.github/workflows/end2endCustomContainers.yml: -------------------------------------------------------------------------------- 1 | name: Test NPMFilter End to End on a Project with custom-built containers per-project 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | project_url: 7 | description: 'GitHub suffix of project to test (username/project)' 8 | required: true 9 | type: string 10 | project_sha: 11 | description: 'SHA of project to test' 12 | required: true 13 | type: string 14 | workflow_call: 15 | inputs: 16 | project_url: 17 | description: 'GitHub suffix of project to test (username/project)' 18 | required: true 19 | type: string 20 | project_sha: 21 | description: 'SHA of project to test' 22 | required: true 23 | type: string 24 | jobs: 25 | execute: 26 | runs-on: self-hosted 27 | 28 | steps: 29 | - name: Checkout code 30 | uses: actions/checkout@v3 31 | - name: Build NPMFilter container 32 | run: | 33 | IFS="/" read -r -a projectArray <<< "${{ inputs.project_url }}"; OrgName=${projectArray[0]}; ProjectName=${projectArray[1]} # parse org/project here: these variables are not inherited from other steps 34 | if [ -f "project-overrides/${OrgName}-${ProjectName}.sh" ]; then 35 | CUSTOM_INSTALL_SCRIPT="--build-arg CUSTOM_INSTALL_SCRIPT=project-overrides/${OrgName}-${ProjectName}.sh"; fi 36 | docker build -t npmfilter --build-arg REPO_LINK=https://github.com/${{ inputs.project_url }} --build-arg REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT . 37 | - name: Run NPMFilter 38 | id: run-npm-filter 39 | env: 40 | SHA: ${{ inputs.project_sha }} 41 | URL: ${{ inputs.project_url }} 42 | DOCKER_IMAGE: npmfilter:latest 43 | run: | 44 | IFS="/" read -r -a projectArray <<< "$URL" 45 | OrgName=${projectArray[0]} 46 | ProjectName=${projectArray[1]} 47 | LogDir=${URL//\//-} 48 | echo "LogDir=$LogDir" >> $GITHUB_OUTPUT 49 | 50 | echo "Running NPMFilter on $OrgName/$ProjectName@$SHA" 51 | 52 | mkdir -p docker_configs/ 53 | cat >docker_configs/debug_filter_config.json < tests-overview.csv 95 | 96 | # Check if tests were found 97 | TestData=$(cat tests-overview.csv) 98 | IFS="," read -r -a testCount <<< $(python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/) 99 | TestsRun=${testCount[0]} 100 | if [ $TestsRun -le 2 ]; then 101 | echo "ERROR: No tests found." 102 | exit -1 103 | else 104 | echo "OK: ${TestsRun} tests found!
105 | fi 106 | 107 | echo "LOGNAME=results-${OrgName}-${ProjectName}-${SHA}" >> "$GITHUB_OUTPUT" 108 | - name: Upload output 109 | uses: actions/upload-artifact@v2 110 | with: 111 | name: ${{ steps.run-npm-filter.outputs.LOGNAME }} 112 | path: npm_filter_docker_results -------------------------------------------------------------------------------- /.github/workflows/smoketest.yml: -------------------------------------------------------------------------------- 1 | name: Test NPMFilter End to End on a toy project 2 | 3 | on: 4 | push: 5 | 6 | env: 7 | PROJECTS_JSON: | 8 | { projects: [ 9 | {"project_url": "mtiller/ts-jest-sample", "project_sha": "6739c576d4590c53296f3e4fcdf3074e582ae297"}, 10 | ]} 11 | 12 | jobs: 13 | build-matrix: 14 | runs-on: ubuntu-latest 15 | outputs: 16 | matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }} 17 | steps: 18 | - id: set-matrix 19 | run: | 20 | echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT 21 | echo $PROJECTS_JSON >> $GITHUB_OUTPUT 22 | echo "__EOF__" >> $GITHUB_OUTPUT 23 | execute-specialized-container: 24 | needs: [build-matrix] 25 | strategy: 26 | matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} 27 | fail-fast: false 28 | uses: ./.github/workflows/end2endCustomContainers.yml 29 | with: 30 | project_url: ${{ matrix.projects.project_url }} 31 | project_sha: ${{ matrix.projects.project_sha }} 32 | execute-generic-container: 33 | needs: [build-matrix] 34 | strategy: 35 | matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} 36 | fail-fast: false 37 | uses: ./.github/workflows/end2end.yml 38 | with: 39 | project_url: ${{ matrix.projects.project_url }} 40 | project_sha: ${{ matrix.projects.project_sha }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | TESTING_REPOS/* 2 | QLDBs/* 3 | items.json 4 | *__page_data.html 5 | *__results.json 6 | *_verbose_test_report.json 7 | local_mount/* 8 | **/node_modules 9 | 10 | tests/diagnose-npm-package.py 11 | tests/TESTING_REPOS/* 12 | tests/default_filter_config.json 13 | tests/local_mount/* 14 | 15 | input_list_scripts/*_deps_repos.txt 16 | 17 | **/__pycache__/* 18 | **/.cache/* 19 | nohup.out 20 | job.log 21 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | ARG DEBIAN_FRONTEND=noninteractive 3 | 4 | # build arg: setting up for a specific repo? at a specific commit? custom install script? 5 | ARG REPO_LINK 6 | ARG REPO_COMMIT 7 | # placeholder: if this arg isn't specified, copy over the readme file in configs (can't copy no source, RIP) 8 | ARG CUSTOM_INSTALL_SCRIPT=configs/README.md 9 | 10 | RUN mkdir -p /home/npm-filter/results 11 | RUN mkdir /home/npm-filter/src 12 | RUN mkdir /home/npm-filter/configs 13 | 14 | COPY src /home/npm-filter/src 15 | # copy the custom install script if it exists 16 | RUN echo $CUSTOM_INSTALL_SCRIPT 17 | COPY ${CUSTOM_INSTALL_SCRIPT} configs/ /home/npm-filter/configs/ 18 | # delete the config readme: we don't need this in the docker. 
and it's a flag for no-custom-install 19 | # since the readme is the default for custom install 20 | RUN rm /home/npm-filter/configs/README.md 21 | # and name it the custom_install_script 22 | RUN if [ -f /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} ] ; then mv /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} /home/npm-filter/configs/custom_install_script ; fi 23 | COPY *.sh /home/npm-filter/ 24 | COPY get_rel_project_reqs.js /home/npm-filter 25 | 26 | RUN apt-get update \ 27 | && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel 28 | 29 | RUN apt -y install python3-pip 30 | RUN pip3 install bs4 scrapy xmltodict pandas 31 | 32 | WORKDIR /home/npm-filter 33 | 34 | RUN git config --global http.sslVerify "false" 35 | RUN ./build.sh $REPO_LINK $REPO_COMMIT 36 | # source the env variables produced by the build script (node version, etc) 37 | RUN . /envfile 38 | 39 | # add a default command for running the tests for repo_link and commit provided 40 | # this runs in verbose mode 41 | # need to use ENV instead of ARG in the CMD b/c docker is 10/10 42 | ENV ENV_REPO_COMMIT=$REPO_COMMIT 43 | ENV ENV_REPO_LINK=$REPO_LINK 44 | # gotta source our env vars so the command can run and use npm/node/etc :-) 45 | CMD . /envfile; ./run_verbose_for_repo_and_config.sh $ENV_REPO_LINK $ENV_REPO_COMMIT -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Ellen Arteca, Alexi Turcotte 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # npm-filter 2 | This tool takes a user-specified set of JavaScript/TypeScript packages, and installs/builds them. \ 3 | The primary use case is to automatically determine: 4 | * what the test commands are 5 | * what testing infrastructure is used 6 | * how many passing and failing tests there are 7 | 8 | Users can also specify: 9 | * custom scripts, or 10 | * [CodeQL](https://codeql.github.com/) static analyses 11 | to be run over the source code of the package. 12 | 13 | ## Usage options 14 | This tool can either take packages specified as GitHub repo links, or as npm packages. 
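For a quick first run before diving into the options below, the sandboxed Docker wrapper described in the docker section can be pointed at a single npm package; this sketch simply reuses an invocation that appears verbatim later in this README:

```
./runDocker.sh python3 src/diagnose_npm_package.py --packages body-parser
```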
15 | 16 | ### Running over GitHub repo links 17 | To run the tool over GitHub repo links, use the [`diagnose_github_repo.py` script](https://github.com/emarteca/npm-filter/blob/master/src/diagnose_github_repo.py), called as follows: 18 | ``` 19 | python src/diagnose_github_repo.py 20 | [--repo_list_file [rlistfile]] 21 | [--repo_link [rlink]] 22 | [--repo_link_and_SHA [rlink_and_SHA]] 23 | [--repo_local_dir [path_to_local_dir]] 24 | [--config [config_file]] 25 | [--output_dir [output_dir]] 26 | ``` 27 | 28 | #### Arguments 29 | All arguments are optional, although the tool will not do anything if no repos are specified. So effectively, at least one of the repo-specifying arguments below must be provided for the tool to do any work. 30 | * `--repo_list_file [rlistfile]`: a file containing a list of GitHub repo links to be analyzed. \ 31 | Each line of the input file must specify one repo link, with an optional whitespace-delimited commit SHA to check the repo out at. 32 | For example, a valid input file could be: 33 | ``` 34 | https://github.com/expressjs/body-parser d0a214b3beded8a9cd2dcb51d355f92c9ead81d4 35 | https://github.com/streamich/memfs 36 | ``` 37 | * `--repo_link [rlink]`: a link to a single GitHub repo to be analyzed, e.g., `https://github.com/expressjs/body-parser` 38 | * `--repo_link_and_SHA [rlink_and_SHA]`: a link to a single GitHub repo to be analyzed, followed by a space-delimited commit SHA to analyze the repo at, e.g., `https://github.com/expressjs/body-parser d0a214b3beded8a9cd2dcb51d355f92c9ead81d4` 39 | * `--repo_local_dir [path_to_local_dir]`: path to a local directory containing the source code of a repo/package to be diagnosed 40 | * `--config [config_file]`: path to a configuration file for the tool (config options explained in [the config file section](#configuration-file)) 41 | * `--output_dir [output_dir]`: path to a directory in which to output the tool's results files (the shape of the results is explained in [the output section](#output)) 42 | 43 | ### Running over npm packages 44 | To run the tool over npm packages, use the [`diagnose_npm_package.py` script](https://github.com/emarteca/npm-filter/blob/master/src/diagnose_npm_package.py), called as follows: 45 | ``` 46 | python src/diagnose_npm_package.py 47 | --packages [list_of_packages] 48 | [--config [config_file]] 49 | [--html [html_file]] 50 | [--output_dir [output_dir]] 51 | ``` 52 | The back end of the npm package analyzer is a web scraper: given the name of an npm package, it finds the associated repository link on the npm page so that it can analyze the source code. This tool has some custom middleware to get around the rate limiting on the npm site, but if you are analyzing a large number of packages you will still see a significant performance hit compared to running on the GitHub repos directly. 53 | 54 | #### Arguments 55 | * `--packages [list_of_packages]`: list of npm packages to analyze. This is a required argument, and at least one package must be passed. 56 | * `--config [config_file]`: path to a configuration file for the tool (config options explained in [the config file section](#configuration-file)) 57 | * `--html [html_file]`: path to an html file that represents the npm page for the package that is specified to be analyzed. This option only works for one package, so if you want to use this option on multiple packages you'll need to call the tool in sequence for each one.
58 | * `--output_dir [output_dir]`: path to a directory in which to output the tool's results files (the shape of the results is explained in [the output section](#output)) 59 | 60 | ### Configuration file 61 | If you want to customize the behaviour of the tool, you can provide a custom configuration file. All fields in the configuration file are optional -- if not provided, defaults will be used. The [README in the configuration file directory](https://github.com/emarteca/npm-filter/tree/master/configs) goes through all the available options. 62 | 63 | ### Output 64 | The results of all the package diagnostics are output to a JSON file. The layout of the output is similar to that of the configuration file. 65 | The output is organized into the following top-level fields in the JSON, in order: 66 | * `setup`: an object with fields that are initialized in the presence of different setup errors that prevent the source code from being properly set up. For example, if the repo link is invalid (or if it can't be found on an npm package page), if there is an error checking out the specified commit, or if there is an error loading the `package.json`. 67 | * `installation`: an object listing the installer command for the package, and/or the presence of any errors in installation that prevent the analysis from continuing 68 | * `dependencies`: an object listing the dependencies of the package, if the configuration specified that they should be tracked 69 | * `build`: an object listing the build commands (in order, and if any) for the package, and/or the presence of any errors in the build commands that prevent the analysis from continuing 70 | * `testing`: an object with fields for each of the test commands in the package. The test commands are those specified in the configuration file. \ 71 | For each test command, the tool lists: 72 | * if it is a linter or a coverage tool, and if so what tool (`test_linters`, `test_coverage_tools`) 73 | * if it's not a linter or coverage tool, what testing infrastructure is being used (`test_infras`) 74 | * whether or not it runs new user tests; this is false for test commands that only call other test commands, or for test commands that don't run any tests explicitly, e.g., linters and coverage tools (`RUNS_NEW_USER_TESTS`) 75 | * if it runs other test commands, then a list of these commands is included (`nested_test_commands`) 76 | * whether or not it timed out (`timed_out`) 77 | * if it does run new user tests, then the number of passing and number of failing tests (`num_passing`, `num_failing`) 78 | * if verbose testing is specified as an option, then there will be an additional file of extra test output produced 79 | * `scripts_over_code`: an object with fields for each of the scripts run over the package source code. For each script, the tool lists its output and if there was an error. 80 | * `QL_queries`: an object with fields for each of the QL queries run over the package source code. For each query, the tool lists the output (if running in verbose mode), and if there was an error.
81 | * `metadata`: an object with fields for some metadata about the package: repository link, commit SHA if one was specified 82 | 83 | For example, the output of running `diagnose_github_repo` on `https://github.com/expressjs/body-parser` at commit SHA `d0a214b3beded8a9cd2dcb51d355f92c9ead81d4` with the default configuration file is as follows: 84 | ``` 85 | { 86 | "installation": { 87 | "installer_command": "npm install" 88 | }, 89 | "build": { 90 | "build_script_list": [] 91 | }, 92 | "testing": { 93 | "lint": { 94 | "test_linters": [ 95 | "eslint -- linter" 96 | ], 97 | "RUNS_NEW_USER_TESTS": false, 98 | "timed_out": false 99 | }, 100 | "test": { 101 | "num_passing": 231, 102 | "num_failing": 0, 103 | "test_infras": [ 104 | "mocha" 105 | ], 106 | "timed_out": false 107 | }, 108 | "test-ci": { 109 | "test_coverage_tools": [ 110 | "nyc -- coverage testing" 111 | ], 112 | "RUNS_NEW_USER_TESTS": false, 113 | "timed_out": false 114 | }, 115 | "test-cov": { 116 | "test_coverage_tools": [ 117 | "nyc -- coverage testing" 118 | ], 119 | "RUNS_NEW_USER_TESTS": false, 120 | "timed_out": false 121 | } 122 | }, 123 | "scripts_over_code": {}, 124 | "QL_queries": {}, 125 | "metadata": { 126 | "repo_link": "https://github.com/expressjs/body-parser", 127 | "repo_commit_SHA": "d0a214b3beded8a9cd2dcb51d355f92c9ead81d4" 128 | } 129 | } 130 | ``` 131 | 132 | #### QL Query output 133 | The output of each QL query is saved to a CSV file in the same directory as the JSON output, named `[package name]__[query name]__results.csv`. For example, if you run a query `myQuery.ql` over `body-parser`, the query results file will be `body-parser__myQuery__results.csv`. 134 | 135 | ### Running with docker 136 | To be safe, you should probably run any untrusted code in a sandbox. 137 | Since the entire point of this tool is to run code from a set of packages/projects you didn't write, we assume most of this code will fall into the untrusted category. 138 | 139 | We host the generic docker container [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter); if you edit the npm-filter source code and want to run your version in a docker container, we have included the docker build command below. 140 | 141 | The generic docker container runs on any package or repo specified. 142 | However, it is pre-built with default versions of node and npm. 143 | There is also the option to build a _repo-specific_ docker container. 144 | In this case, the container is built with the particular version of node and npm specified in the repo's `package.json` configuration file. 145 | The container is also pre-built with the install and build phases of `npm-filter` run, so that you can then run the tests in the container without waiting for any setup.
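Because a repo-specific image bakes the repo link and commit in at build time (via the `REPO_LINK` and `REPO_COMMIT` build args, consumed by the Dockerfile's default `CMD`), a minimal sketch of using one, assuming it was built and tagged as in the subsection below, is to run it with no extra arguments:

```
# Runs the image's default CMD, which re-runs npm-filter in verbose mode
# on the repo/commit the image was built for.
docker run --rm emarteca/npm-filter
```

Note that a plain `docker run` like this keeps the results inside the container; use the `runDocker.sh` wrapper described below if you want them written out to a local results directory.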
146 | 147 | #### Building a container-specific docker 148 | If you want to build a container specific to a particular repo, use the following command: 149 | ``` 150 | # general use 151 | docker build -t emarteca/npm-filter --build-arg REPO_LINK=[github link to repo] [--build-arg REPO_COMMIT=[specific commit SHA]] 152 | 153 | # specific example for memfs 154 | docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs 155 | 156 | # another example, for memfs at a specific commit 157 | docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs --build-arg REPO_COMMIT=863f373185837141504c05ed19f7a253232e0905 158 | ``` 159 | 160 | #### Building generic docker (if you've updated the npm-filter source code) 161 | Note: you don't need to do this if you're using npm-filter out of the box. 162 | In that case, you'll pull directly from DockerHub. 163 | ``` 164 | docker build -t npm-filter . 165 | ``` 166 | 167 | You'll also need to edit the `runDocker.sh` script and **remove the username `emarteca` before `npm-filter:latest` in the [`docker run` command](https://github.com/emarteca/npm-filter/blob/master/runDocker.sh#L18)**. 168 | 169 | #### Sandboxed usage 170 | ``` 171 | # general use 172 | ./runDocker.sh [regular command to run npm-filter] 173 | 174 | # example use 175 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link https://github.com/jprichardson/node-fs-extra 176 | 177 | # another example use 178 | ./runDocker.sh python3 src/diagnose_npm_package.py --packages body-parser 179 | 180 | ``` 181 | 182 | #### Docker: where the script needs to read from external files 183 | 184 | If you're running `npm-filter` with a custom config file, and running some custom scripts / QL queries over the package code, then you'll need to put these files in a specific folder called `docker_configs`. 185 | 186 | Also, anything referenced in the config file must be in this folder, and referenced by a path relative to it. 187 | 188 | For example: 189 | ``` 190 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_list_file docker_configs/repo_links.txt --config docker_configs/custom_config.json 191 | 192 | ``` 193 | Here we're reading a list of repos from `repo_links.txt` in the `docker_configs` directory. 194 | There's also a custom config file. 195 | 196 | Now, if we wanted to run a script over the code, inside `custom_config.json` we'd have: 197 | ``` 198 | "meta_info": { 199 | "scripts_over_code": [ "myscript.sh" ], 200 | "QL_queries": [ "myquery.ql" ] 201 | } 202 | 203 | ``` 204 | And `myscript.sh` and `myquery.ql` also need to be in the `docker_configs` directory. 205 | 206 | Note that running outside of docker you can have different paths to the scripts/queries, but for running in docker they all need to be in the `docker_configs` directory. 207 | 208 | 209 | #### Results 210 | Results from running the docker container will be output to a `npm_filter_docker_results` directory generated in the directory you run the container in. 211 | 212 | #### Parallel execution: also in docker 213 | ``` 214 | ./runParallelGitReposDocker.sh repo_link_file 215 | ``` 216 | Results are in `npm_filter_parallel_docker_results`. 217 | Note that this is parallel execution inside _one_ docker container, and _not_ parallel docker containers. 218 | 219 | ### Running locally 220 | You can also run this locally on your machine.
221 | To do so, you'll need to have the following installed: 222 | * python3 (running as python), with bs4 and scrapy libraries 223 | * git 224 | * npm 225 | * yarn 226 | * node 227 | 228 | 229 | ## Example uses 230 | Examples of common usages: 231 | 232 | ### Specifying packages as github repos 233 | ``` 234 | # running on a single repo 235 | python src/diagnose_github_repo.py --repo_link https://github.com/expressjs/body-parser 236 | 237 | # running on a single repo with a custom config file 238 | python src/diagnose_github_repo.py --repo_link https://github.com/expressjs/body-parser --config my_config.json 239 | 240 | # running on a single repo at a specific SHA 241 | python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905 242 | 243 | # running on one repo from a link, and a list of repos from a file 244 | python src/diagnose_github_repo.py --repo_link https://github.com/expressjs/body-parser --repo_list_file repo_links.txt 245 | ``` 246 | 247 | ### Specifying packages via npm package names 248 | ``` 249 | # running on a single package 250 | python src/diagnose_npm_package.py --packages body-parser 251 | 252 | # running on multiple packages 253 | python src/diagnose_npm_package.py --packages body-parser memfs fs-extra 254 | 255 | # running on multiple packages with a custom output directory (the parent directory) 256 | python src/diagnose_npm_package.py --packages body-parser memfs --output_dir .. 257 | ``` 258 | 259 | ## Common input generation 260 | 261 | npm-filter takes as input a list of package names or repositories to run over. The [`input_list_scripts` directory](https://github.com/emarteca/npm-filter/tree/master/input_list_scripts) contains scripts for common input generation strategies. 262 | 263 | ## Common output processing 264 | 265 | npm-filter produces JSON results files for each package or repo that is analyzed. The [`output_proc_scripts` directory](https://github.com/emarteca/npm-filter/tree/master/output_proc_scripts) contains scripts for common output processing. 266 | 267 | ## Running tests 268 | 269 | Instructions on setting up and running the npm-filter test suite are included [in the `tests` directory](https://github.com/emarteca/npm-filter/blob/master/tests). 270 | 271 | -------------------------------------------------------------------------------- /Tutorial.md: -------------------------------------------------------------------------------- 1 | ## Tutorial: example walk-through 2 | This is a simple tutorial walking through two examples of npm-filter usage: one basic and one advanced. 3 | We assume you have `docker` installed. 4 | This tutorial will run the latest version of npm-filter [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter). 5 | 6 | ### Setup 7 | ``` 8 | git clone https://github.com/emarteca/npm-filter.git 9 | cd npm-filter 10 | ``` 11 | 12 | ### Usage example 1 13 | Basic usage: analyze a GitHub repo at a specified commit SHA, with the default configuration 14 | ``` 15 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905 16 | ``` 17 | 18 | Since this is using a specific commit SHA, the output should match exactly.
19 | The terminal output should be: 20 | ``` 21 | Diagnosing: memfs --- from: https://github.com/streamich/memfs 22 | Cloning package repository 23 | Checking out specified commit: 863f373185837141504c05ed19f7a253232e0905 24 | Running: yarn test 25 | Running: yarn test:coverage 26 | Running: yarn tslint 27 | ``` 28 | 29 | The output file should be in `npm_filter_docker_results/memfs__results.json`, and the contents of the file should be: 30 | ``` 31 | { 32 | "installation": { 33 | "installer_command": "yarn" 34 | }, 35 | "build": { 36 | "build_script_list": [ 37 | "build" 38 | ] 39 | }, 40 | "testing": { 41 | "test": { 42 | "num_passing": 265, 43 | "num_failing": 0, 44 | "test_infras": [ 45 | "jest" 46 | ], 47 | "timed_out": false 48 | }, 49 | "test:coverage": { 50 | "num_passing": 265, 51 | "num_failing": 0, 52 | "test_infras": [ 53 | "jest" 54 | ], 55 | "timed_out": false 56 | }, 57 | "tslint": { 58 | "test_linters": [ 59 | "tslint -- linter" 60 | ], 61 | "RUNS_NEW_USER_TESTS": false, 62 | "timed_out": false 63 | } 64 | }, 65 | "metadata": { 66 | "repo_link": "https://github.com/streamich/memfs", 67 | "repo_commit_SHA": "863f373185837141504c05ed19f7a253232e0905" 68 | } 69 | } 70 | 71 | ``` 72 | 73 | 74 | ### Usage example 2 75 | Advanced usage: analyze the same GitHub repo as above, but with a user-specified configuration file, running a script and a CodeQL query. 76 | Also track the package dependencies, including the `devDependencies`. 77 | 78 | #### Custom script 79 | In this example, we will make a simple custom script. 80 | This will just list all the files in the directory. 81 | Open a file `docker_configs/ls.sh`, and give it the contents: 82 | ``` 83 | #!/bin/bash 84 | ls 85 | ``` 86 | **Note**: you might need `sudo` to create this file if you aren't in the `docker` group, since `docker` will own this directory if the container has already been run. 87 | 88 | Make it an executable: 89 | ``` 90 | chmod +x docker_configs/ls.sh 91 | ``` 92 | 93 | #### CodeQL query 94 | In this example, we will make a simple CodeQL query to list all the `await` expressions in the package source code, along with the files they appear in. 95 | Open a file `docker_configs/await.ql` and give it the contents: 96 | ``` 97 | import javascript 98 | 99 | from AwaitExpr ae 100 | select ae, ae.getFile() 101 | ``` 102 | 103 | #### Custom configuration file 104 | Now, we need a configuration file to tell npm-filter to run this custom script and query. 105 | We only need to include the configuration fields that we're changing; all settings not specified use their default values.
106 | Open a file `docker_configs/my_config.json` and give it the contents: 107 | ``` 108 | { 109 | "dependencies": { 110 | "track_deps": true, 111 | "include_dev_deps": true 112 | }, 113 | "meta_info": { 114 | "scripts_over_code": [ "ls.sh"], 115 | "QL_queries": [ "await.ql"] 116 | } 117 | } 118 | 119 | ``` 120 | 121 | #### Running and output 122 | Now, run npm-filter with the custom settings: 123 | ``` 124 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905 --config docker_configs/my_config.json 125 | 126 | ``` 127 | 128 | The terminal output should be: 129 | ``` 130 | Diagnosing: memfs --- from: https://github.com/streamich/memfs 131 | Cloning package repository 132 | Checking out specified commit: 863f373185837141504c05ed19f7a253232e0905 133 | Getting dependencies 134 | Running: yarn test 135 | Running: yarn test:coverage 136 | Running: yarn tslint 137 | Running script over code: /home/npm-filter/docker_configs/ls.sh 138 | Running QL query: /home/npm-filter/docker_configs/await.ql 139 | ``` 140 | 141 | The output file should be in `npm_filter_docker_results/memfs__results.json` again, and the contents of the file should be (with dependencies truncated for readability): 142 | ``` 143 | { 144 | "installation": { 145 | "installer_command": "yarn" 146 | }, 147 | "dependencies": { 148 | "dep_list": [ 149 | "is-descriptor", 150 | "is-plain-obj", 151 | "util-deprecate", 152 | "source-map-resolve", 153 | "duplexer3", 154 | "parse5", 155 | "boxen", 156 | "protoduck", 157 | "promise-inflight", 158 | "aws-sign2", 159 | "is-regex", 160 | "conventional-changelog-angular", 161 | "forever-agent", 162 | "signal-exit", 163 | ... 164 | "gauge", 165 | "extend", 166 | "lodash.ismatch" 167 | ], 168 | "includes_dev_deps": true 169 | }, 170 | "build": { 171 | "build_script_list": [ 172 | "build" 173 | ] 174 | }, 175 | "testing": { 176 | "test": { 177 | "num_passing": 265, 178 | "num_failing": 0, 179 | "test_infras": [ 180 | "jest" 181 | ], 182 | "timed_out": false 183 | }, 184 | "test:coverage": { 185 | "num_passing": 265, 186 | "num_failing": 0, 187 | "test_infras": [ 188 | "jest" 189 | ], 190 | "timed_out": false 191 | }, 192 | "tslint": { 193 | "test_linters": [ 194 | "tslint -- linter" 195 | ], 196 | "RUNS_NEW_USER_TESTS": false, 197 | "timed_out": false 198 | } 199 | }, 200 | "scripts_over_code": { 201 | "/home/npm-filter/docker_configs/ls.sh": { 202 | "output": "CHANGELOG.md\nCODE_OF_CONDUCT.md\nCONTRIBUTING.md\nLICENSE\nREADME.md\ncodecov.yml\ncoverage\ndemo\ndocs\nlib\nnode_modules\npackage.json\nprettier.config.js\nrenovate.json\nsrc\ntsconfig.json\ntslint.json\nyarn.lock\n" 203 | 204 | } 205 | }, 206 | "QL_queries": { 207 | "/home/npm-filter/docker_configs/await.ql": {} 208 | }, 209 | "metadata": { 210 | "repo_link": "https://github.com/streamich/memfs", 211 | "repo_commit_SHA": "863f373185837141504c05ed19f7a253232e0905" 212 | } 213 | } 214 | ``` 215 | 216 | The output from running the CodeQL query should be in `npm_filter_docker_results/memfs__await__results.csv`, and the contents should be (truncated for readability): 217 | ``` 218 | "ae","col1" 219 | "await p ... ', 'r')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 220 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 221 | "await p ... ', 'a')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 222 | "await f ... 
('baz')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 223 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 224 | "await p ... ', 'a')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 225 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 226 | "await p ... ', 'a')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 227 | "await f ... (0o444)","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 228 | ... 229 | "await p ... '/foo')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 230 | "await p ... '/bar')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 231 | "await p ... oo', 5)","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 232 | "await p ... '/foo')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 233 | "await p ... arture)","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 234 | "await p ... 'bar')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 235 | "await p ... ', 'w')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 236 | "await p ... 'bar')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 237 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts" 238 | ``` 239 | 240 | 241 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # can be building for one specific repo, at a specific commit 4 | # (if theyre not specified theyre just empty string, that's fine) 5 | repo_link=$1 6 | repo_commit=$2 7 | 8 | # install nvm, so we can then use specific versions of node and npm 9 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | /usr/bin/bash 10 | export NVM_DIR="$HOME/.nvm" 11 | [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" # this loads nvm 12 | 13 | 14 | rm build.sh 15 | rm runDocker.sh 16 | if [ -d local_mount ]; then 17 | rm -r local_mount 18 | fi 19 | 20 | mkdir -p /home/codeql_home 21 | 22 | # cd /home/codeql_home 23 | # curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip 24 | # unzip codeql-linux64.zip 25 | # # clone stable version 26 | # git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo 27 | 28 | apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential 29 | apt-get update 30 | 31 | curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - 32 | echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list 33 | apt-get update 34 | 35 | curl https://sh.rustup.rs -sSf | sh -s -- -y 36 | source $HOME/.cargo/env 37 | 38 | pip3 install --upgrade setuptools setuptools_rust wheel 39 | 40 | echo "alias python=python3" >> /root/.bashrc 41 | echo "alias ipython=ipython3" >> /root/.bashrc 42 | echo "alias vi=vim" >> /root/.bashrc 43 | 44 | cd /home/npm-filter 45 | 46 | if [ -d TESTING_REPOS ]; then 47 | rm -rf TESTING_REPOS 48 | fi 49 | mkdir TESTING_REPOS 50 | 51 | node_version='v18.16.0' # default to just the latest LTS version 52 | npm_version='*' 53 | # if there's a repo_link specified 54 | if [ ! 
-z "$repo_link" ]; then 55 | cd TESTING_REPOS 56 | git clone $repo_link 57 | # repo dir will be the only thing in TESTING_REPOS 58 | repo_dir_name=`ls` 59 | if [ ! -z "$repo_commit" ]; then 60 | cd $repo_dir_name 61 | git checkout $repo_commit 62 | fi 63 | cd /home/npm-filter 64 | 65 | # this will make the node_version and npm_version variables 66 | # it's ok to use the generic version here -- just using it for the vars 67 | # need these dependencies for my get_rel_project_reqs.js script 68 | nvm install $node_version 69 | nvm use $node_version 70 | nvm install-latest-npm 71 | 72 | npm install semver node-fetch 73 | 74 | # script to set the env variables for node_version etc 75 | echo "#!/bin/bash" > req_vars.sh 76 | node get_rel_project_reqs.js TESTING_REPOS/${repo_dir_name} >> req_vars.sh 77 | chmod 700 req_vars.sh 78 | # source in current shell: so we set the variables in the current shell 79 | . req_vars.sh 80 | rm req_vars.sh 81 | 82 | echo $node_version 83 | `$set_req_vars` 84 | rm -r node_modules 85 | 86 | if [[ $node_version == "*" ]]; then 87 | node_version=node 88 | fi 89 | fi 90 | 91 | # set up node and npm, and also add this node/npm config to the bashrc 92 | # so that it runs on docker startup too 93 | 94 | nvm install $node_version 95 | nvm use $node_version 96 | 97 | if [[ $npm_version == "*" ]]; then 98 | nvm install-latest-npm 99 | else 100 | npm install -g npm@${npm_version} 101 | fi 102 | 103 | NVM_DIR=/root/.nvm 104 | NODE_VERSION=`node --version` 105 | 106 | echo "export NODE_VERSION=\"$NODE_VERSION\"" >> /envfile 107 | echo "export NVM_DIR=$NVM_DIR" >> /envfile 108 | echo "export NODE_PATH=$NVM_DIR/$NODE_VERSION/lib/node_modules" >> /envfile 109 | echo "export PATH=$NVM_DIR/$NODE_VERSION/bin:/home/codeql_home/codeql:$PATH" >> /envfile 110 | 111 | cat /envfile >> /root/.bashrc 112 | 113 | # permissive 114 | npm config set strict-ssl false 115 | 116 | # install the dependencies: but use the current version of npm 117 | npm install -g jest mocha tap ava nyc yarn next 118 | 119 | config_file=configs/build_only_config.json 120 | if [ -f "/home/npm-filter/configs/custom_install_script" ]; then 121 | chmod +x /home/npm-filter/configs/custom_install_script 122 | config_file=configs/custom_install_only.json 123 | fi 124 | 125 | if [ ! -z "$repo_link" ]; then 126 | cd /home/npm-filter 127 | # do the install and build only (build_only_config.json config file) 128 | if [ ! 
-z "$repo_commit" ]; then 129 | python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config $config_file --output_dir results 130 | else 131 | python3 src/diagnose_github_repo.py --repo_link $repo_link --config $config_file --output_dir results 132 | fi 133 | fi 134 | 135 | -------------------------------------------------------------------------------- /configs/QL_output_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "install": { 3 | "timeout": 600 4 | }, 5 | "dependencies": { 6 | "track_deps": false, 7 | "include_dev_deps": false 8 | }, 9 | "build": { 10 | "tracked_build_commands": ["build", "compile", "init"], 11 | "timeout": 300 12 | }, 13 | "test": { 14 | "track_tests": true, 15 | "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 16 | "mocha", "jest", "ava", "tap", "jasmine"], 17 | "timeout": 300 18 | }, 19 | "meta_info": { 20 | "VERBOSE_MODE": false, 21 | "ignored_commands": ["watch", "debug"], 22 | "ignored_substrings": ["--watch", "nodemon"], 23 | "rm_after_cloning": true 24 | }, 25 | "QL_output": { 26 | "QL_cutoff": 5 27 | } 28 | } -------------------------------------------------------------------------------- /configs/README.md: -------------------------------------------------------------------------------- 1 | # npm-filter configuration file 2 | The configuration file is a JSON, organized by stages of npm-filter analysis. 3 | The stages are as follows: 4 | * `install`: package installation. Users can specify: 5 | * `timeout`: number of millisections after which, if the install is not complete, the process bails and is considered timed out 6 | * `do_install`: if false, skip the install stage 7 | * `dependencies`: package dependency tracking (this is the libraries the current package depends on, both directly and transitively). Users can specify: 8 | * `track_deps`: if true, this specifies to compute the package dependencies 9 | * `include_dev_deps`: if true, this specifies to include the `devDependencies` in the dependency computation 10 | * `timeout`: timeout in milliseconds 11 | * `build`: package compile/build stage. Users can specify: 12 | * `tracked_build_commands`: a list of build commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the build stage. 13 | * `timeout`: timeout in milliseconds, per build command 14 | * `track_build`: if false, skip the build stage 15 | * `test`: package test stage. Users can specify: 16 | * `track_tests`: if true, then the tool will run this testing diagnostic stage 17 | * `tracked_test_commands`: a list of test commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the test stage. 18 | * `timeout`: timeout in milliseconds, per test command 19 | * `test_verbose_all_output`: an object with two fields to configure the "verbose" test tracking option: here, output and some metrics (runtime, pass/fail, etc) for each test is output to a specified file. Note that currently we only support this option for the `jest` and `mocha` test infras. 20 | * `do_verbose_tracking`: if true, do this verbose test tracking 21 | * `verbose_json_output_file`: name of the file to which to save this verbose output 22 | * `meta_info`: any analysis-level configurations. 
Users can specify: 23 | * `VERBOSE_MODE`: if true, then the output JSON file will include the full output of all the commands run. Mainly for debugging. 24 | * `ignored_commands`: commands to ignore: if these are present in the npm script name, then they are not run even if they otherwise fall into a category of commands to run (mainly used to exclude any interactive-mode commands, such as tests with `watch`) 25 | * `ignored_substrings`: commands to ignore: if these strings are present in the command string itself, then these npm scripts are not run (same as `ignored_commands`, but for the command strings instead of the npm script names) 26 | * `rm_after_cloning`: if true, delete the package source code after the tool is done running. Strongly recommended if running over a large batch of packages. 27 | * `scripts_over_code`: list of paths to script files to run over the package source code. Note that these paths are relative to the location of **the config file**. 28 | * `QL_queries`: list of paths to QL query files to run over the package source code. Like the scripts, these paths are relative to the location of the config file. 29 | * `custom_setup_scripts`: list of paths to script files to run over the package code after cloning, but before any of the stages of `npm-filter` are actually run. Commonly used to replace the default install stage (i.e., set `do_install` to `false`). Like all the other scripts, these paths are relative to the location of the config file. 30 | 31 | Users can customize any of the configuration fields, by providing a JSON file with the desired fields modified. 32 | Default values are used for any fields not specified. 33 | 34 | As a demonstrative example, the default configuration is included below. 35 | ``` 36 | { 37 | "install": { 38 | "timeout": 1000, 39 | "do_install": true 40 | }, 41 | "dependencies": { 42 | "track_deps": false, 43 | "include_dev_deps": false 44 | }, 45 | "build": { 46 | "track_build": true, 47 | "tracked_build_commands": ["build", "compile", "init"], 48 | "timeout": 1000 49 | }, 50 | "test": { 51 | "track_tests": true, 52 | "test_verbose_all_output": { 53 | "do_verbose_tracking": false, 54 | "verbose_json_output_file": "verbose_test_report.json" 55 | }, 56 | "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 57 | "mocha", "jest", "ava", "tap", "jasmine"], 58 | "timeout": 1000 59 | }, 60 | "meta_info": { 61 | "VERBOSE_MODE": false, 62 | "ignored_commands": ["watch", "debug"], 63 | "ignored_substrings": ["--watch", "nodemon"], 64 | "rm_after_cloning": false, 65 | "scripts_over_code": [ ], 66 | "QL_queries": [ ], 67 | "custom_setup_scripts": [ ] 68 | } 69 | } 70 | ``` 71 | 72 | ## Infrastructures tracked 73 | npm-filter is configured to track the following infrastructures: 74 | * Testing infrastructures: mocha, jest, jasmine, tap, lab, ava, gulp. \ 75 | Any test commands that run other infrastructures (such as custom node scripts) will still be parsed, but whether or not the correct number of passing/failing tests is determined depends on the shape of the output. 76 | * Linters: eslint, tslint, xx, standard, prettier, gulp lint 77 | * Coverage tools: istanbul, nyc, coveralls, c8 78 | 79 | If you have another infrastructure you'd like support for, you can send an email with a request, or add it yourself and submit a PR. [This is the relevant code](https://github.com/emarteca/npm-filter/blob/master/src/test_JS_repo_lib.py#L144) that you'd need to extend. 
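
For a rough idea of what such an extension involves: the test infras and their output parsers are declared in checker tables in `src/TestInfo.py` (shown later in this repo). A minimal sketch of registering a hypothetical `vitest` infra follows -- the names and regex here are illustrative assumptions, not a supported configuration:
```
# Sketch only: a hypothetical checker for a summary line like "42 passed".
# Each checker gives a regex factory, plus (token, offset) pairs telling the
# output parser which whitespace-delimited word holds the count, relative to
# the matched token (e.g., -1 means the word right before "passed").
TestInfo.OUTPUT_CHECKERS["vitest"] = {
    "output_regex_fct": lambda condition: r'.*\d+ ' + condition + r'.*',
    "passing": ("passed", -1),
    "failing": ("failed", -1),
}
# ... and point the new infra at its checker(s):
TestInfo.TRACKED_INFRAS["vitest"] = {
    "name": "vitest",
    "output_checkers": ["vitest"],
}
```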
80 | -------------------------------------------------------------------------------- /configs/build_only_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "test": { 3 | "track_tests": false 4 | } 5 | } -------------------------------------------------------------------------------- /configs/custom_install_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "test": { 3 | "track_tests": false 4 | }, 5 | "meta_info": { 6 | "custom_setup_scripts": [ "custom_install_script" ] 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /configs/default_filter_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "install": { 3 | "timeout": 1000, 4 | "do_install": true 5 | }, 6 | "dependencies": { 7 | "track_deps": false, 8 | "include_dev_deps": false 9 | }, 10 | "build": { 11 | "track_build": true, 12 | "tracked_build_commands": ["build", "compile", "init"], 13 | "timeout": 1000 14 | }, 15 | "test": { 16 | "test_command_repeats": 1, 17 | "track_tests": true, 18 | "test_verbose_all_output": { 19 | "do_verbose_tracking": false, 20 | "verbose_json_output_file": "verbose_test_report.json" 21 | }, 22 | "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 23 | "mocha", "jest", "ava", "tap", "jasmine"], 24 | "timeout": 1000 25 | }, 26 | "meta_info": { 27 | "VERBOSE_MODE": false, 28 | "ignored_commands": ["watch", "debug"], 29 | "ignored_substrings": ["--watch", "nodemon"], 30 | "rm_after_cloning": false, 31 | "scripts_over_code": [ ], 32 | "QL_queries": [ ], 33 | "custom_setup_scripts": [ ] 34 | } 35 | } -------------------------------------------------------------------------------- /configs/verbose.json: -------------------------------------------------------------------------------- 1 | { 2 | "test": { 3 | "test_command_repeats": 1, 4 | "test_verbose_all_output": { "do_verbose_tracking": true } 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /configs/verbose_only.json: -------------------------------------------------------------------------------- 1 | { 2 | "install": { 3 | "do_install": false 4 | }, 5 | "build": { 6 | "track_build": false 7 | }, 8 | "test": { 9 | "test_verbose_all_output": { "do_verbose_tracking": true } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /get_rel_project_reqs.js: -------------------------------------------------------------------------------- 1 | // get the build requirements for the project, if they're present 2 | // these are: 3 | // - npm version 4 | // - node version 5 | // - OS 6 | // 7 | // some notes: 8 | // - devs can specify a range of engines (npm, node) that their project works on. 
9 | // If a range is specified we just get one version in the valid range 10 | // - if the project specifically doesn't work on linux, then we're bailing -- this 11 | // only makes linux docker containers 12 | 13 | // also this is in JS instead of python bc the python semver library is garbage 14 | 15 | const semver = require('semver'); 16 | const subproc = require('child_process'); 17 | const fs = require('fs').promises; 18 | 19 | // can specify OS version: https://docs.npmjs.com/cli/v9/configuring-npm/package-json#os 20 | // can specify node/npm version: https://docs.npmjs.com/cli/v9/configuring-npm/package-json#engines 21 | async function get_reqs_from_pkg_json(pkg_json) { 22 | let reqs = {} 23 | 24 | let engines = pkg_json["engines"] || {}; 25 | // if not specified, "*" any version 26 | let npm_req = engines["npm"] || "*"; 27 | let node_req = engines["node"] || "*"; 28 | 29 | // if a range is specified, get a version in the valid range 30 | let { node_version, npm_version } = await get_versions_in_range(node_req, npm_req); 31 | reqs["node_version"] = node_version; 32 | reqs["npm_version"] = npm_version; 33 | 34 | 35 | oss = engines["os"] || []; 36 | // explicit versions and linux is not listed 37 | if (oss.length > 0 && oss.indexOf("linux") == -1) 38 | reqs["linux"] = false 39 | // explicitly excluding linux :'( 40 | else if (oss.indexOf("!linux") != -1) 41 | reqs["linux"] = false 42 | else 43 | reqs["linux"] = true 44 | 45 | return reqs 46 | } 47 | 48 | const BANNED_VERSION_SUBSTRINGS = ["beta", "alpha", "pre"] 49 | 50 | // using semver, let's get a version that matches our specs 51 | async function get_versions_in_range(node_version, npm_version) { 52 | let node_npm_version_pairs = []; 53 | try { 54 | node_npm_version_pairs = await get_node_npm_version_pairs(); 55 | } catch(e) { 56 | console.log("Error getting npm/node pairs -- proceeding blind: " + e); 57 | } 58 | 59 | // normal route: we have the data. 
60 | // now just need to find a pair that matches 61 | if (node_npm_version_pairs.length > 0) { 62 | for (const pair of node_npm_version_pairs) { 63 | if (is_banned(pair["npm"]) || is_banned(pair["node"])) { 64 | continue; 65 | } 66 | if (semver.satisfies(pair["npm"], npm_version) && semver.satisfies(pair["node"], node_version)) { 67 | return { "node_version": pair["node"], "npm_version": pair["npm"] } 68 | } 69 | } 70 | } 71 | 72 | // if we get here we didn't return in the if above 73 | // we don't have the data: get the list of all node versions from nvm: `nvm ls-remote` 74 | // and all npm versions from npm itself: `npm view npm versions` 75 | // NOTE: node version takes precedence over the npm version bc it's more commonly specified, 76 | // and because it's more important 77 | if (node_version !== "*" ) { 78 | // then we care about the node version 79 | subproc.exec('nvm ls-remote', { shell: '/bin/bash'}, (err, stdout, stderr) => { 80 | let versions = stdout.split("\n").map(v => v.trim().split(" ")[0]); // strip formatting and any space-delimited labels (LTS, etc) 81 | for (vers of versions) { 82 | if (is_banned(vers)) { 83 | continue; 84 | } 85 | if (semver.satisfies(vers, node_version)) { 86 | return { "node_version": vers, "npm_version": "*" } 87 | } 88 | } 89 | }) 90 | } 91 | 92 | // if we get here, then we didn't have the version pair data, and we also didn't care about the node version 93 | // so let's get an npm version 94 | if (npm_version !== "*") { 95 | // then we care about the npm version 96 | subproc.exec('npm view npm versions --json', { shell: '/bin/bash'}, (err, stdout, stderr) => { 97 | let versions = JSON.parse(stdout); 98 | for (vers of versions) { 99 | if (is_banned(vers)) { 100 | continue; 101 | } 102 | if (semver.satisfies(vers, npm_version)) { 103 | return { "node_version": "*", "npm_version": vers } 104 | } 105 | } 106 | }) 107 | } 108 | 109 | // no matching pairs: we're flying blind folks 110 | return { "node_version": "*", "npm_version": "*" } 111 | } 112 | 113 | // versions of node and the versions of npm they are bundled with 114 | // see: https://stackoverflow.com/questions/51238643/which-versions-of-npm-came-with-which-versions-of-node 115 | // read this file in -- from it we can get all the valid versions of npm and node 116 | // for fetch usage: https://stackoverflow.com/questions/2499567/how-to-make-a-json-call-to-an-url/2499647#2499647 117 | const NODE_NPM_VERSIONS_URL = 'https://nodejs.org/dist/index.json'; 118 | async function get_node_npm_version_pairs() { 119 | let resp = await fetch(NODE_NPM_VERSIONS_URL); 120 | // look for errors: 121 | if (!resp.ok) { 122 | throw new Error("Uh oh: error reaching npm/node version pairs"); 123 | } 124 | let all_data = await resp.json(); 125 | let node_npm_pairs = []; 126 | for (const vers_data of all_data) { 127 | let node_version = vers_data["version"]; 128 | let npm_version = vers_data["npm"]; 129 | // if both were in the version data 130 | if (node_version && npm_version) 131 | node_npm_pairs.push({node: node_version, npm: npm_version}) 132 | } 133 | return node_npm_pairs; 134 | } 135 | 136 | // check if a version is banned 137 | function is_banned(vers) { 138 | for (const banned of BANNED_VERSION_SUBSTRINGS) { 139 | if (vers.indexOf(banned) > -1) { 140 | return true; 141 | } 142 | } 143 | return false; 144 | } 145 | 146 | function print_as_bash_vars(reqs) { 147 | for ( key in reqs) { 148 | console.log("export " + key + "=" + reqs[key]); 149 | } 150 | } 151 | 152 | async function main(proj_dir) { 153 | let 
pkg_json = {}; 154 | try { 155 | pkg_json = JSON.parse(await fs.readFile(proj_dir + "/package.json", 'utf8')); 156 | } catch(e) { 157 | console.error("Error, bailing out: " + proj_dir + " invalid directory, could not load package.json"); 158 | process.exit(); 159 | } 160 | // get the node and npm versions 161 | let reqs = await get_reqs_from_pkg_json(pkg_json); 162 | print_as_bash_vars(reqs); 163 | } 164 | 165 | if (process.argv.length != 3) { 166 | console.error("Usage: node get_rel_project_reqs.js path_to_project_dir") 167 | process.exit() 168 | } 169 | 170 | let proj_dir = process.argv[2]; 171 | main(proj_dir); 172 | -------------------------------------------------------------------------------- /input_list_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Common input generation 2 | npm-filter takes a list of package names or repositories to run over. This list could come from anywhere, but this directory has scripts to automate some of the most common input generation patterns. 3 | 4 | ## All of a package's direct dependents 5 | A common analysis target is the set of direct dependents of a package -- this is all of the packages that have the specified package as a dependency. We've included a script to automate the computation of the repository links for the direct dependents. 6 | ``` 7 | # general case: 8 | ./get_dep_repos.sh [package_name] 9 | 10 | # specific example: 11 | ./get_dep_repos.sh memfs 12 | 13 | # generates memfs_deps_repos.txt 14 | ``` 15 | This generates a file `[package_name]_deps_repos.txt` where each line is a repo link for a direct dependent of the specified package. 16 | 17 | ### Disclaimer 18 | Note that the dependency computation is done using [the npm package `dependent-packages`](https://www.npmjs.com/package/dependent-packages), which is based on a static version of the npm registry. Therefore, any dependencies computed with this script will be accurate modulo what was present in the version of the npm registry that `dependent-packages` is using. 
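
The generated repo-link file can then be fed straight into npm-filter's runners from the repo root. For example (assuming the `memfs` list generated above, referenced by its path; the config-file and output-dir arguments of the parallel runner are optional):
```
# analyze each direct dependent, one at a time
python3 src/diagnose_github_repo.py --repo_list_file input_list_scripts/memfs_deps_repos.txt

# or analyze them in parallel
./runParallelGitRepos.sh input_list_scripts/memfs_deps_repos.txt
```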
19 | -------------------------------------------------------------------------------- /input_list_scripts/get_dep_repos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pkg_name=$1 4 | 5 | # script to get repo links for all direct dependents of a given package 6 | node get_package_deps.js --package $pkg_name --output_file temp_repos.out 7 | 8 | echo "Done getting deps: now getting repo links" 9 | python3 get_package_repo_link.py --package_file temp_repos.out --good_repo_list_mode True > `echo $pkg_name`_deps_repos.txt 10 | rm temp_repos.out 11 | -------------------------------------------------------------------------------- /input_list_scripts/get_package_deps.js: -------------------------------------------------------------------------------- 1 | const {directDependents} = require('dependent-packages'); 2 | const {argv} = require('yargs'); 3 | const fs = require('fs'); 4 | 5 | let package_name = argv.package; 6 | if (!package_name) { 7 | console.log("Usage: node get_package_deps.js --package npm_package_name [--output_file output_file_name]"); 8 | process.exit(1); 9 | } 10 | 11 | let deps_list = directDependents(package_name); 12 | 13 | if (!argv.output_file) { 14 | console.log(deps_list); 15 | } else { 16 | fs.writeFile( argv.output_file, deps_list.join("\n"), (err)=> { 17 | if(err) { 18 | console.log("Error printing to: " + argv.output_file); 19 | process.exit(1); 20 | } 21 | console.log("Done getting deps for: " + package_name); 22 | }); 23 | } 24 | -------------------------------------------------------------------------------- /input_list_scripts/get_package_repo_link.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from scrapy import signals 3 | from scrapy.crawler import CrawlerProcess 4 | from bs4 import BeautifulSoup 5 | import re 6 | import json 7 | import logging 8 | import argparse 9 | import time 10 | import sys 11 | import os 12 | sys.path.append(os.path.abspath('../src')) 13 | import middlewares 14 | 15 | logging.getLogger('scrapy').propagate = False 16 | 17 | class NPMRepoSpider(scrapy.Spider): 18 | name = "npm-repos" 19 | 20 | def __init__(self, packages=None, good_repo_list_mode=None, *args, **kwargs): 21 | if packages is not None: 22 | self.packages = packages 23 | self.start_urls = ['https://www.npmjs.com/package/' + pkg for pkg in self.packages] 24 | self.pkg_repolink_pairs = [] 25 | # dispatcher.connect(self.spider_closed, signals.spider_closed) 26 | self.good_repo_list_mode = good_repo_list_mode 27 | super(NPMRepoSpider, self).__init__(*args, **kwargs) 28 | 29 | def parse(self, response): 30 | cur_pkg = response.url[ len("https://www.npmjs.com/package/"):] 31 | # TODO should we handle specific response codes? 
32 | # successful responses are those in the 200s 33 | # source: https://doc.scrapy.org/en/latest/topics/spider-middleware.html#module-scrapy.spidermiddlewares.httperror 34 | if response.status > 299 or response.status < 200: 35 | self.pkg_repolink_pairs += [(cur_pkg, "ERROR")] 36 | else: 37 | soup = BeautifulSoup(response.body, 'html.parser') 38 | script = soup.find('script', text=re.compile('window\.__context__')) 39 | json_text = re.search(r'^\s*window\.__context__\s*=\s*({.*?})\s*$', 40 | script.string, flags=re.DOTALL | re.MULTILINE).group(1) 41 | data = json.loads(json_text) 42 | repo_link = "" 43 | try: 44 | repo_link = data['context']['packument']['repository'] 45 | except KeyError: 46 | repo_link = "ERROR" 47 | self.pkg_repolink_pairs += [(cur_pkg, repo_link)] 48 | def closed(self, reason): 49 | # called when the spider is about to be closed 50 | if not self.good_repo_list_mode: 51 | print(self.pkg_repolink_pairs) 52 | else: 53 | good_repos = [rp[1] for rp in self.pkg_repolink_pairs if rp[1] != "ERROR" and rp[1] != ""] 54 | print("\n".join(good_repos)) 55 | 56 | process = CrawlerProcess(settings={ 57 | "FEEDS": { 58 | "items.json": {"format": "json"}, 59 | }, 60 | "HTTPERROR_ALLOW_ALL": True, 61 | "RETRY_HTTP_CODES" : [429], 62 | # next couple settings are for beating the npm request rate limiter 63 | #"DOWNLOAD_DELAY": 0.75, # 3/4 second delay 64 | "RETRY_TIMES": 6, 65 | #"CONCURRENT_REQUESTS_PER_DOMAIN" : 2, 66 | "DOWNLOADER_MIDDLEWARES": { 67 | "scrapy.downloadermiddlewares.retry.RetryMiddleware": None, 68 | "middlewares.TooManyRequestsRetryMiddleware": 543, 69 | } 70 | }) 71 | 72 | 73 | argparser = argparse.ArgumentParser(description="Get repo link for packages") 74 | argparser.add_argument("--packages", metavar="package", type=str, nargs='*', help="a package to get repo link for") 75 | argparser.add_argument("--package_file", metavar="package_file", type=str, nargs='?', help="file with list of packages to get links for") 76 | argparser.add_argument("--good_repo_list_mode", metavar="good_repo_list_mode", type=bool, nargs='?', help="if true, print only the repo links with no errors") 77 | args = argparser.parse_args() 78 | 79 | packages=[] 80 | if args.packages: 81 | packages += args.packages 82 | if args.package_file: 83 | with open(args.package_file) as f: 84 | packages += f.read().split("\n") 85 | 86 | process.crawl(NPMRepoSpider, packages=packages, good_repo_list_mode=args.good_repo_list_mode) 87 | process.start() # the script will block here until the crawling is finished 88 | 89 | 90 | -------------------------------------------------------------------------------- /input_list_scripts/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "dependent-packages": "^2.2.2", 4 | "yargs": "*" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /output_proc_scripts/README.md: -------------------------------------------------------------------------------- 1 | # Common output processing 2 | 3 | npm-filter produces JSON results files for each package or repo that is analyzed. This directory has a python script that does some common output processing: 4 | given a directory with results JSON files, this script finds the list of all the analyzed packages/repos for which there were no setup/install errors, and for which there is at least one test command that has >= 1 passing test and no failing tests. 
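
For reference, a results file that passes these checks contains (at least) fields of the following shape -- the field names match npm-filter's actual output, but the values here are illustrative:
```
{
  "installation": { "installer_command": "yarn" },
  "testing": {
    "test": { "num_passing": 10, "num_failing": 0, "test_infras": [ "mocha" ], "timed_out": false }
  }
}
```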
5 | 6 | ## Usage 7 | 8 | The script takes one optional argument: the directory in which to look at results files. If not provided, the current directory is used as a default. 9 | ``` 10 | # general case 11 | python get_json_results.py [output directory to look for results JSON files in] 12 | 13 | # specific case: look at current directory 14 | python get_json_results.py 15 | 16 | # specific case: look at another directory (here, the parent directory) 17 | python get_json_results.py .. 18 | ``` 19 | 20 | ### Example output 21 | This script generates a list of all the analyzed packages/repos that successfully ran and for which there is at least one test command that has >= 1 passing test and no failing tests. 22 | This list is printed to the console newline-delimited, the repo/package name paired with the relevant test command. 23 | 24 | For example, running this script on a directory containing the results of running npm-filter on `body-parser` at SHA `d0a214b3beded8a9cd2dcb51d355f92c9ead81d4` as given in the working example will produce the following output: 25 | 26 | ``` 27 | Following is a list of all projects with commands meeting the criteria, paired with these commands 28 | ('..//body-parser__results.json', ['test']) 29 | ``` 30 | This means that the `body-parser` package has a test command `test` that has passing test(s) and no failing tests. 31 | 32 | ## Customization 33 | This script is hardcoded to exclude packages with setup/install errors, and to only report packages with a test command that has >= 1 passing test(s) and no failing tests. 34 | It can easily be modified for different search parameters. 35 | 36 | ### Exclusion of packages 37 | Exclusion of packages is done via a `JSON_filter` JSON object, hardcoded at the beginning of the script. To exclude packages with particular results, simply add the fields in the results JSON you want to exclude to this object. 38 | For example, if you want to additionally exclude packages that have no build commands, then you would extend the `JSON_filter` variable with the `build` field as follows: 39 | ``` 40 | JSON_filter = { 41 | "setup": { 42 | "repo_cloning_ERROR": True, 43 | "pkg_json_ERROR": True 44 | }, 45 | "installation": { 46 | "ERROR": True 47 | }, 48 | + "build": { 49 | + "build_script_list": [] 50 | + } 51 | } 52 | ``` 53 | 54 | ### Filtering for criteria other than all-passing test commands 55 | The script is hardcoded to only report non-excluded packages for which there is a test command with >= 1 passing test and no failing tests. 56 | To modify these criteria, either modify the `get_passing_test_commands` function or write a new function that implements the criteria you want, and call it where `get_passing_test_commands` is currently called. 57 | 58 | For example, to get packages that run a linter, you could add the function: 59 | ``` 60 | def get_successful_linter_commands(json_check): 61 | test_dict = json_check.get("testing", {}) 62 | passing_commands = [] 63 | for test_com, test_out in test_dict.items(): 64 | if test_out.get("timed_out", False) or test_out.get("ERROR", False): 65 | continue 66 | if test_out.get("test_linters", []) == []: 67 | continue 68 | passing_commands += [test_com] 69 | return( passing_commands) 70 | ``` 71 | And then, instead of calling `get_passing_test_commands`, call `get_successful_linter_commands`. 
72 | In this case, running the script over the directory with `body-parser__results.json` would yield the output: 73 | ``` 74 | Following is a list of all projects with commands meeting the criteria, paired with these commands 75 | ('..//body-parser__results.json', ['lint']) 76 | ``` 77 | 78 | -------------------------------------------------------------------------------- /output_proc_scripts/count_tests_run.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | # simple, unrefined script for parsing npm-filter output files 6 | # for the current directory, get all files named *__results.json 7 | # (wildcard represents the project name) 8 | # prints out (Number of tests passing),(Number of tests failing) 9 | 10 | 11 | # JSON specifying possible errors 12 | # that should be avoided if an input JSON will pass the filter check 13 | 14 | JSON_filter = { 15 | "setup": { 16 | "repo_cloning_ERROR": True, 17 | "pkg_json_ERROR": True 18 | }, 19 | "installation": { 20 | "ERROR": True 21 | }, 22 | } 23 | 24 | # input to the function is a JSON of undesirable elements 25 | # return true if the JSON to be filtered has any of the filter elements 26 | def json_contains_issues(json_check, json_filter): 27 | contains_issues = False 28 | for filter_key, filter_val in json_filter.items(): 29 | # recursive case 30 | if isinstance( filter_val, dict): 31 | contains_issues = contains_issues or json_contains_issues( json_check.get(filter_key, {}), filter_val) 32 | # base case 33 | contains_issues = contains_issues or (json_check.get(filter_key, {}) == filter_val) 34 | return( contains_issues) 35 | 36 | # count the total passing and failing tests across all test commands 37 | def get_num_tests_run(json_check): 38 | test_dict = json_check.get("testing", {}) 39 | num_passing = 0 40 | num_failing = 0 41 | 42 | for test_com, test_out in test_dict.items(): 43 | if test_out.get("timed_out", False) or (not test_out.get("RUNS_NEW_USER_TESTS", True)) or test_out.get("ERROR", False): 44 | continue 45 | num_passing += test_out.get("num_passing", 0) 46 | num_failing += test_out.get("num_failing", 0) 47 | return [num_passing, num_failing] 48 | 49 | output_proc_dir = "." 
50 | if len(sys.argv) == 2: 51 | output_proc_dir = sys.argv[1] 52 | else: 53 | print("No output directory specified: looking at current directory") 54 | 55 | # get all relevant files 56 | all_files = [ output_proc_dir + "/" + fname for fname in os.listdir(output_proc_dir) if fname.find("__results.json") != -1] 57 | passing_files = [] 58 | total_passing_tests = 0 59 | total_failing_tests = 0 60 | for file in all_files: 61 | with open(file) as f: 62 | json_check = json.load(f) 63 | proj_name = file[ : file.index("__results.json")] 64 | if json_contains_issues( json_check, JSON_filter): 65 | # print(proj_name + " has setup/install errors") 66 | continue 67 | num_tests = get_num_tests_run( json_check) 68 | total_passing_tests += num_tests[0] 69 | total_failing_tests += num_tests[1] 70 | 71 | print(f"{total_passing_tests},{total_failing_tests}") -------------------------------------------------------------------------------- /output_proc_scripts/get_json_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | 5 | # simple, unrefined script for parsing npm-filter output files 6 | # for the current directory, get all files named *__results.json 7 | # (wildcard represents the project name) 8 | # from this list, filter for projects with specific characteristics 9 | 10 | 11 | # JSON specifying possible errors 12 | # that should be avoided if an input JSON will pass the filter check 13 | 14 | JSON_filter = { 15 | "setup": { 16 | "repo_cloning_ERROR": True, 17 | "pkg_json_ERROR": True 18 | }, 19 | "installation": { 20 | "ERROR": True 21 | }, 22 | } 23 | 24 | # input to the function is a JSON of undesirable elements 25 | # return true if the JSON to be filtered has any of the filter elements 26 | def json_contains_issues(json_check, json_filter): 27 | contains_issues = False 28 | for filter_key, filter_val in json_filter.items(): 29 | # recursive case 30 | if isinstance( filter_val, dict): 31 | contains_issues = contains_issues or json_contains_issues( json_check.get(filter_key, {}), filter_val) 32 | # base case 33 | contains_issues = contains_issues or (json_check.get(filter_key, {}) == filter_val) 34 | return( contains_issues) 35 | 36 | # by default, there needs to be at least one passing test 37 | def get_passing_test_commands(json_check, min_passing=1): 38 | test_dict = json_check.get("testing", {}) 39 | passing_commands = [] 40 | for test_com, test_out in test_dict.items(): 41 | if test_out.get("timed_out", False) or (not test_out.get("RUNS_NEW_USER_TESTS", True)) or test_out.get("ERROR", False): 42 | continue 43 | if test_out.get("num_failing", 0) > 0: 44 | continue 45 | if test_out.get("num_passing", 0) < min_passing: 46 | continue 47 | passing_commands += [test_com] 48 | return( passing_commands) 49 | 50 | output_proc_dir = "." 
51 | if len(sys.argv) == 2: 52 | output_proc_dir = sys.argv[1] 53 | else: 54 | print("No output directory specified: looking at current directory") 55 | 56 | # get all relevant files 57 | all_files = [ output_proc_dir + "/" + fname for fname in os.listdir(output_proc_dir) if fname.find("__results.json") != -1] 58 | passing_files = [] 59 | for file in all_files: 60 | with open(file) as f: 61 | json_check = json.load(f) 62 | proj_name = file[ : file.index("__results.json")] 63 | if json_contains_issues( json_check, JSON_filter): 64 | # print(proj_name + " has setup/install errors") 65 | continue 66 | passing_commands = get_passing_test_commands( json_check) 67 | if len(passing_commands) > 0: 68 | passing_files += [(file, passing_commands)] 69 | print("\nFollowing is a list of all projects with commands meeting the criteria, paired with these commands") 70 | print("\n".join([str(pf) for pf in passing_files])) -------------------------------------------------------------------------------- /qlpack.yml: -------------------------------------------------------------------------------- 1 | name: npm-filter-queries 2 | version: 0.0.0 3 | libraryPathDependencies: codeql-javascript 4 | -------------------------------------------------------------------------------- /runDocker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | npm_filter_command=$@ 4 | 5 | if [ ! -d local_mount ]; then 6 | mkdir local_mount 7 | fi 8 | 9 | # create the dir ourselves so we have write privilege to it 10 | if [ ! -d npm_filter_docker_results ]; then 11 | mkdir npm_filter_docker_results 12 | fi 13 | 14 | if [ -z "$DOCKER_IMAGE" ]; then 15 | DOCKER_IMAGE=emarteca/npm-filter:latest 16 | fi 17 | 18 | docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \ 19 | --volume `pwd`/npm_filter_docker_results:/home/npm-filter/results \ 20 | --volume `pwd`/docker_configs:/home/npm-filter/docker_configs \ 21 | -w /home/npm-filter \ 22 | $DOCKER_IMAGE \ 23 | bash -c "source /envfile; PATH=/home/codeql_home/codeql:\$PATH; $npm_filter_command --output_dir results" 24 | rm -r local_mount -------------------------------------------------------------------------------- /runParallelGitRepos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | repo_link_file=$1 4 | config_file=$2 5 | output_dir=$3 6 | 7 | if [ ! -f "$config_file" ]; then 8 | config_file="configs/QL_output_config.json" 9 | fi 10 | 11 | if [ ! -d "$output_dir" ]; then 12 | output_dir=`pwd` 13 | fi 14 | 15 | # you'll probably want to bg this 16 | nohup parallel -j 20 -a $repo_link_file --timeout 600 --joblog job.log python3 src/diagnose_github_repo.py --repo_link {} --config $config_file --output_dir $output_dir 17 | -------------------------------------------------------------------------------- /runParallelGitReposDocker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | repo_link_file=$1 4 | config_file=$2 5 | 6 | if [ ! -f "$config_file" ]; then 7 | config_file="configs/QL_output_config.json" 8 | fi 9 | 10 | if [ ! -d local_mount ]; then 11 | mkdir local_mount 12 | fi 13 | 14 | # copy config files to a shared volume with the container 15 | if [ ! 
-d npm_filter_parallel_docker_results ]; then 16 | mkdir npm_filter_parallel_docker_results 17 | fi 18 | cp $repo_link_file npm_filter_parallel_docker_results/repo_links.txt 19 | cp $config_file npm_filter_parallel_docker_results/config.json 20 | 21 | docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \ 22 | --volume `pwd`/npm_filter_parallel_docker_results:/home/npm-filter/results \ 23 | -w /home/npm-filter \ 24 | -it emarteca/npm-filter:latest \ 25 | bash -c "nohup parallel -j 20 -a results/repo_links.txt --timeout 600 --joblog job.log python3 src/diagnose_github_repo.py --repo_link {} --config results/config.json --output_dir results" 26 | 27 | rm -r local_mount 28 | rm npm_filter_parallel_docker_results/repo_links.txt npm_filter_parallel_docker_results/config.json 29 | 30 | -------------------------------------------------------------------------------- /run_verbose_for_repo_and_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # run npm-filter on a specified repo with verbose output, at an optional commit 4 | # output to the "results" directory 5 | 6 | # usage: ./run_verbose_for_repo_and_config.sh repo_link repo_commit 7 | 8 | repo_link=$1 9 | config_file=configs/verbose_only.json 10 | repo_commit=$2 11 | 12 | if [ ! -z "$repo_link" ] && [ ! -z "$config_file" ]; then 13 | if [ ! -z "$repo_commit" ]; then 14 | python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config $config_file --output_dir results 15 | else 16 | python3 src/diagnose_github_repo.py --repo_link $repo_link --config $config_file --output_dir results 17 | fi 18 | fi -------------------------------------------------------------------------------- /src/TestInfo.py: -------------------------------------------------------------------------------- 1 | import re 2 | import output_parsing.test_output_proc as TestOutputProc 3 | 4 | class TestInfo: 5 | OUTPUT_CHECKERS = { 6 | "mocha": 7 | { 8 | "output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '.*', 9 | "passing": ("passing", -1), 10 | "failing": ("failing", -1) 11 | }, 12 | "jest": 13 | { 14 | "output_regex_fct" : lambda condition: r'Tests:.*\d+ ' + condition, 15 | "passing": ("passed", -1), 16 | "failing": ("failed", -1) 17 | }, 18 | "tap": { 19 | "output_regex_fct" : lambda condition: r'# ' + condition + '.*\d+', 20 | "passing": ("pass", 1), 21 | "failing": ("fail", 1) 22 | }, 23 | "tap_raw": { 24 | "output_regex_fct" : lambda condition: r'' + condition + ' \d+ - (?!.*time=).*$', 25 | "passing": (r'^.*(?!not )ok', None), # this "passing" is a regex: count "ok" but not "not ok" 26 | "failing": (r'^.*not ok', None) 27 | }, 28 | "ava": 29 | { 30 | "output_regex_fct": lambda condition: r'.*\d+ tests? ' + condition, 31 | "passing": ("passed", -2), 32 | "failing": ("failed", -2) 33 | }, 34 | "ava_2": 35 | { 36 | "output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '$', 37 | "passing": ("passed", -1), 38 | "failing": ("failed", -1) 39 | }, 40 | } 41 | # extra args, their position in the arg list, and any post-processing required 42 | # post-processing is a function that takes 2 arguments: input file and output file 43 | # CAUTION: DO NOT PUT ANY MORE ARGS AFTER PLACEHOLDER_OUTPUT_FILE_NAME. 
THE CODE THAT 44 | # PARSES THE OUTPUT RELIES ON THIS BEING THE *LAST* ARGUMENT 45 | VERBOSE_TESTS_EXTRA_ARGS = { 46 | "jest": { 47 | "args": " --verbose --json -i --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$", 48 | "position": -1, 49 | "post_processing": TestOutputProc.parse_jest_json_to_csv 50 | }, 51 | "mocha": { 52 | "args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$", 53 | "position": -1, 54 | "post_processing": TestOutputProc.parse_mocha_json_to_csv 55 | } 56 | } 57 | TRACKED_INFRAS = { 58 | "mocha": { 59 | "name": "mocha", 60 | "output_checkers": [ "mocha", "tap" ], 61 | "verbose_tests_extra_args": [ "mocha" ] 62 | }, 63 | "jest": { 64 | "name": "jest", 65 | "output_checkers": [ "jest" ], 66 | "verbose_tests_extra_args": [ "jest" ] 67 | }, 68 | "jasmine": { 69 | "name": "jasmine", 70 | "output_checkers": [ "mocha" ] 71 | }, 72 | "tap": { 73 | "name": "tap", 74 | "output_checkers": [ "tap", "tap_raw" ] 75 | }, 76 | "lab": { 77 | "name": "lab", 78 | "output_checkers": [] 79 | }, 80 | "ava": { 81 | "name": "ava", 82 | "output_checkers": [ "ava", "ava_2" ] 83 | }, 84 | "gulp": { 85 | "name": "gulp", 86 | "output_checkers": [ "mocha" ] 87 | }, 88 | } 89 | TRACKED_COVERAGE = { 90 | "istanbul": "istanbul -- coverage testing", 91 | "nyc": "nyc -- coverage testing", 92 | "coveralls": "coveralls -- coverage testing", 93 | "c8": "c8 -- coverage testing" 94 | } 95 | TRACKED_LINTERS = { 96 | "eslint": "eslint -- linter", 97 | "tslint": "tslint -- linter", 98 | "xx": "xx -- linter", 99 | "standard": "standard -- linter", 100 | "prettier": "prettier -- linter", 101 | "gulp lint": "gulp lint -- linter" 102 | } 103 | 104 | TRACKED_RUNNERS = [ "node", "babel-node", "grunt", "lerna" ] 105 | 106 | def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE): 107 | self.success = success 108 | self.error_stream = error_stream 109 | self.output_stream = output_stream 110 | self.manager = manager 111 | # start all other fields as None 112 | self.test_infras = None 113 | self.test_covs = None 114 | self.test_lints = None 115 | self.nested_test_commands = None 116 | self.num_passing = None 117 | self.num_failing = None 118 | self.timed_out = False 119 | self.VERBOSE_MODE = VERBOSE_MODE 120 | self.test_verbosity_output = None 121 | self.start_time = 0 122 | self.end_time = 0 123 | 124 | def set_test_command( self, test_command): 125 | self.test_command = test_command 126 | 127 | def set_test_verbosity_output( self, verbose_output): 128 | self.test_verbosity_output = verbose_output 129 | 130 | def get_test_infras_list( test_command, manager): 131 | test_infras = [] 132 | test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, test_command, manager) ] 133 | test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, test_command, manager) ] 134 | return( test_infras) 135 | 136 | def compute_test_infras( self): 137 | self.test_infras = [] 138 | self.test_covs = [] 139 | self.test_lints = [] 140 | self.nested_test_commands = [] 141 | if self.test_command: 142 | self.test_infras += TestInfo.get_test_infras_list(self.test_command, self.manager) 143 | self.test_covs += [ TestInfo.TRACKED_COVERAGE[ti] for ti in TestInfo.TRACKED_COVERAGE if called_in_command(ti, self.test_command, self.manager) ] 144 | self.test_lints += [ TestInfo.TRACKED_LINTERS[ti] for ti in TestInfo.TRACKED_LINTERS if called_in_command(ti, self.test_command, self.manager) ] 145 | self.test_infras = list(set(self.test_infras)) 146 | self.test_covs = 
list(set(self.test_covs)) 147 | self.test_lints = list(set(self.test_lints)) 148 | # TODO: maybe we can also figure it out from the output stream 149 | 150 | def compute_nested_test_commands( self, test_commands): 151 | # one might think that we should only check the package's own manager 152 | # however, it's common to mix and match (esp. to run commands with "npm run" even if the package manager is yarn) 153 | self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "npm run " + tc, self.test_command, self.manager) ] 154 | self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "yarn " + tc, self.test_command, self.manager) ] 155 | 156 | def compute_test_stats( self): 157 | if not self.test_infras or self.test_infras == []: 158 | return 159 | test_output = self.output_stream.decode('utf-8') + self.error_stream.decode('utf-8') 160 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 161 | test_output = ansi_escape.sub('', test_output) 162 | self.num_passing = 0 163 | self.num_failing = 0 164 | self.timed_out = (self.error_stream.decode('utf-8') == "TIMEOUT ERROR") 165 | for infra in self.test_infras: 166 | output_checker_names = TestInfo.TRACKED_INFRAS.get(infra, {}).get("output_checkers", []) 167 | if infra in TestInfo.TRACKED_RUNNERS and output_checker_names == []: 168 | output_checker_names = self.OUTPUT_CHECKERS.keys() # all the checkers 169 | for checker_name in output_checker_names: 170 | div_factor = 2 if checker_name == "ava_2" else 1 171 | checker = self.OUTPUT_CHECKERS[ checker_name] 172 | self.num_passing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["passing"][0], checker["passing"][1]) / div_factor) 173 | self.num_failing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["failing"][0], checker["failing"][1]) / div_factor) 174 | 175 | def get_json_rep( self): 176 | json_rep = {} 177 | if self.VERBOSE_MODE: 178 | json_rep["test_debug"] = "" 179 | if not self.success: 180 | json_rep["ERROR"] = True 181 | if self.VERBOSE_MODE: 182 | json_rep["test_debug"] += "\nError output: " + self.error_stream.decode('utf-8') 183 | if self.num_passing is not None and self.num_failing is not None: 184 | json_rep["num_passing"] = self.num_passing 185 | json_rep["num_failing"] = self.num_failing 186 | if self.VERBOSE_MODE: 187 | json_rep["test_debug"] += "\nOutput stream: " + self.output_stream.decode('utf-8') 188 | if self.test_infras and self.test_infras != []: 189 | json_rep["test_infras"] = [TestInfo.TRACKED_INFRAS.get(infra, {}).get("name", "Custom Testing: " + infra) for infra in self.test_infras] 190 | if self.test_covs and self.test_covs != []: 191 | json_rep["test_coverage_tools"] = self.test_covs 192 | if self.test_lints and self.test_lints != []: 193 | json_rep["test_linters"] = self.test_lints 194 | if self.nested_test_commands and self.nested_test_commands != []: 195 | json_rep["nested_test_commands"] = self.nested_test_commands 196 | if "test_infras" not in json_rep: 197 | json_rep["RUNS_NEW_USER_TESTS"] = False 198 | if self.test_verbosity_output: 199 | json_rep["test_verbosity_output"] = self.test_verbosity_output 200 | json_rep["timed_out"] = self.timed_out 201 | json_rep["start_time"] = self.start_time 202 | json_rep["end_time"] = self.end_time 203 | return( json_rep) 204 | 205 | def __str__(self): 206 | to_ret = "" 207 | if not self.success: 208 | to_ret += "ERROR" 209 | if self.VERBOSE_MODE: 210 | to_ret += "\nError output: " + self.error_stream.decode('utf-8') 
211 | else: 212 | to_ret += "SUCCESS" 213 | if self.num_passing is not None and self.num_failing is not None: 214 | to_ret += "\nPassing tests: " + str(self.num_passing) + "\nFailing tests: " + str(self.num_failing) 215 | if self.VERBOSE_MODE: 216 | to_ret += "\nOutput stream: " + self.output_stream.decode('utf-8') 217 | if self.test_infras and self.test_infras != []: 218 | to_ret += "\nTest infras: " + str([TestInfo.TRACKED_INFRAS[infra]["name"] for infra in self.test_infras]) 219 | if self.test_covs and self.test_covs != []: 220 | to_ret += "\nCoverage testing: " + str(self.test_covs) 221 | if self.test_lints and self.test_lints != []: 222 | to_ret += "\nLinter: " + str(self.test_lints) 223 | if self.nested_test_commands and self.nested_test_commands != []: 224 | to_ret += "\nNested test commands: " + str(self.nested_test_commands) 225 | to_ret += "\nTimed out: " + str(self.timed_out) 226 | return( to_ret) 227 | 228 | def called_in_command( str_comm, command, manager): 229 | # command ends with command terminator (this list includes \0 end-of-string, 230 | # but this is not available to check in Python so we use endswith) 231 | post_command_chars = [ "" ] if command.endswith(str_comm) else [ " ", "\t", ";"] 232 | for pcc in post_command_chars: 233 | check_comm = str_comm + pcc 234 | if command.find( check_comm) == 0: 235 | return( True) 236 | if command.find( "&&" + check_comm) > -1 or command.find( "&& " + check_comm) > -1: 237 | return( True) 238 | if command.find( "cross-env NODE_ENV=test " + check_comm) > -1 or command.find( "cross-env NODE_ENV=production " + check_comm) > -1: 239 | return( True) 240 | if command.find( "cross-env CI=true " + check_comm) > -1: 241 | return( True) 242 | if command.find( "cross-env TZ=utc " + check_comm) > -1: 243 | return( True) 244 | if command.find( "opener " + check_comm) > -1: 245 | return( True) 246 | if command.find( "gulp " + check_comm) > -1: 247 | return( True) 248 | if command.find( "nyc " + check_comm) > -1: 249 | return( True) 250 | return( False) 251 | 252 | def test_cond_count( test_output, regex_fct, condition, offset): 253 | ptrn = re.compile( regex_fct(condition), re.MULTILINE) 254 | results = ptrn.findall( test_output) 255 | if offset is None: 256 | return( len( results)) # just count the number of hits, each hit is an individual test (example: tap "ok" vs "not ok") 257 | num_cond = 0 258 | for r in results: 259 | temp = r.split() 260 | try: 261 | num_cond += int( temp[temp.index(condition) + offset]) 262 | except ValueError: 263 | num_cond += 0 264 | return( num_cond) -------------------------------------------------------------------------------- /src/diagnose_github_repo.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import subprocess 4 | import os 5 | import argparse 6 | from test_JS_repo_lib import * 7 | import get_repo_links as GetLinks 8 | 9 | # expecting links to look like : 10 | # https://github.com/user/reponame [optional commit SHA] 11 | def get_name_from_link(link): 12 | # split first on whitespace and take the first word 13 | # to make sure we ignore the optional commit SHA 14 | return( link.split()[0].split("/")[-1]) 15 | 16 | def get_repo_and_SHA_from_repo_link(repo): 17 | split_res = repo.split() 18 | commit_SHA = None 19 | if len(split_res) > 1: 20 | commit_SHA = split_res[1] 21 | return(split_res[0], commit_SHA) 22 | 23 | # same format as getting the name from the repo link: we want the name of the dir, 24 | # so after the last slash (and if 
there's no slash the whole name is returned) 25 | def get_name_from_path(repo_local_path): 26 | return( repo_local_path.split("/")[-1]) 27 | 28 | 29 | class RepoWalker(): 30 | name = "npm-pkgs" 31 | VERBOSE_MODE = False 32 | RM_AFTER_CLONING = False 33 | SCRIPTS_OVER_CODE = [] 34 | CUSTOM_SETUP_SCRIPTS = [] 35 | CUSTOM_LOCK_FILES = [] 36 | QL_QUERIES = [] 37 | 38 | DO_INSTALL = True 39 | INCLUDE_DEV_DEPS = False 40 | COMPUTE_DEP_LISTS = False 41 | TRACK_BUILD = True 42 | TRACK_TESTS = True 43 | TEST_VERBOSE_ALL_OUTPUT = False 44 | TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json" 45 | TEST_COMMAND_REPEATS = 1 46 | 47 | TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 48 | "mocha", "jest", "ava", "tap", "jasmine"] 49 | IGNORED_COMMANDS = ["watch", "debug"] 50 | IGNORED_SUBSTRINGS = ["--watch", "nodemon"] 51 | TRACKED_BUILD_COMMANDS = ["build", "compile", "init"] 52 | 53 | # timeouts for stages, in seconds 54 | INSTALL_TIMEOUT = 10800 # 3 hours 55 | # note: these are timeouts per *script* in the stage of the process 56 | BUILD_TIMEOUT = 10800 # 3 hours 57 | TEST_TIMEOUT = 10800 # 3 hours 58 | 59 | QL_CUTOFF = 5 # ignore if there are < 5 results 60 | 61 | def __init__(self, config_file="", output_dir = "."): 62 | self.set_up_config( config_file) 63 | self.output_dir = os.path.abspath(output_dir) 64 | 65 | def set_repo_links(self, repo_links): 66 | self.repo_links = repo_links 67 | 68 | def set_local_repo_path(self, repo_local_dir): 69 | self.repo_local_dir = repo_local_dir 70 | 71 | def set_up_config( self, config_file): 72 | if not os.path.exists(config_file): 73 | if config_file != "": 74 | print("Could not find config file: " + config_file + " --- using defaults") 75 | return 76 | 77 | config_json = {} 78 | try: 79 | with open( config_file, 'r') as f: 80 | config_json = json.loads(f.read()) 81 | except: 82 | print("Error reading config file: " + config_file + " --- using defaults") 83 | 84 | # now, read the relevant config info from the file 85 | cf_dict = config_json.get( "meta_info", {}) 86 | self.VERBOSE_MODE = cf_dict.get("VERBOSE_MODE", self.VERBOSE_MODE) 87 | self.IGNORED_COMMANDS = cf_dict.get( "ignored_commands", self.IGNORED_COMMANDS) 88 | self.IGNORED_SUBSTRINGS = cf_dict.get( "ignored_substrings", self.IGNORED_SUBSTRINGS) 89 | self.RM_AFTER_CLONING = cf_dict.get( "rm_after_cloning", self.RM_AFTER_CLONING) 90 | # scripts and query file location is relative to the config file 91 | self.SCRIPTS_OVER_CODE = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 92 | for p in cf_dict.get( "scripts_over_code", self.SCRIPTS_OVER_CODE)] 93 | self.QL_QUERIES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 94 | for p in cf_dict.get( "QL_queries", self.QL_QUERIES)] 95 | self.CUSTOM_SETUP_SCRIPTS = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 96 | for p in cf_dict.get( "custom_setup_scripts", self.CUSTOM_SETUP_SCRIPTS)] 97 | 98 | cf_dict = config_json.get( "dependencies", {}) 99 | self.INCLUDE_DEV_DEPS = cf_dict.get("include_dev_deps", self.INCLUDE_DEV_DEPS) 100 | self.COMPUTE_DEP_LISTS = cf_dict.get("track_deps", self.COMPUTE_DEP_LISTS) 101 | 102 | cf_dict = config_json.get( "install", {}) 103 | self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL) 104 | self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT) 105 | self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file 
else __file__)) + "/" + p 106 | for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)] 107 | 108 | cf_dict = config_json.get( "build", {}) 109 | self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD) 110 | self.BUILD_TIMEOUT = cf_dict.get("timeout", self.BUILD_TIMEOUT) 111 | self.TRACKED_BUILD_COMMANDS = cf_dict.get("tracked_build_commands", self.TRACKED_BUILD_COMMANDS) 112 | 113 | cf_dict = config_json.get("test", {}) 114 | self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT) 115 | self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS) 116 | self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS) 117 | self.TEST_COMMAND_REPEATS = cf_dict.get("test_command_repeats", self.TEST_COMMAND_REPEATS) 118 | test_verbose_config = cf_dict.get("test_verbose_all_output", {}) 119 | self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT) 120 | self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON) 121 | 122 | cf_dict = config_json.get("QL_output", {}) 123 | self.QL_CUTOFF = cf_dict.get("QL_cutoff", self.QL_CUTOFF) 124 | 125 | def iterate_over_repos( self): 126 | for repo in self.repo_links: 127 | [repo_link, commit_SHA] = get_repo_and_SHA_from_repo_link(repo) 128 | package_name = get_name_from_link( repo_link) 129 | json_results = diagnose_package( repo_link, self, commit_SHA) 130 | json_results["metadata"] = {} 131 | json_results["metadata"]["repo_link"] = repo_link 132 | # if not None 133 | if commit_SHA: 134 | json_results["metadata"]["repo_commit_SHA"] = commit_SHA 135 | with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: 136 | json.dump( json_results, f, indent=4) 137 | if self.repo_local_dir: 138 | package_name = get_name_from_path( self.repo_local_dir) 139 | json_results = diagnose_local_dir(self.repo_local_dir, self) 140 | json_results["metadata"] = {} 141 | json_results["metadata"]["repo_local_dir"] = self.repo_local_dir 142 | with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: 143 | json.dump( json_results, f, indent=4) 144 | 145 | 146 | argparser = argparse.ArgumentParser(description="Diagnose github repos, from a variety of sources") 147 | argparser.add_argument("--repo_list_file", metavar="rlistfile", type=str, nargs='?', help="file with list of github repo links") 148 | argparser.add_argument("--repo_link", metavar="rlink", type=str, nargs='?', help="single repo link") 149 | argparser.add_argument("--repo_local_dir", metavar="rlocallink", type=str, nargs='?', help="path to local directory that has the repo code") 150 | argparser.add_argument("--repo_link_and_SHA", metavar="rlink_and_SHA", type=str, nargs='*', help="single repo link, with optional commit SHA") 151 | argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file") 152 | argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to") 153 | args = argparser.parse_args() 154 | 155 | config = args.config if args.config else "" 156 | output_dir = args.output_dir if args.output_dir else "."
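# example invocations, for illustration only -- the flags are the ones defined by the
# argparser above, and the memfs link + SHA pair mirrors the invocation used in tests/runTests.sh:
#   python3 src/diagnose_github_repo.py --repo_link https://github.com/user/reponame
#   python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905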
157 | 158 | walker = RepoWalker(config_file=config, output_dir=output_dir) 159 | 160 | repo_local_dir = None 161 | if args.repo_local_dir: 162 | repo_local_dir = os.path.abspath(args.repo_local_dir) 163 | 164 | repo_links = [] 165 | if args.repo_list_file: 166 | try: 167 | repo_links += GetLinks.from_list_of_repos(args.repo_list_file) 168 | except: 169 | print("Error reading list of repos file: " + args.repo_list_file + " --- no repos to try") 170 | repo_links += [] 171 | 172 | 173 | if args.repo_link: 174 | repo_links += [args.repo_link] 175 | 176 | if args.repo_link_and_SHA: 177 | # repo_link_and_SHA can have an optional commit SHA: if so it's space delimited 178 | # so we join all the repo_link args into a space-delimited string 179 | repo_links += [' '.join(args.repo_link_and_SHA)] 180 | walker.set_repo_links( repo_links) 181 | walker.set_local_repo_path(repo_local_dir) 182 | walker.iterate_over_repos() 183 | 184 | 185 | -------------------------------------------------------------------------------- /src/diagnose_npm_package.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from scrapy import signals 3 | from scrapy.crawler import CrawlerProcess 4 | from scrapy.selector import Selector 5 | from bs4 import BeautifulSoup 6 | import json 7 | import re 8 | import subprocess 9 | import os 10 | import logging 11 | import argparse 12 | from test_JS_repo_lib import * 13 | import middlewares 14 | 15 | logging.getLogger('scrapy').propagate = False 16 | 17 | class NPMSpider(scrapy.Spider): 18 | name = "npm-pkgs" 19 | VERBOSE_MODE = False 20 | RM_AFTER_CLONING = False 21 | SCRIPTS_OVER_CODE = [] 22 | CUSTOM_SETUP_SCRIPTS = [] 23 | CUSTOM_LOCK_FILES = [] 24 | QL_QUERIES = [] 25 | 26 | DO_INSTALL = True 27 | INCLUDE_DEV_DEPS = False 28 | COMPUTE_DEP_LISTS = False 29 | TRACK_BUILD = True 30 | TRACK_TESTS = True 31 | TEST_VERBOSE_ALL_OUTPUT = False 32 | TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json" 33 | TEST_COMMAND_REPEATS = 1 34 | 35 | TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 36 | "mocha", "jest", "ava", "tap", "jasmine"] 37 | IGNORED_COMMANDS = ["watch", "debug"] 38 | IGNORED_SUBSTRINGS = ["--watch", "nodemon"] 39 | TRACKED_BUILD_COMMANDS = ["build", "compile", "init"] 40 | 41 | # timeouts for stages, in seconds 42 | INSTALL_TIMEOUT = 1000 43 | # note: these are timeouts per *script* in the stage of the process 44 | BUILD_TIMEOUT = 1000 45 | TEST_TIMEOUT = 1000 46 | 47 | def __init__(self, packages=None, config_file="", output_dir=".", *args, **kwargs): 48 | if packages is not None: 49 | self.packages = packages 50 | self.start_urls = ['https://www.npmjs.com/package/' + pkg for pkg in self.packages] 51 | self.set_up_config( config_file) 52 | self.output_dir = os.path.abspath(output_dir) 53 | super(NPMSpider, self).__init__(*args, **kwargs) 54 | 55 | def set_up_config( self, config_file): 56 | if not os.path.exists(config_file): 57 | if config_file != "": 58 | print("Could not find config file: " + config_file + " --- using defaults") 59 | return 60 | 61 | config_json = {} 62 | try: 63 | with open( config_file, 'r') as f: 64 | config_json = json.loads(f.read()) 65 | except: 66 | print("Error reading config file: " + config_file + " --- using defaults") 67 | 68 | # now, read the relevant config info from the file 69 | cf_dict = config_json.get( "meta_info", {}) 70 | self.VERBOSE_MODE = cf_dict.get("VERBOSE_MODE", self.VERBOSE_MODE) 71 | self.IGNORED_COMMANDS = cf_dict.get( 
"ignored_commands", self.IGNORED_COMMANDS) 72 | self.IGNORED_SUBSTRINGS = cf_dict.get( "ignored_substrings", self.IGNORED_SUBSTRINGS) 73 | self.RM_AFTER_CLONING = cf_dict.get( "rm_after_cloning", self.RM_AFTER_CLONING) 74 | # script and query file location is relative to the config file 75 | self.SCRIPTS_OVER_CODE = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 76 | for p in cf_dict.get( "scripts_over_code", self.SCRIPTS_OVER_CODE)] 77 | self.QL_QUERIES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 78 | for p in cf_dict.get( "QL_queries", self.QL_QUERIES)] 79 | self.CUSTOM_SETUP_SCRIPTS = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 80 | for p in cf_dict.get( "custom_setup_scripts", self.CUSTOM_SETUP_SCRIPTS)] 81 | 82 | cf_dict = config_json.get( "dependencies", {}) 83 | self.INCLUDE_DEV_DEPS = cf_dict.get("include_dev_deps", self.INCLUDE_DEV_DEPS) 84 | self.COMPUTE_DEP_LISTS = cf_dict.get("track_deps", self.COMPUTE_DEP_LISTS) 85 | 86 | cf_dict = config_json.get( "install", {}) 87 | self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL) 88 | self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT) 89 | self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 90 | for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)] 91 | 92 | cf_dict = config_json.get( "build", {}) 93 | self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD) 94 | self.BUILD_TIMEOUT = cf_dict.get("timeout", self.BUILD_TIMEOUT) 95 | self.TRACKED_BUILD_COMMANDS = cf_dict.get("tracked_build_commands", self.TRACKED_BUILD_COMMANDS) 96 | 97 | cf_dict = config_json.get("test", {}) 98 | self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT) 99 | self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS) 100 | self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS) 101 | self.TEST_COMMAND_REPEATS = cf_dict.get("test_command_repeats", self.TEST_COMMAND_REPEATS) 102 | test_verbose_config = cf_dict.get("test_verbose_all_output", {}) 103 | self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT) 104 | self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON) 105 | 106 | def parse(self, response): 107 | # TODO should we handle specific response codes? 
108 | # successful responses are those in the 200s 109 | # source: https://doc.scrapy.org/en/latest/topics/spider-middleware.html#module-scrapy.spidermiddlewares.httperror 110 | if response.status > 299 or response.status < 200: 111 | json_results = { "http_error_code": response.status, "message": "Could not analyze url: " + response.url } 112 | with open( response.url[ len("https://www.npmjs.com/package/"):] + '__results.json', 'w') as f: 113 | json.dump( json_results, f, indent=4) 114 | return 115 | package_name = self.parse_process(response.body) 116 | with open(self.output_dir + "/" + package_name + '__page_data.html', 'wb') as f: 117 | f.write(response.body) 118 | 119 | def parse_process( self, html_text): 120 | soup = BeautifulSoup(html_text, 'html.parser') 121 | # print(soup.prettify()) 122 | script = soup.find('script', text=re.compile(r'window\.__context__')) 123 | json_text = re.search(r'^\s*window\.__context__\s*=\s*({.*?})\s*$', 124 | script.string, flags=re.DOTALL | re.MULTILINE).group(1) 125 | data = json.loads(json_text) 126 | 127 | num_dependents = data['context']['dependents']['dependentsCount'] 128 | repo_link = data['context']['packument']['repository'] 129 | package_name = data['context']['packument']['name'] 130 | 131 | json_results = diagnose_package( repo_link, self) 132 | 133 | json_results["metadata"] = {} 134 | json_results["metadata"]["package_name"] = package_name 135 | json_results["metadata"]["repo_link"] = repo_link 136 | json_results["metadata"]["num_dependents"] = num_dependents 137 | 138 | with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: 139 | json.dump( json_results, f, indent=4) 140 | return(package_name) 141 | 142 | def iterate_over_pkgs_from_files( self): 143 | for pkg_name in self.packages: 144 | with open(pkg_name + '__page_data.html', 'rb') as f: 145 | html_text = f.read() 146 | self.parse_process(html_text) 147 | 148 | process = CrawlerProcess(settings={ 149 | "FEEDS": { 150 | "items.json": {"format": "json"}, 151 | }, 152 | "HTTPERROR_ALLOW_ALL": True, 153 | "RETRY_HTTP_CODES" : [429], 154 | "DOWNLOADER_MIDDLEWARES": { 155 | "scrapy.downloadermiddlewares.retry.RetryMiddleware": None, 156 | "middlewares.TooManyRequestsRetryMiddleware": 543, 157 | } 158 | }) 159 | 160 | 161 | argparser = argparse.ArgumentParser(description="Diagnose npm packages") 162 | argparser.add_argument("--packages", metavar="package", type=str, nargs='+', help="a package to be diagnosed") 163 | argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file") 164 | argparser.add_argument("--html", metavar="html_file", type=bool, nargs='?', help="read from existing html instead of scraping") 165 | argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to") 166 | args = argparser.parse_args() 167 | 168 | output_dir = args.output_dir if args.output_dir else "."
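# example invocation, for illustration only -- "memfs" is just a sample package name, and
# configs/default_filter_config.json is one of the config files shipped in this repo:
#   python3 src/diagnose_npm_package.py --packages memfs --config configs/default_filter_config.json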
169 | 170 | config = args.config if args.config else "" 171 | html = args.html if args.html else False 172 | 173 | if not args.html: 174 | process.crawl(NPMSpider, packages=args.packages, config_file=config, output_dir=output_dir) 175 | process.start() # the script will block here until the crawling is finished 176 | else: 177 | # reading the package page data from previously-saved html files 178 | spider = NPMSpider(args.packages, config_file=config, output_dir=output_dir) 179 | spider.iterate_over_pkgs_from_files() 180 | 181 | 182 | -------------------------------------------------------------------------------- /src/get_repo_links.py: -------------------------------------------------------------------------------- 1 | 2 | # set of functions for extracting lists of repos to clone 3 | # from a variety of sources 4 | 5 | # from a file that's just a list of github repos 6 | # of the form: https://github.com/username/reponame 7 | # optionally, users can specify a particular commit SHA to run over 8 | # this should be separated from the repo by some whitespace 9 | def from_list_of_repos( filename): 10 | with open(filename) as f: 11 | file_lines = f.read().split("\n") 12 | # filter out empty lines and return 13 | return( [ f for f in file_lines if len(f) > 0]) -------------------------------------------------------------------------------- /src/middlewares.py: -------------------------------------------------------------------------------- 1 | from scrapy.downloadermiddlewares.retry import RetryMiddleware 2 | from scrapy.utils.response import response_status_message 3 | 4 | import time 5 | 6 | class TooManyRequestsRetryMiddleware(RetryMiddleware): 7 | 8 | def __init__(self, crawler): 9 | super(TooManyRequestsRetryMiddleware, self).__init__(crawler.settings) 10 | self.crawler = crawler 11 | 12 | @classmethod 13 | def from_crawler(cls, crawler): 14 | return cls(crawler) 15 | 16 | def process_response(self, request, response, spider): 17 | if request.meta.get('dont_retry', False): 18 | return response 19 | elif response.status == 429: 20 | self.crawler.engine.pause() 21 | time.sleep(60) # If the rate limit is renewed in a minute, put 60 seconds, and so on.
22 | self.crawler.engine.unpause() 23 | reason = response_status_message(response.status) 24 | return self._retry(request, reason, spider) or response 25 | elif response.status in self.retry_http_codes: 26 | reason = response_status_message(response.status) 27 | return self._retry(request, reason, spider) or response 28 | return response 29 | -------------------------------------------------------------------------------- /src/output_parsing/test_output_proc.py: -------------------------------------------------------------------------------- 1 | import json 2 | import xmltodict 3 | import pandas as pd 4 | 5 | # parse the output of mocha xunit reporter to a csv 6 | # does not delete the original xunit output file 7 | # outputs include, per test (in this order): 8 | # - test suite it's a part of 9 | # - name of the test itself 10 | # - runtime of the test 11 | # - stdout of the test (if any) 12 | # - pass/fail status (could also be "pending") 13 | def parse_mocha_json_to_csv(output_file, new_output_file=None): 14 | if new_output_file is None: 15 | new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension 16 | # convert an xml file to json 17 | # used to convert the xunit reporter output from mocha into json 18 | # code from https://www.geeksforgeeks.org/python-xml-to-json/ 19 | data_dict = {} 20 | try: 21 | with open(output_file) as xml_file: 22 | data_dict = xmltodict.parse(xml_file.read()).get("testsuite", {}) 23 | except: 24 | data_dict = {} 25 | # the format: all the tests are in a top-level list called "testcase" 26 | test_suites = [] 27 | test_names = [] 28 | test_runtimes = [] 29 | test_stdout = [] 30 | test_pass_fail = [] 31 | for test in data_dict.get("testcase", []): 32 | test_suites += [test.get("@classname", "").strip()] 33 | test_names += [test.get("@name", "").strip()] 34 | test_runtimes += [float(test.get("@time", "NaN"))] 35 | if test.get("failure", False): 36 | test_stdout += [test["failure"]] 37 | test_pass_fail += ["failed"] 38 | else: 39 | test_stdout += [""] 40 | test_pass_fail += ["passed"] 41 | res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail))) 42 | try: 43 | res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] 44 | with open(new_output_file, 'w') as csv_file: 45 | csv_file.write(res_df.to_csv()) 46 | except: 47 | print("ERROR in data for file " + new_output_file + " -- no output printed. 
skipping to next step...") 48 | 49 | # parse the json output of the jest reporter to a csv 50 | # this does the same thing as for mocha, to produce the same data fields 51 | # does not delete the original json output file 52 | # outputs include, per test (in this order): 53 | # - test suite it's a part of 54 | # - name of the test itself 55 | # - runtime of the test 56 | # - stdout of the test (if any) 57 | # - pass/fail status (could also be "pending") 58 | def parse_jest_json_to_csv(output_file, new_output_file=None): 59 | if new_output_file is None: 60 | new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension 61 | data_dict = {} 62 | try: 63 | with open(output_file) as json_file: 64 | data_dict = json.loads(json_file.read()) 65 | except: 66 | data_dict = {} 67 | # the format: all tests are in a top level list called "testResults" 68 | # this is a list of objects that have "assertionResults" representing the test suites 69 | # "assertionResults" is a list of objects that have the test data 70 | test_suites = [] 71 | test_names = [] 72 | test_runtimes = [] 73 | test_stdout = [] 74 | test_pass_fail = [] 75 | for test_suite in data_dict.get("testResults", []): 76 | test_suite_results = test_suite.get("assertionResults", []) 77 | test_suite_name = test_suite.get("name", "") 78 | for test_results in test_suite_results: 79 | test_status = test_results.get("status", "failed") 80 | test_duration = test_results.get("duration") 81 | # if it can't convert to a float, it could be missing/nonetype (None duration for pending tests) 82 | try: 83 | test_duration = float(test_duration) 84 | except: 85 | test_duration = float("NaN") 86 | test_suites += [test_suite_name] 87 | test_names += [test_results.get("fullName", "")] 88 | test_runtimes += [test_duration] 89 | test_stdout += [";".join(test_results.get("failureMessages", []))] 90 | test_pass_fail += [test_status] # passed/failed/pending -- if not present assume failed 91 | res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail))) 92 | try: 93 | res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] 94 | with open(new_output_file, 'w') as csv_file: 95 | csv_file.write(res_df.to_csv()) 96 | except: 97 | print("ERROR in data for file " + new_output_file + " -- no output printed. skipping to next step...") -------------------------------------------------------------------------------- /src/runQuery.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | projRoot=$1 4 | projName=$2 5 | query=$3 6 | outputDir="." 7 | 8 | if [ $# == 4 ]; then 9 | outputDir=$4 10 | fi 11 | 12 | # if there is no QLDBs folder yet, create it 13 | if [ ! -d "QLDBs" ]; then 14 | mkdir QLDBs 15 | fi 16 | 17 | # make the QL DB and upgrade it, if it doesn't already exist 18 | 19 | if [ !
-d "QLDBs/$projName" ]; then 20 | #export LGTM_INDEX_FILTERS='include:/' 21 | codeql database create --language=javascript --source-root $projRoot QLDBs/$projName 22 | codeql database upgrade QLDBs/$projName 23 | fi 24 | 25 | # run the query 26 | codeql query run --database QLDBs/${projName} --output=${projName}_tempOut.bqrs $query 27 | codeql bqrs decode --format=csv ${projName}_tempOut.bqrs > $outputDir/${projName}__`basename $query .ql`__results.csv 28 | rm ${projName}_tempOut.bqrs -------------------------------------------------------------------------------- /src/test_JS_repo_lib.py: -------------------------------------------------------------------------------- 1 | import re 2 | import subprocess 3 | import json 4 | import os 5 | import time 6 | from TestInfo import * 7 | 8 | def run_command( commands, timeout=None): 9 | for command in commands.split(";"): 10 | try: 11 | process = subprocess.run( command.split(), stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout) 12 | except subprocess.TimeoutExpired: 13 | error_string = "TIMEOUT ERROR: for user-specified timeout " + str(timeout) + " seconds" 14 | error = "TIMEOUT ERROR" 15 | return( error.encode('utf-8'), error_string.encode('utf-8'), 1) # non-zero return code 16 | return( process.stderr, process.stdout, process.returncode) 17 | 18 | def run_installation( pkg_json, crawler): 19 | installation_command = "" 20 | installation_debug = "Running Installation\n" 21 | manager = "" 22 | 23 | # if there is a yarn lock file use yarn 24 | # if there is a package-lock, use npm 25 | # if there is neither, try npm first, and if that fails use yarn 26 | if os.path.exists( "yarn.lock"): 27 | installation_debug += "\nyarn detected -- installing using yarn" 28 | manager = "yarn " 29 | installation_command = "yarn" 30 | error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT) 31 | elif os.path.exists( "package-lock.json"): 32 | installation_debug += "\npackage-lock detected -- installing using npm" 33 | manager = "npm run " 34 | installation_command = "npm install" 35 | error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT) 36 | else: 37 | installation_debug += "\nNo installer detected -- trying npm" 38 | manager = "npm run " 39 | installation_command = "npm install" 40 | error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT) 41 | if retcode != 0: 42 | installation_debug += "No installer detected -- tried npm, error, now trying yarn" 43 | manager = "yarn " 44 | installation_command = "yarn" 45 | error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT) 46 | return( (manager, retcode, installation_command, installation_debug)) 47 | 48 | def get_deps(): 49 | deps = [] 50 | for d in os.listdir("node_modules"): 51 | # if a folder's name starts with '.', ignore it. 52 | if d[0] == '.': 53 | continue 54 | # if a folder's name starts with '@', count subfolders in it. 
55 | if d[0] == '@': 56 | subFolder = os.path.join("node_modules/", d) 57 | for f in os.listdir(subFolder): 58 | deps.append(d + '/' + f) 59 | 60 | else: 61 | deps.append(d) 62 | 63 | return deps 64 | 65 | # note: no timeout option for get_dependencies, so "None" is passed as a default timeout argument to run_command 66 | def get_dependencies( pkg_json, manager, include_dev_deps): 67 | if pkg_json.get("devDependencies", None) and not include_dev_deps: 68 | run_command( "rm -r node_modules") 69 | run_command( "mv package.json TEMP_package.json_TEMP") 70 | dev_deps = pkg_json["devDependencies"] 71 | pkg_json["devDependencies"] = {} 72 | with open("package.json", 'w') as f: 73 | json.dump( pkg_json, f) 74 | run_command( "npm install" if manager == "npm run " else manager) 75 | pkg_json["devDependencies"] = dev_deps 76 | # get the list of deps, excluding hidden directories 77 | deps = [] if not os.path.isdir("node_modules") else get_deps() 78 | # then, reset the deps (if required) 79 | if pkg_json.get("devDependencies", None) and not include_dev_deps: 80 | run_command( "rm -r node_modules") 81 | run_command( "mv TEMP_package.json_TEMP package.json") 82 | run_command( "npm install" if manager == "npm run " else manager) 83 | return( deps) 84 | 85 | 86 | def run_build( manager, pkg_json, crawler): 87 | build_debug = "" 88 | build_script_list = [] 89 | retcode = 0 90 | if len(crawler.TRACKED_BUILD_COMMANDS) == 0: 91 | return(retcode, build_script_list, build_debug) 92 | build_scripts = [b for b in pkg_json.get("scripts", {}).keys() if not set([ b.find(b_com) for b_com in crawler.TRACKED_BUILD_COMMANDS]) == {-1}] 93 | build_scripts = [b for b in build_scripts if set([b.find(ig_com) for ig_com in crawler.IGNORED_COMMANDS]) == {-1}] 94 | build_scripts = [b for b in build_scripts if set([pkg_json.get("scripts", {})[b].find(ig_sub) for ig_sub in crawler.IGNORED_SUBSTRINGS]) == {-1}] 95 | for b in build_scripts: 96 | build_debug += "Running: " + manager + b 97 | error, output, retcode = run_command( manager + b, crawler.BUILD_TIMEOUT) 98 | if retcode != 0 and build_scripts.count(b) < 2: 99 | build_debug += "ERROR running command: " + b 100 | build_scripts += [b] # re-add it onto the end of the list, and try running it again after the other build commands 101 | elif retcode == 0: 102 | build_script_list += [b] 103 | return( retcode, build_script_list, build_debug) 104 | 105 | def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): 106 | test_json_summary = {} 107 | retcode = 0 108 | if len(crawler.TRACKED_TEST_COMMANDS) == 0: 109 | return(retcode, test_json_summary) 110 | test_scripts = [t for t in pkg_json.get("scripts", {}).keys() if not set([ t.find(t_com) for t_com in crawler.TRACKED_TEST_COMMANDS]) == {-1}] 111 | test_scripts = [t for t in test_scripts if set([t.find(ig_com) for ig_com in crawler.IGNORED_COMMANDS]) == {-1}] 112 | test_scripts = [t for t in test_scripts if set([pkg_json.get("scripts", {})[t].find(ig_sub) for ig_sub in crawler.IGNORED_SUBSTRINGS]) == {-1}] 113 | for test_index, t in enumerate(test_scripts): 114 | test_output_rep = {} 115 | for test_rep_index in range(crawler.TEST_COMMAND_REPEATS): 116 | test_rep_id = "" if crawler.TEST_COMMAND_REPEATS == 1 else "testrep_" + str(test_rep_index) 117 | print("Running rep " + str(test_rep_index) + " of " + str(crawler.TEST_COMMAND_REPEATS - 1) + ": " + manager + t) 118 | test_command = pkg_json.get("scripts", {})[t] 119 | test_infras = TestInfo.get_test_infras_list(test_command, manager) 120 | test_verbosity_output = {} 121 | 
# initialize these variables for timing; they'll be set before/after running test commands (resp) 122 | start_time = 0 123 | end_time = 0 124 | # if we're in verbose testing mode (i.e. getting all timing info for each test, etc) 125 | # then, we rerun the test commands with all the commands for adding verbose_mode to 126 | # each of the test infras involved (individually) 127 | if crawler.TEST_VERBOSE_ALL_OUTPUT: 128 | # we're gonna be adding our new custom scripts for verbosity testing 129 | run_command( "mv package.json TEMP_package.json_TEMP") 130 | for verbosity_index, test_infra in enumerate(test_infras): 131 | verbose_test_json = crawler.output_dir + "/" \ 132 | + "repo_" + repo_name + "_" \ 133 | + "test_" + str(test_index) + "_"\ 134 | + "infra_" + str(verbosity_index) + "_" \ 135 | + ("" if test_rep_id == "" else test_rep_id + "_") \ 136 | + crawler.TEST_VERBOSE_OUTPUT_JSON 137 | infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS.get(test_infra) 138 | if not infra_verbosity_config: # checks if it's an empty object 139 | print("TEST VERBOSE MODE: unsupported test infra " + test_infra) 140 | test_verbosity_output[test_infra] = { "error": True } 141 | continue 142 | infra_verbosity_args = infra_verbosity_config.get("args", "") 143 | infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end 144 | infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None) 145 | infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_command, test_infra, infra_verbosity_args, 146 | verbose_test_json, infra_verbosity_args_pos) 147 | verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index) 148 | pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command 149 | with open("package.json", 'w') as f: 150 | json.dump( pkg_json, f) 151 | print("Running verbosity: " + manager + infra_verbosity_command) 152 | # time how long the next line takes 153 | start_time = time.time() 154 | error, output, retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) 155 | end_time = time.time() 156 | # if there's post-processing to be done 157 | if not infra_verbosity_post_proc is None: 158 | for out_file_obj in out_files: 159 | infra_verbosity_post_proc(out_file_obj["output_file"]) 160 | verbosity_index += 1 161 | # get the output 162 | test_verbosity_infra = {} 163 | test_verbosity_infra["command"] = infra_verbosity_command 164 | test_verbosity_infra["output_files"] = out_files 165 | if crawler.VERBOSE_MODE: 166 | test_verbosity_infra["test_debug"] = "\nError output: " + error.decode('utf-8') \ 167 | + "\nOutput stream: " + output.decode('utf-8') 168 | test_verbosity_output[test_infra] = test_verbosity_infra 169 | # put the package.json back 170 | run_command( "mv TEMP_package.json_TEMP package.json") 171 | # not verbose test mode -- just run the normal test command 172 | # if start and end time are both still zero then no instrumented test commands ran 173 | # and so we also rerun here 174 | if (not crawler.TEST_VERBOSE_ALL_OUTPUT) or (start_time == 0 and end_time == 0): 175 | start_time = time.time() 176 | error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT) 177 | end_time = time.time() 178 | test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE) 179 | # the below info on the test infras etc is independent of verbose mode: just based on the command itself 180 | test_info.set_test_command( test_command) 181 | 
test_info.compute_test_infras() 182 | test_info.compute_nested_test_commands( test_scripts) 183 | test_info.start_time = start_time 184 | test_info.end_time = end_time 185 | # note: if we're running in verbose mode, then the stats will be that of the last executed verbose mode 186 | # instrumented version of the test command 187 | test_info.compute_test_stats() 188 | if crawler.TEST_VERBOSE_ALL_OUTPUT: 189 | test_info.set_test_verbosity_output(test_verbosity_output) 190 | # if we're not doing any repeats then don't make another layer of jsons 191 | if crawler.TEST_COMMAND_REPEATS == 1: 192 | test_output_rep = test_info.get_json_rep() 193 | else: 194 | test_output_rep[test_rep_id] = test_info.get_json_rep() 195 | test_json_summary[t] = test_output_rep 196 | return( retcode, test_json_summary) 197 | 198 | # instrument the test command specified to make it produce verbose output to a file 199 | def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos): 200 | # replace the output file name with the custom output filename 201 | # add an index to the filename for the 2nd,+ time the filename shows up 202 | # so as to avoid overwriting the files 203 | num_files = 0 204 | new_infra_verbosity_args = "" 205 | output_files = [] 206 | for i, sub in enumerate(infra_verbosity_args.split("$PLACEHOLDER_OUTPUT_FILE_NAME$")): 207 | out_file_object = { "test_script": test_script, "test_infra": test_infra } 208 | # not the file name 209 | if sub != "": 210 | new_infra_verbosity_args += sub 211 | else: 212 | path_index = verbose_test_json.rfind("/") 213 | if path_index == -1: 214 | output_file = "out_" + str(num_files) + "_" + verbose_test_json 215 | new_infra_verbosity_args += output_file 216 | out_file_object["output_file"] = output_file 217 | else: 218 | output_file = verbose_test_json[:path_index] + "/out_" + str(num_files) + "_" + verbose_test_json[path_index + 1:] 219 | print(output_file) 220 | new_infra_verbosity_args += output_file 221 | out_file_object["output_file"] = output_file 222 | output_files += [ out_file_object ] 223 | num_files += 1 224 | infra_verbosity_args = new_infra_verbosity_args 225 | # split into sub-commands 226 | command_split_chars = [ "&&", ";"] 227 | infra_calls = test_script.split(test_infra) 228 | real_calls = [] 229 | for i, maybe_call in enumerate(infra_calls): 230 | # if the last char in the string is not whitespace and not a command delimiter, 231 | # and it's not the last string in the split 232 | # then it's a string that is appended to the front of the name of the infra (e.g., "\"jest\"") 233 | # and not a call 234 | # rebuild it 235 | if i < len(infra_calls) - 1 and maybe_call != "" and (not maybe_call[-1].isspace()) and (not any([maybe_call.endswith(s) for s in command_split_chars])): 236 | if len(real_calls) > 0: 237 | real_calls[-1] += test_infra + maybe_call 238 | continue 239 | # if the first char in the string is not whitespace and not a command delimiter, 240 | # and it's not the first string in the split 241 | # then it's a string that is appended to the back of the name of the infra (e.g., jest".config.js") 242 | # and not a call either 243 | # rebuild it 244 | if i > 0 and maybe_call != "" and (not maybe_call[0].isspace()) and (not any([maybe_call.startswith(s) for s in command_split_chars])): 245 | if len(real_calls) > 0: 246 | real_calls[-1] += test_infra + maybe_call 247 | continue 248 | real_calls += [ maybe_call ] 249 | infra_calls = real_calls 250 | instrumented_test_command = [] 251 | for i,
infra_call in enumerate(infra_calls): 252 | # if the current call is empty string 253 | # then this is the call to the testing infra and the next is the arguments 254 | # so, skip this one 255 | # if there are no args (i.e. no next string), then just instrument this one 256 | if infra_call == "" and i < len(infra_calls) - 1: 257 | instrumented_test_command += [ "" ] 258 | continue 259 | # if the first call is non-empty and there's more than one call, then it's pre-test-infra and we skip it too 260 | elif len(infra_calls) > 1 and infra_call != "" and i == 0: 261 | instrumented_test_command += [ "" ] 262 | continue 263 | # get the arguments, splitting off from any other non-test commands that might be 264 | # in this command (note: we know all the commands started with test_infra) 265 | end_command_pos = re.search(r'|'.join(command_split_chars), infra_call) 266 | end_command_pos = end_command_pos.start() if not end_command_pos is None else -1 267 | sub_command_args = (infra_call[0:end_command_pos] if end_command_pos > -1 else infra_call).split(" ") 268 | if infra_verbosity_args_pos != -1: 269 | sub_command_args.insert(infra_verbosity_args_pos, infra_verbosity_args) 270 | else: 271 | sub_command_args.append(infra_verbosity_args) 272 | # rebuild the command, re-attaching any extra sub-commands 273 | instrumented_test_command += [ " ".join(sub_command_args) + (infra_call[end_command_pos:] if end_command_pos > -1 else "") ] 274 | return(test_infra.join(instrumented_test_command), output_files) 275 | 276 | def on_diagnose_exit( json_out, crawler, cur_dir, repo_name): 277 | # if we still have the temp package.json, restore it 278 | if os.path.isfile("TEMP_package.json_TEMP"): 279 | run_command( "mv TEMP_package.json_TEMP package.json") 280 | # move back to the original working directory 281 | if repo_name != "": 282 | os.chdir( cur_dir) 283 | if crawler.RM_AFTER_CLONING: 284 | run_command( "rm -rf TESTING_REPOS/" + repo_name) 285 | return( json_out) 286 | 287 | def diagnose_package( repo_link, crawler, commit_SHA=None): 288 | json_out = {} 289 | 290 | repo_name = "" 291 | cur_dir = os.getcwd() 292 | try: 293 | repo_name = repo_link[len(repo_link) - (repo_link[::-1].index("/")):] 294 | except: 295 | print("ERROR cloning the repo -- malformed repo link. Exiting now.") 296 | json_out["setup"] = {} 297 | json_out["setup"]["repo_cloning_ERROR"] = True 298 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 299 | 300 | print("Diagnosing: " + repo_name + " --- from: " + repo_link) 301 | 302 | if not os.path.isdir("TESTING_REPOS"): 303 | os.mkdir("TESTING_REPOS") 304 | os.chdir("TESTING_REPOS") 305 | 306 | # first step: cloning the package's repo 307 | 308 | # if the repo already exists, don't clone it 309 | if not os.path.isdir( repo_name): 310 | print( "Cloning package repository") 311 | error, output, retcode = run_command( "git clone " + repo_link) 312 | if retcode != 0: 313 | print("ERROR cloning the repo. Exiting now.") 314 | json_out["setup"] = {} 315 | json_out["setup"]["repo_cloning_ERROR"] = True 316 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 317 | else: 318 | print( "Package repository already exists.
Using existing directory: " + repo_name) 319 | 320 | # diagnose the repo dir 321 | return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=commit_SHA)) 322 | 323 | def diagnose_local_dir(repo_dir, crawler): 324 | json_out = {} 325 | repo_name = "" 326 | cur_dir = os.getcwd() 327 | repo_name = repo_dir.split("/")[-1] 328 | if not os.path.isdir(repo_dir): 329 | print("ERROR using local directory: " + repo_dir + " --- invalid directory path") 330 | json_out["setup"] = {} 331 | json_out["setup"]["local_dir_ERROR"] = True 332 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 333 | 334 | print("Diagnosing: " + repo_name + " --- from: " + repo_dir) 335 | if not os.path.isdir("TESTING_REPOS"): 336 | os.mkdir("TESTING_REPOS") 337 | os.chdir("TESTING_REPOS") 338 | 339 | # if the repo dir already exists, don't copy it 340 | if not os.path.isdir( repo_name): 341 | print( "Copying package directory") 342 | error, output, retcode = run_command( "cp -r " + repo_dir + " " + repo_name) 343 | if retcode != 0: 344 | print("ERROR copying the directory. Exiting now.") 345 | json_out["setup"] = {} 346 | json_out["setup"]["local_dir_ERROR"] = True 347 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 348 | else: 349 | print( "Package directory already exists. Using existing directory: " + repo_name) 350 | # diagnose the repo dir 351 | return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir)) 352 | 353 | def diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=None): 354 | # move into the repo and begin testing 355 | os.chdir( repo_name) 356 | 357 | # checkout the specified commit if needed 358 | if commit_SHA: 359 | print("Checking out specified commit: " + commit_SHA) 360 | error, output, retcode = run_command( "git checkout " + commit_SHA) 361 | if retcode != 0: 362 | print("ERROR checking out specified commit. Exiting now.") 363 | json_out["setup"] = {} 364 | json_out["setup"]["repo_commit_checkout_ERROR"] = True 365 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 366 | 367 | 368 | pkg_json = None 369 | try: 370 | with open('package.json') as f: 371 | pkg_json = json.load(f) 372 | except: 373 | print("ERROR reading the package.json. Exiting now.") 374 | json_out["setup"] = {} 375 | json_out["setup"]["pkg_json_ERROR"] = True 376 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 377 | 378 | manager = "" 379 | # if there are custom lock files, copy them into the repo (repo is "."
since we're in the repo currently) 380 | if crawler.CUSTOM_LOCK_FILES != []: 381 | for custom_lock in crawler.CUSTOM_LOCK_FILES: 382 | run_command("cp " + custom_lock + " .") 383 | 384 | # first, check if there is a custom install 385 | # this runs custom scripts the same way as the scripts_over_code below; only 386 | # difference is it's before the npm-filter run 387 | if crawler.CUSTOM_SETUP_SCRIPTS != []: 388 | json_out["custom_setup_scripts"] = {} 389 | for script in crawler.CUSTOM_SETUP_SCRIPTS: 390 | print("Running custom setup script over code: " + script) 391 | json_out["custom_setup_scripts"][script] = {} 392 | error, output, retcode = run_command( script) 393 | script_output = output.decode('utf-8') + error.decode('utf-8') 394 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 395 | script_output = ansi_escape.sub('', script_output) 396 | json_out["custom_setup_scripts"][script]["output"] = script_output 397 | if retcode != 0: 398 | json_out["custom_setup_scripts"][script]["ERROR"] = True 399 | 400 | # check if the install is done (check if there is a node_modules folder) 401 | already_installed = os.path.isdir("node_modules") 402 | 403 | # then, the install 404 | if crawler.DO_INSTALL: 405 | (new_manager, retcode, installer_command, installer_debug) = run_installation( pkg_json, crawler) 406 | if manager == "": 407 | manager = new_manager 408 | json_out["installation"] = {} 409 | json_out["installation"]["installer_command"] = installer_command 410 | if crawler.VERBOSE_MODE: 411 | json_out["installation"]["installer_debug"] = installer_debug 412 | if retcode != 0: 413 | print("ERROR -- installation failed") 414 | json_out["installation"]["ERROR"] = True 415 | if not already_installed: 416 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 417 | else: 418 | json_out["installation"] = { "do_install": False } 419 | 420 | if manager == "": # default the manager to npm if it wasn't already IDd 421 | manager = "npm run " 422 | 423 | if crawler.COMPUTE_DEP_LISTS: 424 | json_out["dependencies"] = {} 425 | if not crawler.DO_INSTALL: 426 | print("Can't get dependencies without installing (do_install: false) -- skipping") 427 | else: 428 | print("Getting dependencies") 429 | dep_list = get_dependencies( pkg_json, manager, crawler.INCLUDE_DEV_DEPS) 430 | json_out["dependencies"]["dep_list"] = dep_list 431 | json_out["dependencies"]["includes_dev_deps"] = crawler.INCLUDE_DEV_DEPS 432 | 433 | # now, proceed with the build 434 | if crawler.TRACK_BUILD: 435 | json_out["build"] = {} 436 | if not crawler.DO_INSTALL and not already_installed: 437 | print("Can't do build without installing (do_install: false and not already installed) -- skipping") 438 | else: 439 | (retcode, build_script_list, build_debug) = run_build( manager, pkg_json, crawler) 440 | json_out["build"]["build_script_list"] = build_script_list 441 | if crawler.VERBOSE_MODE: 442 | json_out["build"]["build_debug"] = build_debug 443 | if retcode != 0: 444 | print("ERROR -- build failed.
Continuing anyway...") 445 | json_out["build"]["ERROR"] = True 446 | else: 447 | json_out["build"] = { "track_build": False } 448 | 449 | # then, the testing 450 | if crawler.TRACK_TESTS: 451 | json_out["testing"] = {} 452 | if not crawler.DO_INSTALL and not already_installed: 453 | print("Can't run tests without installing (do_install: false and not already installed) -- skipping") 454 | else: 455 | (retcode, test_json_summary) = run_tests( manager, pkg_json, crawler, repo_name, cur_dir) 456 | json_out["testing"] = test_json_summary 457 | else: 458 | json_out["testing"] = { "track_tests": False } 459 | 460 | if crawler.SCRIPTS_OVER_CODE != []: 461 | json_out["scripts_over_code"] = {} 462 | for script in crawler.SCRIPTS_OVER_CODE: 463 | print("Running script over code: " + script) 464 | json_out["scripts_over_code"][script] = {} 465 | error, output, retcode = run_command( script) 466 | script_output = output.decode('utf-8') + error.decode('utf-8') 467 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 468 | script_output = ansi_escape.sub('', script_output) 469 | json_out["scripts_over_code"][script]["output"] = script_output 470 | if retcode != 0: 471 | json_out["scripts_over_code"][script]["ERROR"] = True 472 | if crawler.QL_QUERIES != []: 473 | # first, move back out of the repo 474 | os.chdir(cur_dir) 475 | json_out["QL_queries"] = {} 476 | for query in crawler.QL_QUERIES: 477 | print("Running QL query: " + query) 478 | json_out["QL_queries"][query] = {} 479 | # runQuery.sh does the following: 480 | # - create QL database (with name repo_name) 481 | # - save the result of the query.ql in repo_name__query__results.csv 482 | # - clean up: delete the bqrs file 483 | error, output, retcode = run_command( "src/runQuery.sh TESTING_REPOS/" + repo_name + " " 484 | + repo_name + " " + query + " " + crawler.output_dir) 485 | if crawler.VERBOSE_MODE: 486 | query_output = output.decode('utf-8') + error.decode('utf-8') 487 | ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') 488 | query_output = ansi_escape.sub('', query_output) 489 | json_out["QL_queries"][query]["output"] = query_output 490 | if retcode != 0: 491 | json_out["QL_queries"][query]["ERROR"] = True 492 | if crawler.RM_AFTER_CLONING: 493 | run_command( "rm -rf QLDBs/" + repo_name) 494 | os.chdir( "TESTING_REPOS/" + repo_name) 495 | 496 | 497 | return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) 498 | -------------------------------------------------------------------------------- /tests/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update \ 4 | && DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends python3 python3-pip git vim curl wget 5 | 6 | RUN mkdir -p /home/playground 7 | 8 | COPY . /home/playground/ 9 | 10 | WORKDIR /home/playground 11 | 12 | RUN ./build.sh 13 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | ## Testing 2 | 3 | The tests run on a specific commit SHA of `memfs`. 4 | The test Docker image also pins specific versions of nodejs, npm, and yarn to ensure consistent results.
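The pinned versions are defined in `tests/build.sh` (at the time of writing, `NODEJS_VERSION="16.10.0"` and `YARN_VERSION="1.22.17"`).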
5 | 6 | ``` 7 | # set up the docker container to run the tests 8 | ./prepTestDocker.sh 9 | 10 | # actually run the tests 11 | ./runTestDocker.sh 12 | ``` 13 | 14 | The test Docker image is built from the version of the npm-filter source code in the `src` directory. This test should be run after any update to the source code, to ensure that the functionality is preserved. 15 | 16 | The tests run `src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905`, inside the constructed test Docker container. The output JSON file produced is `diff`ed against the expected output file; any difference would cause the test to fail. 17 | 18 | If the tests pass, you should see the following output: 19 | ``` 20 | memfs: test passed 21 | ``` 22 | If the tests fail, then the `diff` will be printed to the terminal. 23 | 24 | If you extend the npm-filter functionality, then [the expected JSON output file](https://github.com/emarteca/npm-filter/blob/master/tests/memfs__results_expected.json) will need to be updated accordingly. 25 | -------------------------------------------------------------------------------- /tests/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # specific versions to lock at 4 | # note that nodejs comes with npm 5 | YARN_VERSION="1.22.17" 6 | NODEJS_VERSION="16.10.0" 7 | 8 | apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential 9 | curl -sL https://deb.nodesource.com/setup_12.x | bash - 10 | apt-get update 11 | 12 | curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - 13 | echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list 14 | apt-get update 15 | 16 | curl https://sh.rustup.rs -sSf | sh -s -- -y 17 | source $HOME/.cargo/env 18 | 19 | pip3 install --upgrade setuptools setuptools_rust wheel 20 | pip3 install scrapy bs4 21 | 22 | rm build.sh 23 | 24 | mkdir TESTING_REPOS 25 | 26 | # install specific version of node 27 | # https://askubuntu.com/questions/957439/how-to-install-a-specific-version-of-node-on-ubuntu-server 28 | wget https://nodejs.org/dist/v${NODEJS_VERSION}/node-v${NODEJS_VERSION}-linux-x64.tar.gz 29 | mkdir -p /opt/nodejs 30 | tar -xvzf node-v${NODEJS_VERSION}-linux-x64.tar.gz -C /opt/nodejs 31 | cd /opt/nodejs 32 | mv node-v${NODEJS_VERSION}-linux-x64 ${NODEJS_VERSION} 33 | ln -s ${NODEJS_VERSION} current 34 | ln -s /opt/nodejs/current/bin/node /usr/bin/node 35 | 36 | # link npm and use it to install yarn and common testing packages 37 | ln -s /opt/nodejs/current/bin/npm /usr/bin/npm 38 | 39 | npm install -g yarn@${YARN_VERSION} 40 | npm install -g jest mocha tap ava nyc 41 | 42 | echo PATH=/opt/nodejs/current/bin/:$PATH >> /root/.bashrc 43 | -------------------------------------------------------------------------------- /tests/memfs__results_expected.json: -------------------------------------------------------------------------------- 1 | { 2 | "installation": { 3 | "installer_command": "yarn" 4 | }, 5 | "build": { 6 | "build_script_list": [ 7 | "build" 8 | ] 9 | }, 10 | "testing": { 11 | "test": { 12 | "num_passing": 265, 13 | "num_failing": 0, 14 | "test_infras": [ 15 | "jest" 16 | ], 17 | "timed_out": false 18 | }, 19 | "test:coverage": { 20 | "num_passing": 265, 21 | "num_failing": 0, 22 | "test_infras": [ 23 | "jest" 24 | ], 25 | "timed_out": false 26 | }, 27 | "tslint": { 28 | "test_linters": [ 29 | "tslint -- linter" 30 | ], 31 |
"RUNS_NEW_USER_TESTS": false, 32 | "timed_out": false 33 | } 34 | }, 35 | "metadata": { 36 | "repo_link": "https://github.com/streamich/memfs", 37 | "repo_commit_SHA": "863f373185837141504c05ed19f7a253232e0905" 38 | } 39 | } -------------------------------------------------------------------------------- /tests/prepTestDocker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cp -r ../src ../configs/default_filter_config.json . 4 | docker build -t npm-filter . 5 | 6 | rm -r src 7 | rm default_filter_config.json 8 | -------------------------------------------------------------------------------- /tests/runTestDocker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -d local_mount ]; then 4 | mkdir local_mount 5 | fi 6 | 7 | docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \ 8 | -it npm-filter:latest \ 9 | bash -c "./runTests.sh" 10 | rm -r local_mount 11 | -------------------------------------------------------------------------------- /tests/runTests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # if we have a custom version of node, add it to the PATH 4 | if [ -d /opt/nodejs/current/bin ]; then 5 | PATH=/opt/nodejs/current/bin/:$PATH 6 | fi 7 | 8 | # memfs 9 | if [[ -d TESTING_REPOS/memfs ]]; then 10 | rm -rf TESTING_REPOS/memfs 11 | fi 12 | 13 | python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905 >/dev/null 2>&1 14 | 15 | pkg_diff=`diff memfs__results.json memfs__results_expected.json` 16 | if [ "$pkg_diff" = "" ]; then 17 | echo "memfs: test passed" 18 | else 19 | echo "memfs: test failed" 20 | echo "memfs failing diff: " $pkg_diff 21 | fi 22 | --------------------------------------------------------------------------------