├── .github
    └── workflows
    │   ├── barbosa23.yml
    │   ├── barbosa23flaky.yml
    │   ├── end2end.yml
    │   ├── end2endCustomContainers.yml
    │   └── smoketest.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── Tutorial.md
├── build.sh
├── configs
    ├── QL_output_config.json
    ├── README.md
    ├── build_only_config.json
    ├── custom_install_only.json
    ├── default_filter_config.json
    ├── verbose.json
    └── verbose_only.json
├── get_rel_project_reqs.js
├── input_list_scripts
    ├── README.md
    ├── get_dep_repos.sh
    ├── get_package_deps.js
    ├── get_package_repo_link.py
    └── package.json
├── output_proc_scripts
    ├── README.md
    ├── count_tests_run.py
    └── get_json_results.py
├── qlpack.yml
├── runDocker.sh
├── runParallelGitRepos.sh
├── runParallelGitReposDocker.sh
├── run_verbose_for_repo_and_config.sh
├── src
    ├── TestInfo.py
    ├── diagnose_github_repo.py
    ├── diagnose_npm_package.py
    ├── get_repo_links.py
    ├── middlewares.py
    ├── output_parsing
    │   └── test_output_proc.py
    ├── runQuery.sh
    └── test_JS_repo_lib.py
└── tests
    ├── Dockerfile
    ├── README.md
    ├── build.sh
    ├── memfs__results_expected.json
    ├── prepTestDocker.sh
    ├── runTestDocker.sh
    └── runTests.sh


/.github/workflows/barbosa23.yml:
--------------------------------------------------------------------------------
  1 | name: Test on all of Barbosa23 JS (well, first 250ish)
  2 | 
  3 | on:
  4 |   workflow_dispatch
  5 | 
  6 | env:
  7 |   PROJECTS_JSON: |
  8 |     { projects: [
  9 |       {"project_url": "adriantoine/enzyme-to-json", "project_sha": "7d90cdf5f1878815a46b3a53f4e1e1b63418b38f"},
 10 |       {"project_url": "agenda/agenda", "project_sha": "41a2b3793400073f564c37f7d2d0ec2d7e237bf2"},
 11 |       {"project_url": "airbnb/mocha-wrap", "project_sha": "e6bf4f6cff6d40425b2af323186cc1e69d05a270"},
 12 |       {"project_url": "allenmyao/canvas-graph-creator", "project_sha": "fadcd223a82ff665ee34685a1845d8087b997ee3"},
 13 |       {"project_url": "americanexpress/one-app-cli", "project_sha": "23a992558cc32cdc8a51c11e4fe80c2e2924aaf9"},
 14 |       {"project_url": "amireh/happypack", "project_sha": "e45926e9754f42098d882ff129269b15907ef00e"},
 15 |       {"project_url": "andreypopov/node-red-contrib-deconz", "project_sha": "7a7cdb10e4c9430a10dfe28fc9295abeaf107af5"},
 16 |       {"project_url": "andyholmes/gnome-shell-extension-gsconnect", "project_sha": "370493b76ab4ee7f30ba154b1e5b554a02413703"},
 17 |       {"project_url": "angular-translate/angular-translate", "project_sha": "1114534c064eddfb77fc4243b0deb61c37f5f41f"},
 18 |       {"project_url": "angular-ui/ui-sortable", "project_sha": "e763b5765eea87743c8463ddf045a53015193c20"},
 19 |       {"project_url": "apache/cordova-lib", "project_sha": "797286963eb526a2f5ad673291ff5733d6fb275b"},
 20 |       {"project_url": "apache/incubator-ponymail-foal", "project_sha": "f5addb5824e0c4d08474b22840ce556deade48f6"},
 21 |       {"project_url": "apiaryio/dredd", "project_sha": "5ab7b162afbbd8881cd716f27627dc2d05213eb7"},
 22 |       {"project_url": "apiaryio/dredd-transactions", "project_sha": "57477169b82a2980cb279c80a9caae5825754826"},
 23 |       {"project_url": "appium/appium", "project_sha": "2d124323c5973ef9d3e190f7401e67106886ffd4"},
 24 |       {"project_url": "appium/appium-desktop", "project_sha": "12a988aa08b9822e97056a09486c9bebb3aad8fe"},
 25 |       {"project_url": "atom-community/atom", "project_sha": "0f7c5c14eaad9643bdc16cf80579b457baa2dd8a"},
 26 |       {"project_url": "atom/atom", "project_sha": "1c3bd35ce238dc0491def9e1780d04748d8e18af"},
 27 |       {"project_url": "atom/find-and-replace", "project_sha": "7871ad213e2c09f99e003c8f97cd7d4b7f9f2d82"},
 28 |       {"project_url": "aurelia/cli", "project_sha": "82091bbeebcc4b08c9929e37a8cd91c5b5025791"},
 29 |       {"project_url": "Automattic/kue", "project_sha": "c5647b1a8890319169fa4ce2cf4ed4122c1c704a"},
 30 |       {"project_url": "avajs/ava", "project_sha": "568fe40c987dd6c593dfbcf4144d1d1627955d46"},
 31 |       {"project_url": "axa-ch-webhub-cloud/pattern-library", "project_sha": "04d7e0f227f85d7b39eb0a6bfa9911076027e924"},
 32 |       {"project_url": "axa-ch/patterns-library", "project_sha": "04d7e0f227f85d7b39eb0a6bfa9911076027e924"},
 33 |       {"project_url": "azachar/protractor-screenshoter-plugin", "project_sha": "989f8e0b52b986f7ddb07831b5b92dca6dceeb07"},
 34 |       {"project_url": "Azure/azure-iot-sdk-node", "project_sha": "450c672001eb96d99587eaeae5fe75ab0912e5d6"},
 35 |       {"project_url": "babel/babel-eslint", "project_sha": "b5b9a09edbac4350e4e51033a4608dd95dad1f67"},
 36 |       {"project_url": "badges/shields", "project_sha": "14892e3943a4677332618d8b9f584766f7940ee7"},
 37 |       {"project_url": "bbc/simorgh", "project_sha": "4c7e7d1ecc525dd62fb14bd98035a5e739c14290"},
 38 |       {"project_url": "bcgov/name-examination", "project_sha": "b55fc1127e0db98dc4fe780ad80831f4b1a2872e"},
 39 |       {"project_url": "bcoin-org/bcoin", "project_sha": "b0058696cc10c8f9b17190b31fd2cd907d85d047"},
 40 |       {"project_url": "beakerbrowser/beaker", "project_sha": "764bdefeeed9558dbf10aec77df262a896f57236"},
 41 |       {"project_url": "bee-queue/bee-queue", "project_sha": "f6d901308f3b6433f2531edc4a9ac354aab434e5"},
 42 |       {"project_url": "bkimminich/juice-shop", "project_sha": "b156c969d7bc8f24544f162f482c6285f58b4285"},
 43 |       {"project_url": "blocknative/assist", "project_sha": "3fb619e3994752eacbddba4078d2bf0cbc7e2c9c"},
 44 |       {"project_url": "bmazzarol/atom-plsql-linter", "project_sha": "02f6a1d48c4b5dbaa375dfb13d52703fc14b90a0"},
 45 |       {"project_url": "BookMyComics-Developers/BookMyComics", "project_sha": "1efe6adb3490d7f62e7b31e3d75ac15b3b981875"},
 46 |       {"project_url": "brave/brave-browser", "project_sha": "870d381ff8b08cb70d2b9fdea4b320d17bfe68f7"},
 47 |       {"project_url": "brion/ogv.js", "project_sha": "5ce404a6aa8f53b7cef220916b89e613ac58fd17"},
 48 |       {"project_url": "busterjs/buster", "project_sha": "5e20f3e23aeb7ea996be7a669e520c054b8f1035"},
 49 |       {"project_url": "CalebMorris/react-moment-proptypes", "project_sha": "89a61c17250ea7b71d55d2855f6739ae4071529a"},
 50 |       {"project_url": "CartoDB/cartodb", "project_sha": "9518ec6917e4091a56dc7b9d5fbf089bcb003271"},
 51 |       {"project_url": "cerner/terra-core", "project_sha": "15458289ff022f302144932e047a6669b6c461a5"},
 52 |       {"project_url": "cerner/terra-dev-site", "project_sha": "80a0e471548f553b7e58e30a2a0b6e8c0e7682fc"},
 53 |       {"project_url": "cloudfoundry-attic/cf-abacus", "project_sha": "68aad9e2d497335d3a2e0da736bb9f01fe54dfb3"},
 54 |       {"project_url": "cncf/landscapeapp", "project_sha": "62fa27892cd9e9095567c0c7e5d84fd514149cd9"},
 55 |       {"project_url": "codeceptjs/CodeceptJS", "project_sha": "3fb39ae1d4f9b00438b1398cefba0dc677260aeb"},
 56 |       {"project_url": "codetheweb/tuyapi", "project_sha": "905670c7cf7a8ad5756ea08eeca115178121423b"},
 57 |       {"project_url": "covidwatchorg/portal", "project_sha": "95e36eeb777fca76318b5b0680c82f43f502fee3"},
 58 |       {"project_url": "cryptee/web-client", "project_sha": "10f96daff7214a0e5afb71e56eed7256e59e17b0"},
 59 |       {"project_url": "ctrlplusb/react-universally", "project_sha": "83d533a9c780716d18f034f7fb52dbd3a1c4051b"},
 60 |       {"project_url": "cypress-io/cypress-example-recipes", "project_sha": "292325e6638bb4626861bc2f6df99d26ab8e7bff"},
 61 |       {"project_url": "DataDog/dd-trace-js", "project_sha": "71a5288dea5df31c6a492ce22ff8169552548d47"},
 62 |       {"project_url": "DeComX/pantheon", "project_sha": "deepforge-dev - deepforge"},
 63 |       {"project_url": "deepforge-dev/deepforge", "project_sha": "f9cb1ff12644f64c01ca4d71ca66e6e22506b084"},
 64 |       {"project_url": "dhis2/ui", "project_sha": "625c9c9391cdc6f625c927d20a39eef37f550a4a"},
 65 |       {"project_url": "digidem/mapeo-core", "project_sha": "fd782a55cebb5f54a45f2f042287218c849b5f35"},
 66 |       {"project_url": "dmitriz/min-karma", "project_sha": "8f1bcd25315d34a304d0d358166b9cb95a8a7871"},
 67 |       {"project_url": "Dogfalo/materialize", "project_sha": "824e78248b3de81e383445e76ffb04cc3264fe7d"},
 68 |       {"project_url": "domvm/domvm", "project_sha": "67de1a0cdf1879ad87926dafde0b8961f660c906"},
 69 |       {"project_url": "duckduckgo/tracker-radar-collector", "project_sha": "3e9f49e46e5051e9f3d26bcd3be054447af887e4"},
 70 |       {"project_url": "dukecon/dukecon_pwa", "project_sha": "127e8425ccff201a394448864407403c4e80d691"},
 71 |       {"project_url": "dustinspecker/generator-ng-poly", "project_sha": "53f0beec9ad9a33a9f6b47649ca34a4d6bae95f8"},
 72 |       {"project_url": "EFForg/privacybadger", "project_sha": "6f81b217e5717c46867cfec9e9b378da9354a84a"},
 73 |       {"project_url": "elastic/apm-agent-nodejs", "project_sha": "9f13472d69523109d69315c6bb212957e46809cb"},
 74 |       {"project_url": "elifesciences/elife-xpub", "project_sha": "bccea1e199bd213eef8ad03fca33d66727e34ccd"},
 75 |       {"project_url": "ember-app-scheduler/ember-app-scheduler", "project_sha": "fb0b4e0075cf8847664e5459cd59bf74a0a1d379"},
 76 |       {"project_url": "ember-batcher/ember-batcher", "project_sha": "231fb12ae51fde5e42704fa0e1daece8dd371532"},
 77 |       {"project_url": "ember-cli/ember-cli", "project_sha": "b851c0edcae99701335e3e90efe0c225951c4f0b"},
 78 |       {"project_url": "emberjs/ember.js", "project_sha": "3fa9068831b1e3cf8e594647a880adc0809861f3"},
 79 |       {"project_url": "eobrain/bajel", "project_sha": "ecbfe18a990e97f677e522a7240617df29d47cd6"},
 80 |       {"project_url": "eslint/eslint", "project_sha": "9e3d77cba65d0e38e07996e57961fb04f30d9303"},
 81 |       {"project_url": "ether/etherpad-lite", "project_sha": "7656c6b9f195a79bb07bd3b77b55de1393ab71f4"},
 82 |       {"project_url": "ethereum/web3.js", "project_sha": "f8a2533c2b09ce0a62f8414f2f6eed83ab78ca1f"},
 83 |       {"project_url": "ExpressGateway/express-gateway", "project_sha": "a294cac39c98d66f5750c424a24e0bb8ce351c1c"},
 84 |       {"project_url": "facebook/metro", "project_sha": "c6a94bc170cf95a6bb21b5638929ec3311a9a5b7"},
 85 |       {"project_url": "facebook/prepack", "project_sha": "5beedbe85bd5b9d2de1264abafbb3b76f8584297"},
 86 |       {"project_url": "facebook/react-native", "project_sha": "af99a6890b84713d002fbbd872f10fe2e6304861"},
 87 |       {"project_url": "fastify/fastify", "project_sha": "d1ad6c17ce9731f1bc28377318b010966ca339cd"},
 88 |       {"project_url": "flow-typed/flow-typed", "project_sha": "0e28de5e8a69def522d61f06ddffb624d465bceb"},
 89 |       {"project_url": "FlowCrypt/flowcrypt-browser", "project_sha": "92d0188c66572d2c14ef4ed24602b8a58445630c"},
 90 |       {"project_url": "FormidableLabs/nodejs-dashboard", "project_sha": "885fc96fec262b668da9282f57374966f7512b76"},
 91 |       {"project_url": "freeboardgames/FreeBoardGames.org", "project_sha": "b11dbaa3715d71605bced4c8f04a40a79bd7cfef"},
 92 |       {"project_url": "freedomjs/freedom", "project_sha": "9638e840aec9598c4d60383ed22444c525aefbf5"},
 93 |       {"project_url": "freedomjs/freedom-for-chrome", "project_sha": "0154d345e99ac781460a790a31772c4352cb41b6"},
 94 |       {"project_url": "freedomjs/freedom-for-firefox", "project_sha": "3a2922f378a9dbbb58f302b0216b56ec23cf17b3"},
 95 |       {"project_url": "getgauge/taiko", "project_sha": "532c62c69da79852ef3cf8abd2325d2fff903a15"},
 96 |       {"project_url": "GioBonvi/GoogleContactsEventsNotifier", "project_sha": "7e657a9e606f449fef22feae68d448d11083122b"},
 97 |       {"project_url": "google/shaka-player", "project_sha": "a543b80648f429524c522295b0f4f60039c2e0ea"},
 98 |       {"project_url": "googleads/videojs-ima", "project_sha": "11ecbefa37fbdbd6877fece63c38c11338b9e913"},
 99 |       {"project_url": "GoogleChrome/lighthouse", "project_sha": "b981a38e7b3becc512f0a7985b1d2a64320da235"},
100 |       {"project_url": "GoogleChrome/workbox", "project_sha": "ee62b5b5b9ed321af457a2d962b2a34196a80263"},
101 |       {"project_url": "hack4impact-uiuc/life-after-hate", "project_sha": "9cad8555b52ff6bd98c7d15fae456e2f8b7a2a8a"},
102 |       {"project_url": "hapijs/lab", "project_sha": "aaaebb95108d3fdcb264a56e836c3459380844b1"},
103 |       {"project_url": "hapijs/nes", "project_sha": "977750a158e0b0105c719e0e2d4bd354154bf0a8"},
104 |       {"project_url": "hapijs/subtext", "project_sha": "ae0a2dd48ab8c6e2b8ebdebbc31baddb6b4c49b7"},
105 |       {"project_url": "hapipal/hpal", "project_sha": "4661f17ac8bdb1d3915695b2f819ff2336730131"},
106 |       {"project_url": "hapipal/schwifty", "project_sha": "088088572e7aac82b77a78d9c8ed05e7f1d5e957"},
107 |       {"project_url": "Haufe-Lexware/wicked.haufe.io", "project_sha": "1efadeabae7b7ccb4b17473e9aa5d0af60796adb"},
108 |       {"project_url": "hden/node-serf", "project_sha": "d176dede5c87e0285c383f7bbda3848584d6a2ad"},
109 |       {"project_url": "HSLdevcom/transitlog-ui", "project_sha": "316a7843c2a8e6d66db7f4c9181f775f95f926ed"},
110 |       {"project_url": "html-next/vertical-collection", "project_sha": "fd928512a33d44155a724ed65c5ba21cf7950d86"},
111 |       {"project_url": "Human-Connection/Human-Connection", "project_sha": "72a8f3d7f567442ca5e191672abfb47ea1b825a6"},
112 |       {"project_url": "hyperledger/cactus", "project_sha": "334612d251c56811a844b3308dc1561dcd6fc460"},
113 |       {"project_url": "IBM-Cloud/gp-js-client", "project_sha": "8ac9e9b0ebee3264d446d68ff487ef995173bff0"},
114 |       {"project_url": "ikydd/blackat", "project_sha": "26a8ba8dac8be027978b5fc046131936aadb76ec"},
115 |       {"project_url": "IMA-WorldHealth/bhima", "project_sha": "f76ac0085b2566d249cdd6ab135950faf0e10da3"},
116 |       {"project_url": "ing-bank/lion", "project_sha": "02e61285ddc83e4cb2ec7d2acc6d6a6620a94924"},
117 |       {"project_url": "iodide-project/iodide", "project_sha": "f9dd78a725ce1a2aa96784a46b527b740605431b"},
118 |       {"project_url": "ipfs-inactive/js-ipfs-http-client", "project_sha": "995abb41b83c8345b16cba67151e9ccb9cbea4de"},
119 |       {"project_url": "israelroldan/grunt-ssh", "project_sha": "7175b5548291bb2105a33a45d772573cb888430d"},
120 |       {"project_url": "istanbuljs/nyc", "project_sha": "ab7c53b2f340b458789a746dff2abd3e2e4790c3"},
121 |       {"project_url": "jaggedsoft/node-binance-api", "project_sha": "950d773a5f2c3a61c6e29b53e3af57594921a239"},
122 |       {"project_url": "jamesshore/quixote", "project_sha": "6b5c07b4d202d44e0ee6ecd99c22df4547558c17"},
123 |       {"project_url": "jamhall/s3rver", "project_sha": "f834192dbb07da4548b48c95066bae50cfaac819"},
124 |       {"project_url": "JeroenDeDauw/Maps", "project_sha": "f9bec919e77d671c4e96f9aa16d0452d17f700c7"},
125 |       {"project_url": "jivid/akobi", "project_sha": "ccd8d4de55b2066db9c11f9f00ffeed36ea33673"},
126 |       {"project_url": "jorgebucaran/hyperapp", "project_sha": "c3717e3ff78b6fa8663575d34d330d68929a0974"},
127 |       {"project_url": "jrcasso/mean-demo", "project_sha": "31f3e21420fd5ef13cc7555a56e3106a31dd4a36"},
128 |       {"project_url": "json-schema-faker/json-schema-faker", "project_sha": "9bbe0e895cc9ebce939d5f358385f151d72c739c"},
129 |       {"project_url": "jwplayer/jwplayer", "project_sha": "30353cd1e1f3017a96ef2854ef758fb4f479cd7a"},
130 |       {"project_url": "kaliber5/ember-bootstrap", "project_sha": "c92d1898b715da0ebd534a813a4ce592d1ed115c"},
131 |       {"project_url": "kategengler/ember-cli-code-coverage", "project_sha": "46dc079ab518bddc325fb305790d58adf2c28aae"},
132 |       {"project_url": "keystonejs/keystone", "project_sha": "67f0f2ce7fa58288cf06d198e4b1a5c51d265bcf"},
133 |       {"project_url": "kgiszewski/Archetype", "project_sha": "2e0bce99b9f386aa24a56be02fca8cd7388b39bd"},
134 |       {"project_url": "kiwicom/smart-faq", "project_sha": "2131be6290020a11dc6ad236eb82c5bde75945d8"},
135 |       {"project_url": "Lambda-School-Labs/labs-spa-starter", "project_sha": "2d1bbf41db2a97574c62cc3d6745cc0b2e644ead"},
136 |       {"project_url": "lekoder/consul-kv-object", "project_sha": "5cf3c44f416d28d11c567c9caab86b27e3e0f0a0"},
137 |       {"project_url": "liferay/senna.js", "project_sha": "fd89ca02de0ad57e7697c5088f4e490f8d181958"},
138 |       {"project_url": "linkeddata/dokieli", "project_sha": "52f9c3cc8519d45339996f2a926bae18c37bf5d8"},
139 |       {"project_url": "LLK/scratch-vm", "project_sha": "e4bb21f1817a2b7bbca9be19da6eba529291ed0c"},
140 |       {"project_url": "magda-io/magda", "project_sha": "754ec4cf2aff491549007cd82f676da4c3759061"},
141 |       {"project_url": "magento/pwa-studio", "project_sha": "836aa40608465ccc28066d4fbdddee3a6a560b75"},
142 |       {"project_url": "marcos8896/nutrition-care-node-api", "project_sha": "20b08a443d4d7714dc8ea137b3ffcce51f5524c0"},
143 |       {"project_url": "marionettejs/backbone.marionette", "project_sha": "85936fc518dd7bb0934faf231123172e3eee0169"},
144 |       {"project_url": "marklogic-community/marklogic-samplestack", "project_sha": "5449924fe9abd1712d3ef20ca2f25f2e291578e0"},
145 |       {"project_url": "material-components/material-components-web", "project_sha": "a9ff9866f237fbeebe94e655ae578b68ce675a04"},
146 |       {"project_url": "mbland/custom-links", "project_sha": "3e58bb2b4ea335451489d9b81226a414d7352c3f"},
147 |       {"project_url": "mcollina/autocannon", "project_sha": "ba3a2124fa68be6f263e860001be419d71de39d9"},
148 |       {"project_url": "meteor/meteor", "project_sha": "dc38e4325dcd88fb3c6d1be1639680c6ff6f5e80"},
149 |       {"project_url": "microsoft/ChakraCore", "project_sha": "c3ead3f8a6e0bb8e32e043adc091c68cba5935e9"},
150 |       {"project_url": "mikakaraila/node-red-contrib-opcua", "project_sha": "aec7272f4f7554a7473daf19136e6fa8c9dfc681"},
151 |       {"project_url": "milieuinfo/webcomponent-vl-ui-wizard", "project_sha": "efecc0c4f3659ac1348ae456604534d42e6b90b7"},
152 |       {"project_url": "mishoo/UglifyJS", "project_sha": "f0ca9cfbe65efc919149e7cd74cedd186d6413ee"},
153 |       {"project_url": "mitodl/open-discussions", "project_sha": "462c242eab04f68552e80a6f416c18c4b0b57cb0"},
154 |       {"project_url": "mocha-parallel/mocha-parallel-tests", "project_sha": "d1b2e88fa6bad71d0a5d7487809fcb4be4030b9d"},
155 |       {"project_url": "mohsen1/yawn-yaml", "project_sha": "aab6ee95ead9da9b7f1b1bbfb7325b2e90d7d3f5"},
156 |       {"project_url": "moorara/microservices-demo", "project_sha": "bc16c5eeb6091392e62d0c260d2acfe48aef4b06"},
157 |       {"project_url": "mozilla/blok", "project_sha": "faac2281c48cd226b4fb8c4e22de588a02328c31"},
158 |       {"project_url": "mui-org/material-ui", "project_sha": "6e8b99d133025c9e785a778a183fa81383998a42"},
159 |       {"project_url": "n5ro/aframe-extras", "project_sha": "5c20172a159aba54e7b6f7f243a864f76905448e"},
160 |       {"project_url": "nasa-gibs/worldview", "project_sha": "c4769a03394676dd4ec7126cc14a7c67dc7e4eaf"},
161 |       {"project_url": "NativeScript/nativescript-cli", "project_sha": "eb918011d6f0be9a8ccb6b569628e3960fd4f8b9"},
162 |       {"project_url": "nccgroup/tracy", "project_sha": "6ce4714a3b3b407503cecd8c9842132fe4dc37e4"},
163 |       {"project_url": "neffo/earth-view-wallpaper-gnome-extension", "project_sha": "016c982dccd9e7b454b84e9f50b4accc1b4348d6"},
164 |       {"project_url": "NetsBlox/NetsBlox", "project_sha": "419ca83482c562a0cfa5af1d2dd9907b7387f7ef"},
165 |       {"project_url": "nightwatchjs/nightwatch", "project_sha": "4b09cb57c8a9fb29d6b6795e59c64b4942bddf67"},
166 |       {"project_url": "noble/bleno", "project_sha": "72028bc995d55cb9dcf223f9b0ffce563d091212"},
167 |       {"project_url": "nock/nock", "project_sha": "8a38f41a28b36fef50d5723daa94cf21a6490fc5"},
168 |       {"project_url": "node-alarm-dot-com/homebridge-node-alarm-dot-com", "project_sha": "26516177a2324aa53b0cfbb8af52fb1354be78be"},
169 |       {"project_url": "nodejs/citgm", "project_sha": "460c3a008f1c33bda2e136631d0162479419ed36"},
170 |       {"project_url": "nodejs/node-chakracore", "project_sha": "770c8dcd1bc3e0fce2d4497b4eec3fe49d829d43"},
171 |       {"project_url": "nodejs/undici", "project_sha": "c415fbbb59e2b898c5db6a681265cf3da865d02c"},
172 |       {"project_url": "npm/cli", "project_sha": "29622c1349b38173924058a1fb0ede9edf8a5f6f"},
173 |       {"project_url": "NSWSESMembers/availability-poc", "project_sha": "7ebc17b6005a3c1573e6c68bd5411b0657c98f71"},
174 |       {"project_url": "nwjs-community/nw-builder", "project_sha": "a1d4fb5148255e2b6fa5bce4a2167c9be8cc71d6"},
175 |       {"project_url": "observablehq/plot", "project_sha": "4d3cd1586e7412b95687157d12c792fde84a2229"},
176 |       {"project_url": "ocadotechnology/rapid-router", "project_sha": "38adf70a3e76a05fa814a7d3c0e1c61e4ba125c2"},
177 |       {"project_url": "ONSdigital/eq-author-app", "project_sha": "8bb1621cd4973281730a38378765b1718b08ca54"},
178 |       {"project_url": "Ontotext-AD/graphdb.js", "project_sha": "d0880dabf966e82def44537a720bf620d6d29f5e"},
179 |       {"project_url": "open-wc/open-wc", "project_sha": "57ddb3ccfff6b00468d3a7ebabbc15cfe966f7a9"},
180 |       {"project_url": "OpenEnergyPlatform/oeplatform", "project_sha": "1ce978f8faade3effe4cf7d3eec7522e990df910"},
181 |       {"project_url": "openseadragon/openseadragon", "project_sha": "ebab356c207e626b6622f88ffcb0cd28b918f85d"},
182 |       {"project_url": "openstyles/stylus", "project_sha": "50a0a115d1c6587d221f3253feeb4cb88b6f5336"},
183 |       {"project_url": "Opentrons/opentrons", "project_sha": "f8f7e699d512f59e1a2f4a9969428744e86a6a22"},
184 |       {"project_url": "OpenZeppelin/openzeppelin-contracts", "project_sha": "604025400f9be5c32581bb6ab03a46bbc09c5562"},
185 |       {"project_url": "OriginProtocol/origin", "project_sha": "57c55c023188e3a53cb9ee3dfafe0bf3210e0cf8"},
186 |       {"project_url": "owncloud/contacts", "project_sha": "efb06fef530dbf1812cbb98d651ec87680de97a1"},
187 |       {"project_url": "palantir/eclipse-typescript", "project_sha": "007579ba58d2979a5989caf04733a9d5dfcc56de"},
188 |       {"project_url": "particle-iot/particle-cli", "project_sha": "07dfa4e7d928d9641be368881b2216c6fb017c6c"},
189 |       {"project_url": "perfsonar/toolkit", "project_sha": "d4c8906acdf7d8be49cf37b59939748945e526d9"},
190 |       {"project_url": "pingyhq/pingy-cli", "project_sha": "53721434b698f53ba195c4824ca8d1f87ea8b60c"},
191 |       {"project_url": "poanetwork/tokenbridge", "project_sha": "961b12b9f3545830a04044e109762277efcea6ef"},
192 |       {"project_url": "postmanlabs/newman", "project_sha": "89941554304362d0cfec2914d134f738348b27c5"},
193 |       {"project_url": "postmanlabs/postman-runtime", "project_sha": "7855b3ae5858734bfb6f0c5985592d8b2957f4d1"},
194 |       {"project_url": "pouchdb/pouchdb", "project_sha": "546c8bb696872f86816574d02d47131ace0b4d18"},
195 |       {"project_url": "PowerlineApp/powerline-mobile", "project_sha": "2030817dc80a07f3cfc2129bd830ce33ab50373d"},
196 |       {"project_url": "premasagar/sqwidget", "project_sha": "7edc6d21997bb18da7daa59068926a082028d6f0"},
197 |       {"project_url": "PrismJS/prism", "project_sha": "59e5a3471377057de1f401ba38337aca27b80e03"},
198 |       {"project_url": "probcomp/metaprob", "project_sha": "43c4bea80772ed8b2baa51cd5ac6c593a34a3a8b"},
199 |       {"project_url": "ProjectMirador/mirador", "project_sha": "3c121dbe99bae4eab910cb2df00e93904bc123ea"},
200 |       {"project_url": "Quicksaver/Tab-Groups", "project_sha": "29ea6517e73eb5d58b2f0b9fc2d65d589d910e8a"},
201 |       {"project_url": "regl-project/regl", "project_sha": "3d90d57d473b5dee6680dc97897f4a9fba465501"},
202 |       {"project_url": "reportportal/service-ui", "project_sha": "049abcb8fc70ee131625914e9da4a748e23d2230"},
203 |       {"project_url": "restify/node-restify", "project_sha": "89e7ac81a4cc885d153df6f07d5cf35ed75fd4d0"},
204 |       {"project_url": "rtfeldman/node-test-runner", "project_sha": "16cd4b9c8e5dab3ce297039f5d72d372bdd63de9"},
205 |       {"project_url": "ruiquelhas/blaine", "project_sha": "a69cdad6e59ebb19493018eacb7b7602f2225ce1"},
206 |       {"project_url": "ruiquelhas/copperfield", "project_sha": "fe5629ed8f5edea740ca4917dfac6a779e644b45"},
207 |       {"project_url": "ruiquelhas/electron-recipes", "project_sha": "9bcfc2520ad383c1e5bebe9c427214cab1d0a0da"},
208 |       {"project_url": "ruiquelhas/fischbacher", "project_sha": "35eb4dcf0225a8899e13a3ab63c3e878d9d434ca"},
209 |       {"project_url": "ruiquelhas/henning", "project_sha": "ac75e0b1cebdbb123eccb05277bc5c663f8e6696"},
210 |       {"project_url": "ruiquelhas/houdin", "project_sha": "4a700f66748b3a57a1c1ab6ee7bbe425ce20c526"},
211 |       {"project_url": "ruiquelhas/lafayette", "project_sha": "038578c360b22ff846daa7b3e6e0aeb712b145b2"},
212 |       {"project_url": "ruiquelhas/thurston", "project_sha": "071f9ee5265f64f47335b428a498df22895e549c"},
213 |       {"project_url": "sampotts/plyr", "project_sha": "0c9759455cbfcce888c66925c3b457ce06cee31e"},
214 |       {"project_url": "scalableminds/webknossos", "project_sha": "b91b15ff4180b2288c40ad9e3a86678258dcd5c9"},
215 |       {"project_url": "scality/Arsenal", "project_sha": "96cbaeb821d8045cbe8eabd00092290e13e46784"},
216 |       {"project_url": "SeleniumBuilder/se-builder", "project_sha": "8230ad58a526d3eb905d32a780daeaea1fb56a55"},
217 |       {"project_url": "serverless/serverless", "project_sha": "17d64e6c94b88a5daf36f28a4fa192c231052cfb"},
218 |       {"project_url": "SGrondin/bottleneck", "project_sha": "b83528333ba4d27cf70b81cc2be12e09d7ff692f"},
219 |       {"project_url": "signalapp/Signal-Desktop", "project_sha": "bd14b74e638dce03928e08ffbe2a83a6c047406e"},
220 |       {"project_url": "sindresorhus/npm-name", "project_sha": "7aef07b69ed35f584e0a8bf6cece96750becaf00"},
221 |       {"project_url": "sindresorhus/serialize-error", "project_sha": "a212a8c3902fa1ff1fdef8f7625dd0cc6d6e89a1"},
222 |       {"project_url": "skarfacegc/FlowTrack2", "project_sha": "990a2566f30b8dd84a61ea1ff6f58076016a7796"},
223 |       {"project_url": "solid/node-solid-server", "project_sha": "bbb8d78df7e8908e20e3052ae6655722aa6fa6de"},
224 |       {"project_url": "SolidarityEconomyAssociation/sea-map", "project_sha": "17fa76b9b4070354c31faae81ba0162b8f27bf1b"},
225 |       {"project_url": "soscripted/sox", "project_sha": "4be396373c06bb8340d740089018e364729bec70"},
226 |       {"project_url": "sourcecred/sourcecred", "project_sha": "3da222ebe44c110f265063cfa99316ed5c1fa0b3"},
227 |       {"project_url": "spark-notebook/spark-notebook", "project_sha": "69174f3923d0564d2078c0e0c70125245157d5b5"},
228 |       {"project_url": "stanford-oval/thingengine-core", "project_sha": "b69f7b0166d256428a08ba2dac3fc3ca8dddf611"},
229 |       {"project_url": "stealjs/steal-tools", "project_sha": "05f60d58e3ee56dbb8428c83121fdb6ee2b1825c"},
230 |       {"project_url": "stellar/js-stellar-sdk", "project_sha": "52947e81e487edf179a6003efb40a1425a4f7ff2"},
231 |       {"project_url": "stimulusreflex/stimulus_reflex", "project_sha": "52aa993165a656eccbe2cefaca9f5388509d014d"},
232 |       {"project_url": "streamr-dev/network", "project_sha": "4cdabba71db0a6c531c63368d1a78361fff01dce"},
233 |       {"project_url": "strongloop/loopback", "project_sha": "13371fd2a138a6f39db77e5a455b3170e5d4a0f5"},
234 |       {"project_url": "studentinsights/studentinsights", "project_sha": "4bb09f97eb9c0473a9ac6ee076171de12855e721"},
235 |       {"project_url": "sumup-oss/circuit-ui", "project_sha": "00ceacbd82b6cd3a71592ea9d2da5b95892f965b"},
236 |       {"project_url": "superscriptjs/superscript", "project_sha": "5e3e1b51654a54518dfada17c0cd9dc146c8e48a"},
237 |       {"project_url": "sveltejs/kit", "project_sha": "c4476c6d106b41dd8e6badbbdd0128b78be49d5c"},
238 |       {"project_url": "tarantool/graphql.0", "project_sha": "05f39946299cb2f35a97be326b992aace0205eaf"},
239 |       {"project_url": "testem/testem", "project_sha": "42fe29451b187bd1cd1e546228fa1bfbe11084f3"},
240 |       {"project_url": "thaliproject/jxcore", "project_sha": "d3ccd242a592416b6537dfea8ce539bd6208dd54"},
241 |       {"project_url": "thejoshwolfe/snakefall", "project_sha": "62bdfe3718f86ef85fc8c11e600bf621fa2a586c"},
242 |       {"project_url": "themgoncalves/react-loadable-ssr-addon", "project_sha": "2036a6f12e9048d8a6e3eb0a8097455fa0fe1ebc"},
243 |       {"project_url": "TheScienceMuseum/collectionsonline", "project_sha": "ef486c650bce9f2dccf25b7188dbe986d4b63c3c"},
244 |       {"project_url": "tmijs/tmi.js", "project_sha": "3904ae743a12b984aa1a175740e8b5bae08a03e4"},
245 |       {"project_url": "transloadit/uppy", "project_sha": "f07697e7f45e471ca16bac8751fa7221d9445605"},
246 |       {"project_url": "tristanHessell/mess-around", "project_sha": "19cdf7aa58eaf165a88ac7a3954fc7a33e5685bc"},
247 |       {"project_url": "trufflesuite/truffle", "project_sha": "0f17cf9680ac0dc7aa6a314ad3b78ad569daa896"},
248 |       {"project_url": "TryGhost/Ghost", "project_sha": "4da658e72ad42cf251e4fb100ca651a7d4dca79e"},
249 |       {"project_url": "tubbo/openrct2-benchwarmer", "project_sha": "504d75bfaf1b158dbe23e4bbfb926502189a0ff6"},
250 |       {"project_url": "tulios/kafkajs", "project_sha": "ff3b1117f316d527ae170b550bc0f772614338e9"},
251 |       {"project_url": "TypeStrong/ts-loader", "project_sha": "cf5326d9b5f1b804ff8d817f88fb127bc45ad9d1"},
252 |       {"project_url": "uber/baseweb", "project_sha": "65c791a6b5ac50722f34e2a7b1282b08c539f58a"},
253 |       {"project_url": "usdigitalresponse/neighbor-express", "project_sha": "130d9edd9ac09f2a8aa947b0d21f054d4dfc0462"},
254 |       {"project_url": "vega/vega", "project_sha": "b45cf431cd6c0d0c0e1567f087f9b3b55bc236fa"},
255 |       {"project_url": "video-dev/hls.js", "project_sha": "59d421479b5002993a5f3b36d4505adff3209fb5"},
256 |       {"project_url": "visgl/luma.gl", "project_sha": "044c0ef5f767cd56974e30475a30dd3f24305983"},
257 |       {"project_url": "w3c/aria-practices", "project_sha": "4adb78ea96b22db559577aa6ed64c9059596ab4a"},
258 |       {"project_url": "waiterio/api", "project_sha": "9948b542f5da1957c3f656d959c4f5957d364eb1"},
259 |       {"project_url": "web-animations/web-animations-js-legacy", "project_sha": "6a1c45473f9ba2db1ccad34f879bca829f77264d"},
260 |       {"project_url": "webdriverio/cucumber-boilerplate", "project_sha": "f91d34ff0bf9112d02830dc474f1a97ff6e8d9d3"},
261 |       {"project_url": "webex/webex-js-sdk", "project_sha": "cc743f187c646290dab21322431cbf8f1ce771a2"},
262 |       {"project_url": "webpack/webpack", "project_sha": "16143f5fa835ad8c7181b8aeedc52f9cdd0fd39d"},
263 |       {"project_url": "webpack/webpack-cli", "project_sha": "4e1c45ad8de888dea13247855c78848632475653"},
264 |     ]}
265 | 
266 | jobs:
267 |   build-matrix:
268 |     runs-on: ubuntu-latest
269 |     outputs:
270 |       matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }}
271 |     steps:
272 |       - id: set-matrix
273 |         run: |
274 |           echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT
275 |           echo $PROJECTS_JSON >> $GITHUB_OUTPUT
276 |           echo "__EOF__" >> $GITHUB_OUTPUT
277 |   execute:
278 |     needs: [build-matrix]
279 |     strategy:
280 |       matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }}
281 |       fail-fast: false
282 |     uses: ./.github/workflows/end2end.yml
283 |     with:
284 |       project_url: ${{ matrix.projects.project_url }}
285 |       project_sha: ${{ matrix.projects.project_sha }}
286 | 


--------------------------------------------------------------------------------
/.github/workflows/barbosa23flaky.yml:
--------------------------------------------------------------------------------
 1 | name: Test on Barbosa23 JS projects with >=5 flaky tests
 2 | 
 3 | on:
 4 |   push
 5 | 
 6 | env:
 7 |   PROJECTS_JSON: |
 8 |     { projects: [
 9 |       {"project_url": "appium/appium", "project_sha": "2d124323c5973ef9d3e190f7401e67106886ffd4"},
10 |       {"project_url": "badges/shields", "project_sha": "14892e3943a4677332618d8b9f584766f7940ee7"},
11 |       {"project_url": "facebook/react-native", "project_sha": "af99a6890b84713d002fbbd872f10fe2e6304861"},
12 |       {"project_url": "FlowCrypt/flowcrypt-browser", "project_sha": "92d0188c66572d2c14ef4ed24602b8a58445630c"},
13 |       {"project_url": "meteor/meteor", "project_sha": "dc38e4325dcd88fb3c6d1be1639680c6ff6f5e80"},
14 |       {"project_url": "yui/yui3", "project_sha": "25264e3629b1c07fb779d203c4a25c0879ec862c"}
15 |     ]}
16 | 
17 | jobs:
18 |   build-matrix:
19 |     runs-on: ubuntu-latest
20 |     outputs:
21 |       matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }}
22 |     steps:
23 |       - id: set-matrix
24 |         run: |
25 |           echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT
26 |           echo $PROJECTS_JSON >> $GITHUB_OUTPUT
27 |           echo "__EOF__" >> $GITHUB_OUTPUT
28 |   execute:
29 |     needs: [build-matrix]
30 |     strategy:
31 |       matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }}
32 |       fail-fast: false
33 |     uses: ./.github/workflows/end2endCustomContainers.yml
34 |     with:
35 |       project_url: ${{ matrix.projects.project_url }}
36 |       project_sha: ${{ matrix.projects.project_sha }}
37 | 


--------------------------------------------------------------------------------
/.github/workflows/end2end.yml:
--------------------------------------------------------------------------------
  1 | name: Test NPMFilter End to End on a Project 
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |         project_url:
  7 |             description: 'GitHub suffix of project to test (username/project)'
  8 |             required: true
  9 |             type: string
 10 |         project_sha:
 11 |             description: 'SHA of project to test'
 12 |             required: true
 13 |             type: string
 14 |   workflow_call:
 15 |     inputs:
 16 |         project_url:
 17 |             description: 'GitHub suffix of project to test (username/project)'
 18 |             required: true
 19 |             type: string
 20 |         project_sha:
 21 |             description: 'SHA of project to test'
 22 |             required: true
 23 |             type: string
 24 | jobs:
 25 |   execute:
 26 |     runs-on: self-hosted
 27 | 
 28 |     steps:
 29 |       - name: Checkout code
 30 |         uses: actions/checkout@v3
 31 |       - name: Build NPMFilter container
 32 |         run: docker build -t npmfilter .
 33 |       - name: Run NPMFilter
 34 |         id: run-npm-filter
 35 |         env:
 36 |             SHA: ${{ inputs.project_sha }}
 37 |             URL: ${{ inputs.project_url }}
 38 |             DOCKER_IMAGE: npmfilter:latest
 39 |         run: |
 40 |             IFS="/" read -r -a projectArray <<< "$URL"
 41 |             OrgName=${projectArray[0]} 
 42 |             ProjectName=${projectArray[1]}
 43 |             LogDir=${URL//\//-}
 44 |             echo "LogDir=$LogDir" >> $GITHUB_OUTPUT
 45 | 
 46 |             echo "Running NPMFilter on $OrgName/$ProjectName@$SHA"
 47 | 
 48 |             mkdir -p docker_configs/
 49 |             cat >docker_configs/debug_filter_config.json <<EOL
 50 |             {
 51 |               "install": {
 52 |                       "timeout": 1000,
 53 |                       "do_install": true
 54 |               },
 55 |               "dependencies": {
 56 |                       "track_deps": false,
 57 |                       "include_dev_deps": false
 58 |               },
 59 |               "build": {
 60 |                       "track_build": true,
 61 |                       "tracked_build_commands": ["build", "compile", "init"],
 62 |                       "timeout": 1000
 63 |               },
 64 |               "test": {
 65 |                       "track_tests": true,
 66 |                       "test_verbose_all_output": {
 67 |                         "do_verbose_tracking": true,
 68 |                         "verbose_json_output_file": "verbose_test_report.json"
 69 |                       },                  
 70 |                       "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench",
 71 |                                                                         "mocha", "jest", "ava", "tap", "jasmine"],
 72 |                       "timeout": 1000
 73 |               },
 74 |               "meta_info": {
 75 |                       "VERBOSE_MODE": true,
 76 |                       "ignored_commands": ["watch", "debug"],
 77 |                       "ignored_substrings": ["--watch", "nodemon"],
 78 |                       "rm_after_cloning": false,
 79 |                       "scripts_over_code": [ ],
 80 |                       "QL_queries": [ ]
 81 |               }
 82 |             }
 83 |             EOL
 84 | 
 85 |             # Run NPMFilter
 86 |             ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/$URL $SHA --config docker_configs/debug_filter_config.json
 87 | 
 88 |             # Get tests overview
 89 |             python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/ > tests-overview.csv
 90 | 
 91 |             # Check if tests were found
 92 |             TestData=$(cat tests-overview.csv)
 93 |             IFS="," read -r -a testCount <<< $(python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/)
 94 |             TestsRun=${testCount[0]}
 95 |             if [ $TestsRun -le 2 ]; then
 96 |                 echo "ERROR: No tests found."
 97 |                 exit -1
 98 |             else
 99 |                 echo "OK: ${TestsRun} tests found!"
100 |             fi
101 | 
102 |             echo "LOGNAME=results-${OrgName}-${ProjectName}-${SHA}" >> "$GITHUB_OUTPUT"
103 |       - name: Upload output
104 |         uses: actions/upload-artifact@v2
105 |         with:
106 |             name: ${{ steps.run-npm-filter.outputs.LOGNAME }}
107 |             path: npm_filter_docker_results


--------------------------------------------------------------------------------
/.github/workflows/end2endCustomContainers.yml:
--------------------------------------------------------------------------------
  1 | name: Test NPMFilter End to End on a Project with custom-built containers per-project 
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |     inputs:
  6 |         project_url:
  7 |             description: 'GitHub suffix of project to test (username/project)'
  8 |             required: true
  9 |             type: string
 10 |         project_sha:
 11 |             description: 'SHA of project to test'
 12 |             required: true
 13 |             type: string
 14 |   workflow_call:
 15 |     inputs:
 16 |         project_url:
 17 |             description: 'GitHub suffix of project to test (username/project)'
 18 |             required: true
 19 |             type: string
 20 |         project_sha:
 21 |             description: 'SHA of project to test'
 22 |             required: true
 23 |             type: string
 24 | jobs:
 25 |   execute:
 26 |     runs-on: self-hosted
 27 | 
 28 |     steps:
 29 |       - name: Checkout code
 30 |         uses: actions/checkout@v3
 31 |       - name: Build NPMFilter container
 32 |         run: |
 33 |            if [ -f "project-overrides/${OrgName}-${ProjectName}.sh" ]; then
 34 |             CUSTOM_INSTALL_SCRIPT="--build-arg CUSTOM_INSTALL_SCRIPT=project-overrides/${OrgName}-${ProjectName}.sh"
 35 |            fi
 36 |            docker build -t npmfilter --build-arg REPO_LINK=https://github.com/${{ inputs.project_url }} --build-arg REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT .
 37 |       - name: Run NPMFilter
 38 |         id: run-npm-filter
 39 |         env:
 40 |             SHA: ${{ inputs.project_sha }}
 41 |             URL: ${{ inputs.project_url }}
 42 |             DOCKER_IMAGE: npmfilter:latest
 43 |         run: |
 44 |             IFS="/" read -r -a projectArray <<< "$URL"
 45 |             OrgName=${projectArray[0]} 
 46 |             ProjectName=${projectArray[1]}
 47 |             LogDir=${URL//\//-}
 48 |             echo "LogDir=$LogDir" >> $GITHUB_OUTPUT
 49 | 
 50 |             echo "Running NPMFilter on $OrgName/$ProjectName@$SHA"
 51 | 
 52 |             mkdir -p docker_configs/
 53 |             cat >docker_configs/debug_filter_config.json <<EOL
 54 |             {
 55 |               "install": {
 56 |                       "timeout": 1000,
 57 |                       "do_install": true
 58 |               },
 59 |               "dependencies": {
 60 |                       "track_deps": false,
 61 |                       "include_dev_deps": false
 62 |               },
 63 |               "build": {
 64 |                       "track_build": true,
 65 |                       "tracked_build_commands": ["build", "compile", "init"],
 66 |                       "timeout": 1000
 67 |               },
 68 |               "test": {
 69 |                       "track_tests": true,
 70 |                       "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench",
 71 |                                                                         "mocha", "jest", "ava", "tap", "jasmine"],
 72 |                       "test_verbose_all_output": {
 73 |                         "do_verbose_tracking": true,
 74 |                         "verbose_json_output_file": "verbose_test_report.json"
 75 |                       },                                                                    
 76 |                       "timeout": 1000
 77 |               },
 78 |               "meta_info": {
 79 |                       "VERBOSE_MODE": true,
 80 |                       "ignored_commands": ["watch", "debug"],
 81 |                       "ignored_substrings": ["--watch", "nodemon"],
 82 |                       "rm_after_cloning": false,
 83 |                       "scripts_over_code": [ ],
 84 |                       "QL_queries": [ ]
 85 |               }
 86 |             }
 87 |             EOL
 88 | 
 89 |             CUR_DIR=$(pwd)
 90 |             # Run NPMFilter
 91 |             ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/$URL $SHA --config docker_configs/debug_filter_config.json
 92 | 
 93 |             # Get tests overview
 94 |             python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/ > tests-overview.csv
 95 | 
 96 |             # Check if tests were found
 97 |             TestData=$(cat tests-overview.csv)
 98 |             IFS="," read -r -a testCount <<< $(python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/)
 99 |             TestsRun=${testCount[0]}
100 |             if [ $TestsRun -le 2 ]; then
101 |                 echo "ERROR: No tests found."
102 |                 exit -1
103 |             else
104 |                 echo "OK: ${TestsRun} tests found!"
105 |             fi
106 | 
107 |             echo "LOGNAME=results-${OrgName}-${ProjectName}-${SHA}" >> "$GITHUB_OUTPUT"
108 |       - name: Upload output
109 |         uses: actions/upload-artifact@v2
110 |         with:
111 |           name: ${{ steps.run-npm-filter.outputs.LOGNAME }}
112 |           path: npm_filter_docker_results


--------------------------------------------------------------------------------
/.github/workflows/smoketest.yml:
--------------------------------------------------------------------------------
 1 | name: Test NPMFilter End to End on a toy project
 2 | 
 3 | on:
 4 |   push:
 5 | 
 6 | env:
 7 |   PROJECTS_JSON: |
 8 |     { projects: [
 9 |     {"project_url": "mtiller/ts-jest-sample", "project_sha": "6739c576d4590c53296f3e4fcdf3074e582ae297"},
10 |     ]}
11 | 
12 | jobs:
13 |   build-matrix:
14 |     runs-on: ubuntu-latest
15 |     outputs:
16 |       matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }}
17 |     steps:
18 |       - id: set-matrix
19 |         run: |
20 |           echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT
21 |           echo $PROJECTS_JSON >> $GITHUB_OUTPUT
22 |           echo "__EOF__" >> $GITHUB_OUTPUT
23 |   execute-specialized-container:
24 |     needs: [build-matrix]
25 |     strategy:
26 |       matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }}
27 |       fail-fast: false
28 |     uses: ./.github/workflows/end2endCustomContainers.yml
29 |     with:
30 |       project_url: ${{ matrix.projects.project_url }}
31 |       project_sha: ${{ matrix.projects.project_sha }}
32 |   execute-generic-container:
33 |     needs: [build-matrix]
34 |     strategy:
35 |       matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }}
36 |       fail-fast: false
37 |     uses: ./.github/workflows/end2end.yml
38 |     with:
39 |       project_url: ${{ matrix.projects.project_url }}
40 |       project_sha: ${{ matrix.projects.project_sha }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | TESTING_REPOS/*
 2 | QLDBs/*
 3 | items.json
 4 | *__page_data.html
 5 | *__results.json
 6 | *_verbose_test_report.json
 7 | local_mount/*
 8 | **/node_modules
 9 | 
10 | tests/diagnose-npm-package.py
11 | tests/TESTING_REPOS/*
12 | tests/default_filter_config.json
13 | tests/local_mount/*
14 | 
15 | input_list_scripts/*_deps_repos.txt
16 | 
17 | **/__pycache__/*
18 | **/.cache/*
19 | nohup.out
20 | job.log
21 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:latest
 2 | ARG DEBIAN_FRONTEND=noninteractive
 3 | 
 4 | # build arg: setting up for a specific repo? at a specific commit? custom install script?
 5 | ARG REPO_LINK
 6 | ARG REPO_COMMIT
 7 | # placeholder: if this arg isn't specified, copy over the readme file in configs (can't copy no source, RIP)
 8 | ARG CUSTOM_INSTALL_SCRIPT=configs/README.md
 9 | 
10 | RUN mkdir -p /home/npm-filter/results
11 | RUN mkdir /home/npm-filter/src
12 | RUN mkdir /home/npm-filter/configs
13 | 
14 | COPY src /home/npm-filter/src
15 | # copy the custom install script if it exists
16 | RUN echo $CUSTOM_INSTALL_SCRIPT
17 | COPY ${CUSTOM_INSTALL_SCRIPT} configs/ /home/npm-filter/configs/
18 | # delete the config readme: we don't need this in the docker. and it's a flag for no-custom-install 
19 | # since the readme is the default for custom install
20 | RUN rm /home/npm-filter/configs/README.md
21 | # and name it the custom_install_script
22 | RUN if [ -f /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} ] ; then mv /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} /home/npm-filter/configs/custom_install_script ; fi
23 | COPY *.sh /home/npm-filter/
24 | COPY get_rel_project_reqs.js /home/npm-filter
25 | 
26 | RUN apt-get update \
27 | 	&& apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel
28 | 
29 | RUN apt -y install python3-pip
30 | RUN pip3 install bs4 scrapy xmltodict pandas
31 | 
32 | WORKDIR /home/npm-filter
33 | 
34 | RUN git config --global http.sslVerify "false"
35 | RUN ./build.sh $REPO_LINK $REPO_COMMIT
36 | # source the env variables produced by the build script (node version, etc)
37 | RUN . /envfile
38 | 
39 | # add a default command for running the tests for repo_link and commit provided
40 | # this runs in verbose mode
41 | # need to use ENV instead of ARG in the CMD b/c docker is 10/10
42 | ENV ENV_REPO_COMMIT=$REPO_COMMIT
43 | ENV ENV_REPO_LINK=$REPO_LINK
44 | # gotta source our env vars so the command can run and use npm/node/etc :-)
45 | CMD . /envfile; ./run_verbose_for_repo_and_config.sh $ENV_REPO_LINK $ENV_REPO_COMMIT


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Ellen Arteca, Alexi Turcotte
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # npm-filter 
  2 | This tool takes a user-specified set of JavaScript/TypeScript packages, and installs/builds them. \
  3 | The primary use case is to automatically determine:
  4 | * what the test commands are
  5 | * what testing infrastructure is used
  6 | * how many passing and failing tests there are
  7 | 
  8 | Users can also specify:
  9 | * custom scripts, or
 10 | * [CodeQL](https://codeql.github.com/) static analyses
 11 | to be run over the source code of the package.
 12 | 
 13 | ## Usage options
 14 | This tool can either take packages specified as GitHub repo links, or as npm packages.
 15 | 
 16 | ### Running over GitHub repo links
 17 | To run the tool over GitHub repo links, use the [`diagnose_github_repo.py` script](https://github.com/emarteca/npm-filter/blob/master/src/diagnose_github_repo.py), called as follows:
 18 | ```
 19 | python src/diagnose_github_repo.py 
 20 | 			[--repo_list_file [rlistfile]] 
 21 | 			[--repo_link [rlink]] 
 22 | 			[--repo_link_and_SHA [rlink_and_SHA]] 
 23 | 			[--repo_local_dir [path_to_local_dir]]
 24 | 			[--config [config_file]]
 25 |                         [--output_dir [output_dir]]
 26 | ```
 27 | 
 28 | #### Arguments
 29 | All arguments are optional, although the tool will not do anything if no repo links are specified. So effectively, at least one of the three repo-link-specifying arguments must be specified for the tool to run.
 30 | * `--repo_list_file [rlistfile]`: a file containing a list of GitHub repo links to be analyzed. \
 31 | 	Each line of the input file must specify one repo link, with an optional whitespace delimited commit SHA to check the repo out at.
 32 | 	For example, a valid input file could be:
 33 | 	```
 34 | 	https://github.com/expressjs/body-parser 	d0a214b3beded8a9cd2dcb51d355f92c9ead81d4
 35 | 	https://github.com/streamich/memfs
 36 | 	```
 37 | * `--repo_link [rlink]`: a link to a single GitHub repo to be analyzed, e.g., `https://github.com/expressjs/body-parser`
 38 | * `--repo_link_and_SHA [rlink_and_SHA]`: a link to a single GitHub repo to be analyzed, followed by a space-delimited commit SHA to analyze the repo at, e.g., `https://github.com/expressjs/body-parser 	d0a214b3beded8a9cd2dcb51d355f92c9ead81d4`
 39 | * `repo_local_dir`: path to a local directory containing the source code of a repo/package to be diagnosed
 40 | * `--config [config_file]`: path to a configuration file for the tool (config options explained in [the config file section](#configuration-file)) 
 41 | * `--output_dir [output_dir]`: path to a directory in which to output the tool's results files (shape of results are explained in [the output section](#output))
 42 | 
 43 | ### Running over npm packages
 44 | To run the tool over npm packages, use the [`diagnose_npm_package.py` script](https://github.com/emarteca/npm-filter/blob/master/src/diagnose_npm_package.py), called as follows:
 45 | ```
 46 | python src/diagnose_npm_package.py
 47 | 			--packages [list_of_packages]
 48 | 			[--config [config_file]]
 49 | 			[--html [html_file]]
 50 | 			[--output_dir [output_dir]]
 51 | ```
 52 | The back end of the npm package analyzer is a web scraper: given the name of an npm package, it finds the associated repository link on the npm page so that it can analyze the source code. This tool has some custom middleware to get around the rate limiting on the npm site, but if you are analyzing a large number of packages you will still see a significant performance hit compared to running on the GitHub repos directly. 
 53 | 
 54 | #### Arguments
 55 | * `--packages [list_of_packages]`: list of npm packages to analyze. This is a required argument, and at least one package must be passed.
 56 | * `--config [config_file]`: path to a configuration file for the tool (config options explained in [the config file section](#configuration-file)) 
 57 | * `--html [html_file]`: path to an html file that represents the npm page for the package that is specified to be analyzed. This option only works for one package, so if you want to use this option on multiple packages you'll need to call the tool in sequence for each one. 
 58 | * `--output_dir [output_dir]`: path to a directory in which to output the tool's results files (shape of results are explained in [the output section](#output)) 
 59 | 
 60 | ### Configuration file
 61 | If you want to customize the behaviour of the tool, you can provide a custom configuration file. All fields in the configuration file are optional -- if not provided, defaults will be used. The [README in the configuration file directory](https://github.com/emarteca/npm-filter/tree/master/configs) goes through all the available options.
 62 | 
 63 | ### Output
 64 | The result of all the package diagnostics are output to a JSON file. The layout of the output is similar to that of the configuration file. 
 65 | The output is organized into the following top-level fields in the JSON, in order:
 66 | * `setup`: an object with fields that are initialized in the presence of different setup errors that prevent the source code from being properly set up. For example, if the repo link is invalid (or if it can't be found on an npm package page), if there is an error checking out the specified commit, or if there is an error loading the `package.json`.
 67 | * `installation`: an object listing the installer command for the package, and/or the presence of any errors in installation that prevent the analysis from continuing
 68 | * `dependencies`: an object listing the dependencies of the package, if the configuration specified that they should be tracked
 69 | * `build`: an object listing the build commands (in order, and if any) for the package, and/or the presence of any errors in the build commands that prevent the analysis from continuing
 70 | * `testing`: an object with fields for each of the test commands in the package. The test commands are those specified in the configuration file. \
 71 | 	For each test command, the tool lists: 
 72 | 	* if it is a linter or a coverage tool, and if so what tool (`test_linters`, `test_coverage_tools`)
 73 | 	* if it's not a linter or coverage tool, what testing infrastructure is being used (`test_infras`)
 74 | 	* whether or not it runs new user tests (this is false in test commands that only call other test commands, or test commands that don't run any tests explicitly (e.g., linters, coverage tools) (`RUNS_NEW_USER_TESTS`)
 75 | 	* if it runs other test commands, then a list of these commands are included (`nested_test_commands`)
 76 | 	* whether or not it timed out (`timed_out`)
 77 | 	* if it does run new user tests, then the number of passing and number of failing tests (`num_passing`, `num_failing`)
 78 |     * if verbose testing is specified as an option, then there will be an additional file of extra test output produced
 79 | * `scripts_over_code`: an object with fields for each of the scripts run over the package source code. For each script, the tool lists its output and if there was an error.
 80 | * `QL_queries`: an object with fields for each of the QL queries run over the package source code. For each script, the tool lists the output (if running in verbose mode), and if there was an error.
 81 | * `metadata`: an object with fields for some metadata about the package: repository link, commit SHA if one was specified
 82 | 
 83 | For example, the output of running `diagnose_github_repo` on `https://github.com/expressjs/body-parser` at commit SHA `d0a214b3beded8a9cd2dcb51d355f92c9ead81d4` with the default configuration file is as follows:
 84 | ```
 85 | {
 86 |     "installation": {
 87 |         "installer_command": "npm install"
 88 |     },
 89 |     "build": {
 90 |         "build_script_list": []
 91 |     },
 92 |     "testing": {
 93 |         "lint": {
 94 |             "test_linters": [
 95 |                 "eslint -- linter"
 96 |             ],
 97 |             "RUNS_NEW_USER_TESTS": false,
 98 |             "timed_out": false
 99 |         },
100 |         "test": {
101 |             "num_passing": 231,
102 |             "num_failing": 0,
103 |             "test_infras": [
104 |                 "mocha"
105 |             ],
106 |             "timed_out": false
107 |         },
108 |         "test-ci": {
109 |             "test_coverage_tools": [
110 |                 "nyc -- coverage testing"
111 |             ],
112 |             "RUNS_NEW_USER_TESTS": false,
113 |             "timed_out": false
114 |         },
115 |         "test-cov": {
116 |             "test_coverage_tools": [
117 |                 "nyc -- coverage testing"
118 |             ],
119 |             "RUNS_NEW_USER_TESTS": false,
120 |             "timed_out": false
121 |         }
122 |     },
123 |     "scripts_over_code": {},
124 |     "QL_queries": {},
125 |     "metadata": {
126 |         "repo_link": "https://github.com/expressjs/body-parser",
127 |         "repo_commit_SHA": "d0a214b3beded8a9cd2dcb51d355f92c9ead81d4"
128 |     }
129 | }
130 | ```
131 | 
132 | #### QL Query output
133 | The output of each QL query is saved to a CSV file in the same directory as the JSON output, named `[package name]__[query name]__results.csv`. For example, if you run a query `myQuery.ql` over `body-parser`, the query results file will be `body-parser__myQuery__results.csv`.
134 | 
135 | ### Running with docker
136 | To be safe, you should probably run any untrusted code in a sandbox.
137 | Since the entire point of this tool is to run code from a set of packages/projects you didn't write, we assume most of this code will fall into the untrusted category.
138 | 
139 | We host the generic docker container [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter); if you edit the package source code and want to run your version in a docker container, we have included the docker build command below.
140 | 
141 | The generic docker container runs on any package or repo specified.
142 | However, it is pre-built with default versions of node and npm.
143 | There is also the option to build a _repo-specific_ docker container. 
144 | In this case, the container is built with the particular version of node and npm specified in the repo's `package.json` configuration file.
145 | The container is also pre-built with the install and build phases of `npm-filter` run, so that you can then run the tests in the container without waiting for any setup.
146 | 
147 | #### Building a container-specific docker
148 | If you want to build a container specific to a particular repo, use the following command:
149 | ```
150 | # general use
151 | docker build -t emarteca/npm-filter --build-arg REPO_LINK=[github link to repo] [--build-arg REPO_COMMIT=[specific commit SHA]]
152 | 
153 | # specific example for memfs
154 | docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs 
155 | 
156 | # another example, for memfs at a specific commit
157 | docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs --build-arg REPO_COMMIT=863f373185837141504c05ed19f7a253232e0905
158 | ```
159 | 
160 | #### Building generic docker (if you've updated the npm-filter source code)
161 | Note: you don't need to do this if you're using npm-filter out of the box. 
162 | In that case, you'll pull directly from DockerHub.
163 | ```
164 | docker build -t npm-filter .
165 | ```
166 | 
167 | You'll also need to edit the `runDocker.sh` script and **remove the username `emarteca` before `npm-filter:latest` in the [`docker run` command](https://github.com/emarteca/npm-filter/blob/master/runDocker.sh#L18)**.
168 | 
169 | #### Sandboxed usage
170 | ```
171 | # general use
172 | ./runDocker.sh [regular command to run npm-filter]
173 | 
174 | # example use
175 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link https://github.com/jprichardson/node-fs-extra
176 | 
177 | # another example use
178 | ./runDocker.sh python3 src/diagnose_npm_package.py --packages body-parser
179 | 
180 | ```
181 | 
182 | #### Docker: where the script needs to read from external files
183 | 
184 | If you're running `npm-filter` with a custom config file, and running some custom scripts / QL queries over the package code, then you'll need to put these files in a specific folder called `docker_configs`.
185 | 
186 | Also, anything referenced in the config file must be in this folder, and the locations relative.
187 | 
188 | For example:
189 | ```
190 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_list_file docker_configs/repo_links.txt --config docker_configs/custom_config.json
191 | 
192 | ```
193 | Here we're reading a list of repos from `repo_links.txt` in the `docker_configs` directory.
194 | There's also a custom config file.
195 | 
196 | Now, if we wanted to run a script over the code, inside `custom_config.json` we'd have:
197 | ```
198 | "meta_info": {
199 | 		"scripts_over_code": [ "myscript.sh" ],
200 | 		"QL_queries": [ "myquery.ql" ]
201 | 	}
202 | 
203 | ```
204 | And, `myscript.sh` and `myquery.ql` need to also be in `docker_configs` directory.
205 | 
206 | Note that running outside of docker you can have different paths to the scripts/queries, but for running in docker they all need to be in the `docker_configs` directory.
207 | 
208 | 
209 | #### Results
210 | Results from running the docker will be output to a `npm_filter_docker_results` directory generated in the directory you run the container in.
211 | 
212 | #### Parallel execution: also in docker
213 | ```
214 | ./runParallelGitReposDocker.sh repo_link_file
215 | ```
216 | Results are in `npm_filter_parallel_docker_results`.
217 | Note that it's execution in parallel in _one_ docker container, and _not_ parallel docker containers.
218 | 
219 | ### Running locally
220 | You can also run this locally on your machine.
221 | To do so, you'll need to have the following installed:
222 | * python3 (running as python), with bs4 and scrapy libraries
223 | * git
224 | * npm
225 | * yarn
226 | * node
227 | 
228 | 
229 | ## Example uses
230 | Examples of common usages:
231 | 
232 | ### Specifying packages as github repos
233 | ```
234 | # running on a single repo
235 | python src/diagnose_github_repo.py --repo_link https://github.com/expressjs/body-parser
236 | 
237 | # running on a single repo with a custom config file
238 | python src/diagnose_github_repo.py --repo_link https://github.com/expressjs/body-parser --config my_config.json
239 | 
240 | # running on a single repo at a specific SHA
241 | python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905
242 | 
243 | # running on one repo from a link, and a list of repos from a file
244 | python src/diagnose_github_repo.py --repo_link https://github.com/expressjs/body-parser --repo_list_file repo_links.txt
245 | ```
246 | 
247 | ### Specifying packages via npm package names
248 | ```
249 | # running on a single package
250 | python src/diagnose_npm_package.py --packages body-parser
251 | 
252 | # running on multiple packages
253 | python src/diagnose_npm_package.py --packages body-parser memfs fs-extra
254 | 
255 | # running on multiple packages with a custom output directory (the parent directory)
256 | python src/diagnose_npm_package.py --packages body-parser memfs --output_dir ..
257 | ```
258 | 
259 | ## Common input generation
260 | 
261 | npm-filter takes as input a list of package names or repositories to run over. The [`input_list_scripts` directory](https://github.com/emarteca/npm-filter/tree/master/input_list_scripts) contains scripts for common input generation strategies.
262 | 
263 | ## Common output processing
264 | 
265 | npm-filter produces JSON results files for each package or repo that is analyzed. The [`output_proc_scripts` directory](https://github.com/emarteca/npm-filter/tree/master/output_proc_scripts) constains scripts for common output processing.
266 | 
267 | ## Running tests
268 | 
269 | Instructions on setting up and running the npm-filter test suite are included [in the `tests` directory](https://github.com/emarteca/npm-filter/blob/master/tests).
270 | 
271 | 


--------------------------------------------------------------------------------
/Tutorial.md:
--------------------------------------------------------------------------------
  1 | ## Tutorial: example walk-through
  2 | This is a simple tutorial giving an example walkthrough of npm-filter usage, one basic and one advanced.
  3 | We assume you have `docker` installed.
  4 | This tutorial will run the latest version of npm-filter [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter).
  5 | 
  6 | ### Setup
  7 | ```
  8 | git clone https://github.com/emarteca/npm-filter.git
  9 | cd npm-filter
 10 | ```
 11 | 
 12 | ### Usage example 1
 13 | Basic usage: analyze GitHub repo at specified commit SHA, with default configuration
 14 | ```
 15 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905
 16 | ```
 17 | 
 18 | Since this is using a specific commit SHA, the output should match exactly.
 19 | The terminal output should be:
 20 | ```
 21 | Diagnosing: memfs --- from: https://github.com/streamich/memfs          
 22 | Cloning package repository                                              
 23 | Checking out specified commit: 863f373185837141504c05ed19f7a253232e0905 
 24 | Running: yarn test                                                      
 25 | Running: yarn test:coverage                                             
 26 | Running: yarn tslint                                                    
 27 | ```
 28 | 
 29 | The output file should be in `npm_filter_docker_results/memfs__results.json`, and the contents of the file should be:
 30 | ```
 31 | {
 32 |     "installation": {
 33 |         "installer_command": "yarn"
 34 |     },
 35 |     "build": {
 36 |         "build_script_list": [
 37 |             "build"
 38 |         ]
 39 |     },
 40 |     "testing": {
 41 |         "test": {
 42 |             "num_passing": 265,
 43 |             "num_failing": 0,
 44 |             "test_infras": [
 45 |                 "jest"
 46 |             ],
 47 |             "timed_out": false
 48 |         },
 49 |         "test:coverage": {
 50 |             "num_passing": 265,
 51 |             "num_failing": 0,
 52 |             "test_infras": [
 53 |                 "jest"
 54 |             ],
 55 |             "timed_out": false
 56 |         },
 57 |         "tslint": {
 58 |             "test_linters": [
 59 |                 "tslint -- linter"
 60 |             ],
 61 |             "RUNS_NEW_USER_TESTS": false,
 62 |             "timed_out": false
 63 |         }
 64 |     },
 65 |     "metadata": {
 66 |         "repo_link": "https://github.com/streamich/memfs",
 67 |         "repo_commit_SHA": "863f373185837141504c05ed19f7a253232e0905"
 68 |     }
 69 | }
 70 | 
 71 | ```
 72 | 
 73 | 
 74 | ### Usage example 2
 75 | Advanced usage: Analyze the same GitHub repo as above, but with a user-specified configuration file, running a script and a CodeQL query. 
 76 | Also track the package dependencies including the `devDependencies`.
 77 | 
 78 | #### Custom script
 79 | In this example, we will make a simple custom script. 
 80 | This will just list all the files in the directory.
 81 | Open a file `docker_configs/ls.sh`, and give it the contents:
 82 | ```
 83 | #!/bin/bash
 84 | ls
 85 | ```
 86 | **Note** you might need `sudo` to make this file, if you aren't in your `docker` group, since `docker` will own this directory if the container has already been run.
 87 | 
 88 | Make it an executable:
 89 | ```
 90 | chmod +x docker_configs/ls.sh
 91 | ```
 92 | 
 93 | #### CodeQL query
 94 | In this example, we will make a simple CodeQL query to list all the `await` expressions and the files they appear in the package source code.
 95 | Open a file `docker_configs/await.ql` and give it the contents:
 96 | ```
 97 | import javascript
 98 | 
 99 | from AwaitExpr ae
100 | select ae, ae.getFile()
101 | ```
102 | 
103 | #### Custom configuration file
104 | Now, we need a configuration file to tell npm-filter to run this custom script and query.
105 | We only need to include the configuration fields that we're changing; all other settings not specified use their default values.
106 | Open a file `docker_configs/my_config.json` and give it the contents:
107 | ```
108 | {
109 |         "dependencies": {
110 |                 "track_deps": true,
111 |                 "include_dev_deps": true
112 |         },
113 |         "meta_info": {
114 |                 "scripts_over_code": [ "ls.sh"],
115 |                 "QL_queries": [ "await.ql"]
116 |         }
117 | }
118 | 
119 | ```
120 | 
121 | #### Running and output
122 | Now, run npm-filter with the custom settings:
123 | ```
124 | ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905 --config dock
125 | er_configs/my_config.json
126 | ```
127 | 
128 | The terminal output should be:
129 | ```
130 | Diagnosing: memfs --- from: https://github.com/streamich/memfs
131 | Cloning package repository
132 | Checking out specified commit: 863f373185837141504c05ed19f7a253232e0905
133 | Getting dependencies
134 | Running: yarn test
135 | Running: yarn test:coverage
136 | Running: yarn tslint
137 | Running script over code: /home/npm-filter/docker_configs/ls.sh
138 | Running QL query: /home/npm-filter/docker_configs/await.ql
139 | ```
140 | 
141 | The output file should be in `npm_filter_docker_results/memfs__results.json` again, and the contents of the file should be (with dependencies truncated for readability):
142 | ```
143 | {                             
144 |     "installation": {
145 |         "installer_command": "yarn"
146 |     },
147 |     "dependencies": {
148 |         "dep_list": [
149 |             "is-descriptor",   
150 |             "is-plain-obj",  
151 |             "util-deprecate",
152 |             "source-map-resolve",
153 |             "duplexer3",
154 |             "parse5",         
155 |             "boxen",
156 |             "protoduck",  
157 |             "promise-inflight",
158 |             "aws-sign2",     
159 |             "is-regex",     
160 |             "conventional-changelog-angular",
161 |             "forever-agent",
162 |             "signal-exit",  
163 |             ...
164 |             "gauge",          
165 |             "extend",
166 |             "lodash.ismatch"       
167 |         ],
168 |         "includes_dev_deps": true
169 |     },               
170 |     "build": {                 
171 |         "build_script_list": [
172 |             "build"          
173 |         ]                        
174 |     },                  
175 |     "testing": {              
176 |         "test": {   
177 |             "num_passing": 265,
178 |             "num_failing": 0,  
179 |             "test_infras": [ 
180 |                 "jest"      
181 |             ],                               
182 |             "timed_out": false
183 |         },                    
184 |         "test:coverage": {
185 |             "num_passing": 265,   
186 |             "num_failing": 0,
187 |             "test_infras": [      
188 |                 "jest"   
189 |             ],                           
190 |             "timed_out": false
191 |         },           
192 |         "tslint": {
193 |             "test_linters": [
194 |                 "tslint -- linter"                
195 |             ],         
196 |             "RUNS_NEW_USER_TESTS": false,
197 |             "timed_out": false
198 |         }                       
199 |     },                    
200 |     "scripts_over_code": {                            
201 |         "/home/npm-filter/docker_configs/ls.sh": {
202 |             "output": "CHANGELOG.md\nCODE_OF_CONDUCT.md\nCONTRIBUTING.md\nLICENSE\nREADME.md\ncodecov.yml\ncoverage\ndemo\ndocs\nlib\nnode_modules\npackage.json\nprettier.config.js\nrenovate.json\nsrc\
203 | ntsconfig.json\ntslint.json\nyarn.lock\n"                 
204 |         }                                                            
205 |     },                         
206 |     "QL_queries": {       
207 |         "/home/npm-filter/docker_configs/await.ql": {}
208 |     },
209 |     "metadata": {
210 |         "repo_link": "https://github.com/streamich/memfs",
211 |         "repo_commit_SHA": "863f373185837141504c05ed19f7a253232e0905"
212 |     }
213 | }
214 | ```
215 | 
216 | The output from running the CodeQL query should be in `npm_filter_docker_results/memfs__await__results.csv`, and the contents should be (truncated for readability):
217 | ```
218 | "ae","col1"                                                                                
219 | "await p ... ', 'r')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
220 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
221 | "await p ... ', 'a')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
222 | "await f ... ('baz')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
223 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
224 | "await p ... ', 'a')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
225 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
226 | "await p ... ', 'a')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
227 | "await f ... (0o444)","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
228 | ...
229 | "await p ... '/foo')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
230 | "await p ... '/bar')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
231 | "await p ... oo', 5)","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
232 | "await p ... '/foo')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
233 | "await p ... arture)","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
234 | "await p ...  'bar')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
235 | "await p ... ', 'w')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
236 | "await p ...  'bar')","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
237 | "await f ... close()","/home/npm-filter/TESTING_REPOS/memfs/src/__tests__/promises.test.ts"
238 | ```
239 | 
240 | 
241 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # can be building for one specific repo, at a specific commit 
  4 | # (if theyre not specified theyre just empty string, that's fine)
  5 | repo_link=$1
  6 | repo_commit=$2
  7 | 
  8 | # install nvm, so we can then use specific versions of node and npm
  9 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | /usr/bin/bash
 10 | export NVM_DIR="$HOME/.nvm"
 11 | [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"  # this loads nvm
 12 | 
 13 | 
 14 | rm build.sh
 15 | rm runDocker.sh
 16 | if [ -d local_mount ]; then
 17 | 	rm -r local_mount
 18 | fi
 19 | 
 20 | mkdir -p /home/codeql_home
 21 | 
 22 | # cd /home/codeql_home
 23 | # curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip
 24 | # unzip codeql-linux64.zip 
 25 | # # clone stable version
 26 | # git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo
 27 | 
 28 | apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential
 29 | apt-get update
 30 | 
 31 | curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add -
 32 | echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list
 33 | apt-get update
 34 | 
 35 | curl https://sh.rustup.rs -sSf | sh -s -- -y
 36 | source $HOME/.cargo/env
 37 | 
 38 | pip3 install --upgrade setuptools setuptools_rust wheel
 39 | 
 40 | echo "alias python=python3" >> /root/.bashrc
 41 | echo "alias ipython=ipython3" >> /root/.bashrc
 42 | echo "alias vi=vim" >> /root/.bashrc
 43 | 
 44 | cd /home/npm-filter
 45 | 
 46 | if [ -d TESTING_REPOS ]; then
 47 | 	rm -rf TESTING_REPOS
 48 | fi 
 49 | mkdir TESTING_REPOS
 50 | 
 51 | node_version='v18.16.0' # default to just the latest LTS version
 52 | npm_version='*'
 53 | # if there's a repo_link specified
 54 | if [ ! -z "$repo_link" ]; then
 55 | 	cd TESTING_REPOS
 56 | 	git clone $repo_link
 57 | 	# repo dir will be the only thing in TESTING_REPOS
 58 | 	repo_dir_name=`ls`
 59 | 	if [ ! -z "$repo_commit" ]; then
 60 | 		cd $repo_dir_name
 61 | 		git checkout $repo_commit
 62 | 	fi
 63 | 	cd /home/npm-filter
 64 | 
 65 | 	# this will make the node_version and npm_version variables
 66 | 	# it's ok to use the generic version here -- just using it for the vars
 67 | 	# need these dependencies for my get_rel_project_reqs.js script
 68 | 	nvm install $node_version
 69 | 	nvm use $node_version
 70 | 	nvm install-latest-npm
 71 | 
 72 | 	npm install semver node-fetch
 73 | 
 74 | 	# script to set the env variables for node_version etc
 75 | 	echo "#!/bin/bash" > req_vars.sh
 76 | 	node get_rel_project_reqs.js TESTING_REPOS/${repo_dir_name} >> req_vars.sh
 77 | 	chmod 700 req_vars.sh
 78 | 	# source in current shell: so we set the variables in the current shell
 79 | 	. req_vars.sh
 80 | 	rm req_vars.sh
 81 | 
 82 | 	echo $node_version
 83 | 	`$set_req_vars`
 84 | 	rm -r node_modules
 85 | 
 86 | 	if [[ $node_version == "*" ]]; then
 87 | 		node_version=node
 88 | 	fi
 89 | fi
 90 | 
 91 | # set up node and npm, and also add this node/npm config to the bashrc 
 92 | # so that it runs on docker startup too 
 93 | 
 94 | nvm install $node_version
 95 | nvm use $node_version
 96 | 
 97 | if [[ $npm_version == "*" ]]; then
 98 | 	nvm install-latest-npm
 99 | else
100 | 	npm install -g npm@${npm_version}
101 | fi
102 | 
103 | NVM_DIR=/root/.nvm
104 | NODE_VERSION=`node --version`
105 | 
106 | echo "export NODE_VERSION=\"$NODE_VERSION\"" >> /envfile
107 | echo "export NVM_DIR=$NVM_DIR" >> /envfile
108 | echo "export NODE_PATH=$NVM_DIR/$NODE_VERSION/lib/node_modules" >> /envfile
109 | echo "export PATH=$NVM_DIR/$NODE_VERSION/bin:/home/codeql_home/codeql:$PATH" >> /envfile
110 | 
111 | cat /envfile >> /root/.bashrc
112 | 
113 | # permissive
114 | npm config set strict-ssl false
115 | 
116 | # install the dependencies: but use the current version of npm
117 | npm install -g jest mocha tap ava nyc yarn next
118 | 
119 | config_file=configs/build_only_config.json
120 | if [ -f "/home/npm-filter/configs/custom_install_script" ]; then
121 | 	chmod +x /home/npm-filter/configs/custom_install_script
122 | 	config_file=configs/custom_install_only.json
123 | fi
124 | 
125 | if [ ! -z "$repo_link" ]; then 
126 | 	cd /home/npm-filter
127 | 	# do the install and build only (build_only_config.json config file)
128 | 	if [ ! -z "$repo_commit" ]; then
129 |         python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config $config_file --output_dir results
130 |     else 
131 |         python3 src/diagnose_github_repo.py --repo_link $repo_link --config $config_file --output_dir results
132 |     fi
133 | fi
134 | 
135 | 


--------------------------------------------------------------------------------
/configs/QL_output_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"install": {
 3 | 		"timeout": 600
 4 | 	},
 5 | 	"dependencies": {
 6 | 		"track_deps": false,
 7 | 		"include_dev_deps": false
 8 | 	},
 9 | 	"build": {
10 | 		"tracked_build_commands": ["build", "compile", "init"],
11 | 		"timeout": 300
12 | 	},
13 | 	"test": {
14 | 		"track_tests": true,
15 | 		"tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench",
16 | 								  "mocha", "jest", "ava", "tap", "jasmine"],
17 | 		"timeout": 300
18 | 	},
19 | 	"meta_info": {
20 | 		"VERBOSE_MODE": false,
21 | 		"ignored_commands": ["watch", "debug"],
22 | 		"ignored_substrings": ["--watch", "nodemon"],
23 | 		"rm_after_cloning": true
24 | 	},
25 | 	"QL_output": {
26 | 		"QL_cutoff": 5
27 | 	}
28 | }


--------------------------------------------------------------------------------
/configs/README.md:
--------------------------------------------------------------------------------
 1 | # npm-filter configuration file
 2 | The configuration file is a JSON, organized by stages of npm-filter analysis. 
 3 | The stages are as follows:
 4 | * `install`: package installation. Users can specify:
 5 |   * `timeout`: number of millisections after which, if the install is not complete, the process bails and is considered timed out
 6 |   * `do_install`: if false, skip the install stage
 7 | * `dependencies`: package dependency tracking (this is the libraries the current package depends on, both directly and transitively). Users can specify:
 8 |   * `track_deps`: if true, this specifies to compute the package dependencies
 9 |   * `include_dev_deps`: if true, this specifies to include the `devDependencies` in the dependency computation
10 |   * `timeout`: timeout in milliseconds
11 | * `build`: package compile/build stage. Users can specify: 
12 |   * `tracked_build_commands`: a list of build commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the build stage.
13 |   * `timeout`: timeout in milliseconds, per build command
14 |   * `track_build`: if false, skip the build stage
15 | * `test`: package test stage. Users can specify:
16 |   * `track_tests`: if true, then the tool will run this testing diagnostic stage
17 |   * `tracked_test_commands`: a list of test commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the test stage.
18 |   * `timeout`: timeout in milliseconds, per test command
19 |   * `test_verbose_all_output`: an object with two fields to configure the "verbose" test tracking option: here, output and some metrics (runtime, pass/fail, etc) for each test is output to a specified file. Note that currently we only support this option for the `jest` and `mocha` test infras. 
20 |   	* `do_verbose_tracking`: if true, do this verbose test tracking
21 | 	* `verbose_json_output_file`: name of the file to which to save this verbose output
22 | * `meta_info`: any analysis-level configurations. Users can specify:
23 |   * `VERBOSE_MODE`: if true, then the output JSON file will include the full output of all the commands run. Mainly for debugging.
24 |   * `ignored_commands`: commands to ignore: if these are present in the npm script name, then they are not run even if they otherwise fall into a category of commands to run (mainly used to exclude any interactive-mode commands, such as tests with `watch`)
25 |   * `ignored_substrings`: commands to ignore: if these strings are present in the command string itself, then these npm scripts are not run (same as `ignored_commands`, but for the command strings instead of the npm script names)
26 |   * `rm_after_cloning`: if true, delete the package source code after the tool is done running. Strongly recommended if running over a large batch of packages.
27 |   * `scripts_over_code`: list of paths to script files to run over the package source code. Note that these paths are relative to the location of **the config file**.
28 |   * `QL_queries`: list of paths to QL query files to run over the package source code. Like the scripts, these paths are relative to the location of the config file.
29 |   * `custom_setup_scripts`: list of paths to script files to run over the package code after cloning, but before any of the stages of `npm-filter` are actually run. Commonly used to replace the default install stage (i.e., set `do_install` to `false`). Like all the other scripts, these paths are relative to the location of the config file.
30 | 
31 | Users can customize any of the configuration fields, by providing a JSON file with the desired fields modified.
32 | Default values are used for any fields not specified.
33 | 
34 | As a demonstrative example, the default configuration is included below.
35 | ```
36 | {
37 | 	"install": {
38 | 		"timeout": 1000,
39 | 		"do_install": true
40 | 	},
41 | 	"dependencies": {
42 | 		"track_deps": false,
43 | 		"include_dev_deps": false
44 | 	},
45 | 	"build": {
46 | 		"track_build": true,
47 | 		"tracked_build_commands": ["build", "compile", "init"],
48 | 		"timeout": 1000
49 | 	},
50 | 	"test": {
51 | 		"track_tests": true,
52 | 		"test_verbose_all_output": {
53 | 			"do_verbose_tracking": false,
54 | 			"verbose_json_output_file": "verbose_test_report.json"
55 | 		},
56 | 		"tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench",
57 | 								  "mocha", "jest", "ava", "tap", "jasmine"],
58 | 		"timeout": 1000
59 | 	},
60 | 	"meta_info": {
61 | 		"VERBOSE_MODE": false,
62 | 		"ignored_commands": ["watch", "debug"],
63 | 		"ignored_substrings": ["--watch", "nodemon"],
64 | 		"rm_after_cloning": false,
65 | 		"scripts_over_code": [ ],
66 | 		"QL_queries": [ ],
67 | 		"custom_setup_scripts": [ ]
68 | 	}
69 | }
70 | ```
71 | 
72 | ## Infrastructures tracked
73 | npm-filter is configured to track the following infrastructures:
74 | * Testing infrastructures: mocha, jest, jasmine, tap, lab, ava, gulp. \
75 |   Any test commands that run other infrastructures (such as custom node scripts) will still be parsed, but whether or not the correct number of passing/failing tests is determined depends on the shape of the output.
76 | * Linters: eslint, tslint, xx, standard, prettier, gulp lint
77 | * Coverage tools: istanbul, nyc, coveralls, c8
78 | 
79 | If you have another infrastructure you'd like support for, you can send an email with a request, or add it yourself and submit a PR. [This is the relevant code](https://github.com/emarteca/npm-filter/blob/master/src/test_JS_repo_lib.py#L144) that you'd need to extend.
80 | 


--------------------------------------------------------------------------------
/configs/build_only_config.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"test": {
3 | 		"track_tests": false
4 | 	}
5 | }


--------------------------------------------------------------------------------
/configs/custom_install_only.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"test": {
3 | 		"track_tests": false
4 | 	},
5 |     "meta_info": {
6 |         "custom_setup_scripts": [ "custom_install_script" ]
7 |     }
8 | }
9 | 


--------------------------------------------------------------------------------
/configs/default_filter_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"install": {
 3 | 		"timeout": 1000,
 4 | 		"do_install": true
 5 | 	},
 6 | 	"dependencies": {
 7 | 		"track_deps": false,
 8 | 		"include_dev_deps": false
 9 | 	},
10 | 	"build": {
11 | 		"track_build": true,
12 | 		"tracked_build_commands": ["build", "compile", "init"],
13 | 		"timeout": 1000
14 | 	},
15 | 	"test": {
16 | 		"test_command_repeats": 1,
17 | 		"track_tests": true,
18 | 		"test_verbose_all_output": {
19 | 			"do_verbose_tracking": false,
20 | 			"verbose_json_output_file": "verbose_test_report.json"
21 | 		},
22 | 		"tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench",
23 | 								  "mocha", "jest", "ava", "tap", "jasmine"],
24 | 		"timeout": 1000
25 | 	},
26 | 	"meta_info": {
27 | 		"VERBOSE_MODE": false,
28 | 		"ignored_commands": ["watch", "debug"],
29 | 		"ignored_substrings": ["--watch", "nodemon"],
30 | 		"rm_after_cloning": false,
31 | 		"scripts_over_code": [ ],
32 | 		"QL_queries": [ ],
33 | 		"custom_setup_scripts": [ ]
34 | 	}
35 | }


--------------------------------------------------------------------------------
/configs/verbose.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"test": {
3 | 		"test_command_repeats": 1,
4 | 		"test_verbose_all_output": { "do_verbose_tracking": true }
5 | 	}
6 | }
7 | 


--------------------------------------------------------------------------------
/configs/verbose_only.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"install": {
 3 |         "do_install": false
 4 |     },
 5 |     "build": {
 6 |         "track_build": false
 7 |     },
 8 |     "test": {
 9 | 		"test_verbose_all_output": { "do_verbose_tracking": true }
10 | 	}
11 | }
12 | 


--------------------------------------------------------------------------------
/get_rel_project_reqs.js:
--------------------------------------------------------------------------------
  1 | // get the build requirements for the project, if they're present
  2 | // these are:
  3 | // - npm version
  4 | // - node version
  5 | // - OS
  6 | //
  7 | // some notes:
  8 | // - devs can specify a range of engines (npm, node) that their project works on.
  9 | //   If a range is specified we just get one version in the valid range
 10 | // - if the project specifically doesn't work on linux, then we're bailing -- this 
 11 | //   only makes linux docker containers
 12 | 
 13 | // also this is in JS instead of python bc the python semver library is garbage
 14 | 
 15 | const semver = require('semver');
 16 | const subproc = require('child_process');
 17 | const fs = require('fs').promises;
 18 | 
 19 | // can specify OS version: https://docs.npmjs.com/cli/v9/configuring-npm/package-json#os
 20 | // can specify node/npm version: https://docs.npmjs.com/cli/v9/configuring-npm/package-json#engines
 21 | async function get_reqs_from_pkg_json(pkg_json) {
 22 |     let reqs = {}
 23 | 
 24 |     let engines = pkg_json["engines"] || {};
 25 |     // if not specified, "*" any version
 26 |     let npm_req = engines["npm"] || "*"; 
 27 |     let node_req = engines["node"] ||  "*";
 28 | 
 29 |     // if a range is specified, get a version in the valid range
 30 |     let { node_version, npm_version } = await get_versions_in_range(node_req, npm_req);
 31 |     reqs["node_version"] = node_version;
 32 |     reqs["npm_version"] = npm_version;
 33 | 
 34 | 
 35 |     oss = engines["os"] ||  [];
 36 |     // explicit versions and linux is not listed
 37 |     if (oss.length > 0 && oss.indexOf("linux") == -1)
 38 |         reqs["linux"] = false
 39 |     // explicitly excluding linux :'(
 40 |     else if (oss.indexOf("!linux") != -1)
 41 |         reqs["linux"] = false
 42 |     else
 43 |         reqs["linux"] = true
 44 | 
 45 |     return reqs 
 46 | }
 47 | 
 48 | const BANNED_VERSION_SUBSTRINGS = ["beta", "alpha", "pre"]
 49 | 
 50 | // using semver, let's get a version that matches our specs
 51 | async function get_versions_in_range(node_version, npm_version) {
 52 |     let node_npm_version_pairs = [];
 53 |     try {
 54 |         node_npm_version_pairs = await get_node_npm_version_pairs();
 55 |     } catch(e) {
 56 |         console.log("Error getting npm/node pairs -- proceeding blind: " + e);
 57 |     }
 58 |     
 59 |     // normal route: we have the data.
 60 |     // now just need to find a pair that matches
 61 |     if (node_npm_version_pairs.length > 0) {
 62 |         for (const pair of node_npm_version_pairs) {
 63 |             if (is_banned(pair["npm"]) || is_banned(pair["node"])) {
 64 |                 continue;
 65 |             }
 66 |             if (semver.satisfies(pair["npm"], npm_version) && semver.satisfies(pair["node"], node_version)) {
 67 |                 return { "node_version": pair["node"], "npm_version": pair["npm"] }
 68 |             }
 69 |         }
 70 |     }
 71 | 
 72 |     // if we get here we didn't return in the if above
 73 |     // we don't have the data: get the list of all node versions from nvm: `nvm ls-remote`
 74 |     // and all npm versions from npm itself: `npm view npm versions`
 75 |     // NOTE: node version takes precedence over the npm version bc it's more commonly specified, 
 76 |     // and because it's more important 
 77 |     if (node_version !== "*" ) {
 78 |         // then we care about the node version
 79 |         subproc.exec('nvm ls-remote', { shell: '/bin/bash'}, (err, stdout, stderr) => {
 80 |             let versions = stdout.split("\n").map(v => v.trim().split(" ")[0]); // strip formatting and any space-delimited labels (LTS, etc)
 81 |             for (vers of versions) {
 82 |                 if (is_banned(vers)) {
 83 |                     continue;
 84 |                 }
 85 |                 if (semver.satisfies(vers, node_version)) {
 86 |                     return { "node_version": vers, "npm_version": "*" }
 87 |                 }
 88 |             }
 89 |         })
 90 |     }
 91 | 
 92 |     // if we get here, then we didn't have the version pair data, and we also didn't care about the node version
 93 |     // so let's get an npm version
 94 |     if (npm_version !== "*") {
 95 |         // then we care about the npm version
 96 |         subproc.exec('npm view npm versions --json', { shell: '/bin/bash'}, (err, stdout, stderr) => {
 97 |             let versions = JSON.parse(stdout);
 98 |             for (vers of versions) {
 99 |                 if (is_banned(vers)) {
100 |                     continue;
101 |                 }
102 |                 if (semver.satisfies(vers, npm_version)) {
103 |                     return { "node_version": "*", "npm_version": vers }
104 |                 }
105 |             }
106 |         })
107 |     }
108 |     
109 |     // no matching pairs: we're flying blind folks
110 |     return { "node_version": "*", "npm_version": "*" }
111 | }
112 | 
113 | // versions of node and the versions of npm they are bundled with
114 | // see: https://stackoverflow.com/questions/51238643/which-versions-of-npm-came-with-which-versions-of-node
115 | // read this file in -- from it we can get all the valid versions of npm and node
116 | // for fetch usage: https://stackoverflow.com/questions/2499567/how-to-make-a-json-call-to-an-url/2499647#2499647
117 | const NODE_NPM_VERSIONS_URL = 'https://nodejs.org/dist/index.json';
118 | async function get_node_npm_version_pairs() {
119 |     let resp = await fetch(NODE_NPM_VERSIONS_URL);
120 |     // look for errors:
121 |     if (!resp.ok) {
122 |         throw new Error("Uh oh: error reaching npm/node version pairs");
123 |     }
124 |     let all_data = await resp.json();
125 |     let node_npm_pairs = []; 
126 |     for (const vers_data of all_data) {
127 |         let node_version = vers_data["version"];
128 |         let npm_version = vers_data["npm"];
129 |         // if both were in the version data
130 |         if (node_version && npm_version)
131 |             node_npm_pairs.push({node: node_version, npm: npm_version})
132 |     }
133 |     return node_npm_pairs;
134 | }  
135 | 
136 | // check if a version is banned 
137 | function is_banned(vers) {
138 |     for (const banned of BANNED_VERSION_SUBSTRINGS) {
139 |         if (vers.indexOf(banned) > -1) {
140 |             return true;
141 |         }
142 |     }
143 |     return false;
144 | }
145 | 
146 | function print_as_bash_vars(reqs) {
147 |     for ( key in reqs) {
148 |         console.log("export " + key + "=" + reqs[key]);
149 |     }
150 | }
151 |    
152 | async function main(proj_dir) {
153 |     let pkg_json = {};
154 |     try {
155 |         pkg_json = JSON.parse(await fs.readFile(proj_dir + "/package.json", 'utf8'));
156 |     } catch(e) {
157 |         console.error("Error, bailing out: " + proj_dir + " invalid directory, could not load package.json");
158 |         process.exit();
159 |     }
160 |     // get the node and npm versions
161 |     let reqs = await get_reqs_from_pkg_json(pkg_json);
162 |     print_as_bash_vars(reqs);
163 | }
164 | 
165 | if (process.argv.length != 3) {
166 |     console.error("Usage: node get_rel_project_req.js path_to_project_dir")
167 |     process.exit()
168 | }
169 | 
170 | let proj_dir = process.argv[2];
171 | main(proj_dir);
172 | 


--------------------------------------------------------------------------------
/input_list_scripts/README.md:
--------------------------------------------------------------------------------
 1 | # Common input generation
 2 | npm-filter takes a list of package names or repositories to run over. This list could come from anywhere, but this directory has scripts to automate some of the most common input generation patterns.
 3 | 
 4 | ## All of a package's direct dependents
 5 | A common analysis target is the set of direct dependents of a package -- this is all of the packages that have the specified package as a dependency. We've included a script to automate the computation of the repository links for the direct dependents.
 6 | ```
 7 | # general case:
 8 | ./get_dep_repos.sh [package_name]
 9 | 
10 | # specific example:
11 | ./get_dep_repos.sh memfs
12 | 
13 | # generates memfs_deps_repos.txt
14 | ```
15 | This generates a file `[package_name]_deps_repos.txt` where each line is a repo link for the direct dependents of the specified package.
16 | 
17 | ### Disclaimer
18 | Note that the dependency computation is done using [the npm package `dependent-packages`](https://www.npmjs.com/package/dependent-packages), which is based on an a static version of the npm registry. Therefore, any dependencies computed with this script will be accurate modulo what was present in the version of the npm registry that `dependent-packages` is using.
19 | 


--------------------------------------------------------------------------------
/input_list_scripts/get_dep_repos.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pkg_name=$1
 4 | 
 5 | # script to get repo links for all dependencies of a given package
 6 | node get_package_deps.js --package $pkg_name --output_file temp_repos.out
 7 | 
 8 | echo "Done getting deps: now getting repo links"
 9 | python3 get_package_repo_link.py --package_file temp_repos.out --good_repo_list_mode True > `echo $pkg_name`_deps_repos.txt
10 | rm temp_repos.out
11 | 


--------------------------------------------------------------------------------
/input_list_scripts/get_package_deps.js:
--------------------------------------------------------------------------------
 1 | const {directDependents} = require('dependent-packages');
 2 | const {argv} = require('yargs');
 3 | const fs = require('fs');
 4 | 
 5 | let package_name = argv.package;
 6 | if (!package_name) {
 7 | 	console.log("Usage: node get_package_deps.js --package npm_package_name [--output_file output_file_name]");
 8 | 	process.exit(1);
 9 | }
10 | 
11 | let deps_list = directDependents(package_name);
12 | 
13 | if (!argv.output_file) {
14 | 	console.log(directDependents(package_name));
15 | } else {
16 | 	fs.writeFile( argv.output_file, deps_list.join("\n"), (err)=> {
17 | 		if(err) {
18 | 			console.log("Error printing to: " + argv.output_file);
19 | 			process.exit(1);
20 | 		}
21 | 		console.log("Done getting deps for: " + package_name);
22 | 	});
23 | }
24 | 


--------------------------------------------------------------------------------
/input_list_scripts/get_package_repo_link.py:
--------------------------------------------------------------------------------
 1 | import scrapy
 2 | from scrapy import signals
 3 | from scrapy.crawler import CrawlerProcess
 4 | from bs4 import BeautifulSoup
 5 | import re
 6 | import json
 7 | import logging
 8 | import argparse
 9 | import time
10 | import sys
11 | import os
12 | sys.path.append(os.path.abspath('../src'))
13 | import middlewares
14 | 
15 | logging.getLogger('scrapy').propagate = False
16 | 
17 | class NPMRepoSpider(scrapy.Spider):
18 | 	name = "npm-repos"
19 | 	
20 | 	def __init__(self, packages=None, good_repo_list_mode=None, *args, **kwargs):
21 | 		if packages is not None:
22 | 			self.packages = packages
23 | 		self.start_urls = ['https://www.npmjs.com/package/' + pkg for pkg in self.packages]
24 | 		self.pkg_repolink_pairs = []
25 | 		# dispatcher.connect(self.spider_closed, signals.spider_closed)
26 | 		self.good_repo_list_mode = good_repo_list_mode
27 | 		super(NPMRepoSpider, self).__init__(*args, **kwargs)
28 | 
29 | 	def parse(self, response):
30 | 		cur_pkg = response.url[ len("https://www.npmjs.com/package/"):]
31 | 		# TODO should we handle specific response codes?
32 | 		# successful responses are those in the 200s
33 | 		# source: https://doc.scrapy.org/en/latest/topics/spider-middleware.html#module-scrapy.spidermiddlewares.httperror
34 | 		if response.status > 299 or response.status < 200:
35 | 			self.pkg_repolink_pairs += [(cur_pkg, "ERROR")]
36 | 		else:
37 | 			soup = BeautifulSoup(response.body, 'html.parser')
38 | 			script = soup.find('script', text=re.compile('window\.__context__'))
39 | 			json_text = re.search(r'^\s*window\.__context__\s*=\s*({.*?})\s*$',
40 | 			                      script.string, flags=re.DOTALL | re.MULTILINE).group(1)
41 | 			data = json.loads(json_text)
42 | 			repo_link = ""
43 | 			try:
44 | 				repo_link = data['context']['packument']['repository']
45 | 			except KeyError:
46 | 				repo_link = "ERROR"
47 | 			self.pkg_repolink_pairs += [(cur_pkg, repo_link)]
48 | 	def closed(self, reason):
49 |       # second param is instance of spder about to be closed.
50 | 		if not self.good_repo_list_mode:
51 | 			print(self.pkg_repolink_pairs)
52 | 		else:
53 | 			good_repos = [rp[1] for rp in self.pkg_repolink_pairs if rp[1] != "ERROR" and rp[1] != ""]
54 | 			print("\n".join(good_repos))
55 | 	
56 | process = CrawlerProcess(settings={
57 | 	"FEEDS": {
58 | 		"items.json": {"format": "json"},
59 | 	},
60 | 	"HTTPERROR_ALLOW_ALL": True,
61 | 	"RETRY_HTTP_CODES" : [429],
62 | 	# next couple settings are for beating the npm request rate limiter
63 | 	#"DOWNLOAD_DELAY": 0.75,    # 3/4 second delay
64 | 	"RETRY_TIMES": 6,
65 | 	#"CONCURRENT_REQUESTS_PER_DOMAIN" : 2,
66 |  	"DOWNLOADER_MIDDLEWARES": {
67 |  		"scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
68 |  		"middlewares.TooManyRequestsRetryMiddleware": 543,
69 | 	}
70 | })
71 | 
72 | 
73 | argparser = argparse.ArgumentParser(description="Get repo link for packages")
74 | argparser.add_argument("--packages", metavar="package", type=str, nargs='*', help="a package to get repo link for")
75 | argparser.add_argument("--package_file", metavar="package_file", type=str, nargs='?', help="file with list of packages to get links for")
76 | argparser.add_argument("--good_repo_list_mode", metavar="good_repo_list_mode", type=bool, nargs='?', help="if true, print only the repo links with no errors")
77 | args = argparser.parse_args()
78 | 
79 | packages=[]
80 | if args.packages:
81 | 	packages += args.packages
82 | if args.package_file:
83 | 	with open(args.package_file) as f:
84 | 		packages += f.read().split("\n")
85 | 
86 | process.crawl(NPMRepoSpider, packages=packages, good_repo_list_mode=args.good_repo_list_mode)
87 | process.start() # the script will block here until the crawling is finished
88 | 	
89 | 
90 | 


--------------------------------------------------------------------------------
/input_list_scripts/package.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"dependencies": {
3 | 		"dependent-packages": "^2.2.2",
4 | 		"yargs": "*"
5 | 	}
6 | }
7 | 


--------------------------------------------------------------------------------
/output_proc_scripts/README.md:
--------------------------------------------------------------------------------
 1 | # Common output processing
 2 | 
 3 | npm-filter produces JSON results files for each package or repo that is analyzed. This directory has a python script that does some common output processing: 
 4 | given a directory with results JSON files, this script finds the list of all the analyzed packages/repos for which there were no setup/install errors, and for which there is at least one test command that has >= 1 passing test and no failing tests.
 5 | 
 6 | ## Usage
 7 | 
 8 | The script takes one optional argument: the directory in which to look at results files. If not provided, the current directory is used as a default.
 9 | ```
10 | # general case
11 | python get_json_results.py [output directory to look for results JSON files in]
12 | 
13 | # specific case: look at current directory
14 | python get_json_results.py
15 | 
16 | # specific case: look at another directory (here, the parent directory)
17 | python get_json_results.py ..
18 | ```
19 | 
20 | ### Example output
21 | This script generates a list of all the analyzed packages/repos that successfully ran and for which there is at least one test command that has >= 1 passing test and no failing tests.
22 | This list is printed to the console newline-delimited, the repo/package name paired with the relevant test command.
23 | 
24 | For example, running this script on a directory containing the results of running npm-filter on `body-parser` at SHA `d0a214b3beded8a9cd2dcb51d355f92c9ead81d4
25 | ` as given in the working example will produce the following output:
26 | ```
27 | Following is a list of all projects with commands meeting the criteria, paired with these commands
28 | ('..//body-parser__results.json', ['test'])
29 | ```
30 | This means that the `body-parser` package has a test command `test` that has passing test(s) and no failing tests.
31 | 
32 | ## Customization
33 | This script is hardcoded to exclude packages with setup/install errors, and only report packages with a test command that has >= 1 passing test(s) and no failing tests.
34 | It can easily be modified for different search parameters.
35 | 
36 | ### Exclusion of packages
37 | Exclusion of packages is done via a `JSON_filter` JSON object, hardcoded at the beginning of the script. To exclude packages with particular results, simply add the fields in the results JSON you want to exclude to this object.
38 | For example, if you want to additionally exclude packages that have no build commands, then you would extend the `JSON_filter` variable with the `build` field as follows:
39 | ```
40 | JSON_filter = { 	
41 | 	"setup": { 
42 | 		"repo_cloning_ERROR": True,
43 | 		"pkg_json_ERROR": True
44 | 	},
45 | 	"installation": { 
46 | 		"ERROR": True 
47 | 	},
48 | +	"build": {
49 | +	    "build_script_list": []
50 | +	}
51 | }
52 | ```
53 | 
54 | ### Filtering for criteria other than all-passing test commands
55 | The script is hardcoded to only report non-excluded packages for which there is a test command with >= 1 passing test and no failing tests.
56 | To modify this criteria, either modify the `get_passing_test_commands` function or write a new function that reports the criteria you want and call that where `get_passing_test_commands` is called currently.
57 | 
58 | For example, to get packages that run a linter, you could add the function:
59 | ```
60 | def get_successful_linter_commands(json_check): 
61 | 	test_dict = json_check.get("testing", {})
62 | 	passing_commands = []
63 | 	for test_com, test_out in test_dict.items():
64 | 		if test_out.get("timed_out", False) or test_out.get("ERROR", False): 
65 | 			continue
66 | 		if test_out.get("test_linters", []) == []:
67 | 			continue
68 | 		passing_commands += [test_com]
69 | 	return( passing_commands)
70 | ```
71 | And then, instead of calling `get_passing_test_commands`, call `get_successful_linter_commands`.
72 | In this case, running the script over the directory with `body-parser__results.json` would yield the output:
73 | ```
74 | Following is a list of all projects with commands meeting the criteria, paired with these commands
75 | ('..//body-parser__results.json', ['lint'])
76 | ```
77 | 
78 | 


--------------------------------------------------------------------------------
/output_proc_scripts/count_tests_run.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import sys
 4 | 
 5 | # simple, unrefined script for parsing npm-filter output files
 6 | # for the current directory, get all files named *__results.json
 7 | # (wildcard represents the project name)
 8 | # prints out (Number of tests passing),(Number of tests failing)
 9 | 
10 | 
11 | # JSON specifying possible errors
12 | # that should be avoided if an input JSON will pass the filter check
13 | 
14 | JSON_filter = { 	
15 | 	"setup": { 
16 | 		"repo_cloning_ERROR": True,
17 | 		"pkg_json_ERROR": True
18 | 	},
19 | 	"installation": { 
20 | 		"ERROR": True 
21 | 	},
22 | }
23 | 
24 | # input to the function is a JSON of undesirable elements
25 | # return true if the JSON to be filtered has any of the filter elements
26 | def json_contains_issues(json_check, json_filter):
27 | 	contains_issues = False
28 | 	for filter_key, filter_val in json_filter.items():
29 | 		# recursive case
30 | 		if isinstance( filter_val, dict):
31 | 			contains_issues = contains_issues or json_contains_issues( json_check.get(filter_key, {}), filter_val)
32 | 		# base case
33 | 		contains_issues = contains_issues or (json_check.get(filter_key, {}) == filter_val)
34 | 	return( contains_issues)
35 | 
36 | # by default, there needs to be at least one passing test
37 | def get_num_tests_run(json_check): 
38 | 	test_dict = json_check.get("testing", {})
39 | 	num_passing = 0
40 | 	num_failing = 0
41 | 	passing_commands = []
42 | 	for test_com, test_out in test_dict.items():
43 | 		if test_out.get("timed_out", False) or (not test_out.get("RUNS_NEW_USER_TESTS", True)) or test_out.get("ERROR", False): 
44 | 			continue
45 | 		num_passing += test_out.get("num_passing")
46 | 		num_failing += test_out.get("num_failing")
47 | 	return [num_passing, num_failing]
48 | 
49 | output_proc_dir = "."
50 | if len(sys.argv) == 2:
51 | 	output_proc_dir = sys.argv[1]
52 | else:
53 | 	print("No output directory specified: looking at current directory")
54 | 
55 | # get all relevant files
56 | all_files = [ output_proc_dir + "/" + fname for fname in os.listdir(output_proc_dir) if fname.find("__results.json") != -1]
57 | passing_files = []
58 | total_passing_tests = 0
59 | total_failing_tests = 0
60 | for file in all_files:
61 | 	with open(file) as f:
62 | 		json_check = json.load(f)
63 | 	proj_name = file[ : file.index("__results.json")]
64 | 	if json_contains_issues( json_check, JSON_filter):
65 | 		# print(proj_name + " has setup/install errors")
66 | 		continue
67 | 	num_tests = get_num_tests_run( json_check)
68 | 	total_passing_tests += num_tests[0]
69 | 	total_failing_tests += num_tests[1]
70 | 
71 | print(f"{total_passing_tests},{total_failing_tests}")


--------------------------------------------------------------------------------
/output_proc_scripts/get_json_results.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import sys
 4 | 
 5 | # simple, unrefined script for parsing npm-filter output files
 6 | # for the current directory, get all files named *__results.json
 7 | # (wildcard represents the project name)
 8 | # from this list, filter for projects with specific characteristics
 9 | 
10 | 
11 | # JSON specifying possible errors
12 | # that should be avoided if an input JSON will pass the filter check
13 | 
14 | JSON_filter = { 	
15 | 	"setup": { 
16 | 		"repo_cloning_ERROR": True,
17 | 		"pkg_json_ERROR": True
18 | 	},
19 | 	"installation": { 
20 | 		"ERROR": True 
21 | 	},
22 | }
23 | 
24 | # input to the function is a JSON of undesirable elements
25 | # return true if the JSON to be filtered has any of the filter elements
26 | def json_contains_issues(json_check, json_filter):
27 | 	contains_issues = False
28 | 	for filter_key, filter_val in json_filter.items():
29 | 		# recursive case
30 | 		if isinstance( filter_val, dict):
31 | 			contains_issues = contains_issues or json_contains_issues( json_check.get(filter_key, {}), filter_val)
32 | 		# base case
33 | 		contains_issues = contains_issues or (json_check.get(filter_key, {}) == filter_val)
34 | 	return( contains_issues)
35 | 
36 | # by default, there needs to be at least one passing test
37 | def get_passing_test_commands(json_check, min_passing=1): 
38 | 	test_dict = json_check.get("testing", {})
39 | 	passing_commands = []
40 | 	for test_com, test_out in test_dict.items():
41 | 		if test_out.get("timed_out", False) or (not test_out.get("RUNS_NEW_USER_TESTS", True)) or test_out.get("ERROR", False): 
42 | 			continue
43 | 		if test_out.get("num_failing", 0) > 0:
44 | 			continue
45 | 		if test_out.get("num_passing", 0) < min_passing:
46 | 			continue
47 | 		passing_commands += [test_com]
48 | 	return( passing_commands)
49 | 
50 | output_proc_dir = "."
51 | if len(sys.argv) == 2:
52 | 	output_proc_dir = sys.argv[1]
53 | else:
54 | 	print("No output directory specified: looking at current directory")
55 | 
56 | # get all relevant files
57 | all_files = [ output_proc_dir + "/" + fname for fname in os.listdir(output_proc_dir) if fname.find("__results.json") != -1]
58 | passing_files = []
59 | for file in all_files:
60 | 	with open(file) as f:
61 | 		json_check = json.load(f)
62 | 	proj_name = file[ : file.index("__results.json")]
63 | 	if json_contains_issues( json_check, JSON_filter):
64 | 		# print(proj_name + " has setup/install errors")
65 | 		continue
66 | 	passing_commands = get_passing_test_commands( json_check)
67 | 	if len(passing_commands) > 0:
68 | 		passing_files += [(file, passing_commands)]
69 | print("\nFollowing is a list of all projects with commands meeting the criteria, paired with these commands")
70 | print("\n".join([str(pf) for pf in passing_files]))


--------------------------------------------------------------------------------
/qlpack.yml:
--------------------------------------------------------------------------------
1 | name: npm-filter-queries
2 | version: 0.0.0
3 | libraryPathDependencies: codeql-javascript
4 | 


--------------------------------------------------------------------------------
/runDocker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | npm_filter_command=$@
 4 | 
 5 | if [ ! -d local_mount ]; then
 6 | 	mkdir local_mount
 7 | fi
 8 | 
 9 | # create the dir ourselves so we have write privilege to it
10 | if [ ! -d npm_filter_docker_results ]; then
11 | 	mkdir npm_filter_docker_results
12 | fi
13 | 
14 | if [ -v $DOCKER_IMAGE ]; then
15 | 	DOCKER_IMAGE=emarteca/npm-filter:latest
16 | fi
17 | 
18 | docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \
19 | 		   --volume `pwd`/npm_filter_docker_results:/home/npm-filter/results \
20 | 		   --volume `pwd`/docker_configs:/home/npm-filter/docker_configs\
21 | 		   -w /home/npm-filter \
22 | 		   $DOCKER_IMAGE \
23 | 		   bash -c "source /envfile; PATH=/home/codeql_home/codeql:\$PATH; $npm_filter_command --output_dir results"
24 | rm -r local_mount


--------------------------------------------------------------------------------
/runParallelGitRepos.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | repo_link_file=$1
 4 | config_file=$2
 5 | output_dir=$3
 6 | 
 7 | if [ ! -f $config_file ]; then 
 8 | 	config_file="configs/QL_output_config.json"
 9 | fi
10 | 
11 | if [ ! -d $output_dir ]; then
12 | 	output_dir=`pwd`
13 | fi
14 | 
15 | # you'll probably want to bg this
16 | nohup parallel -j 20 -a $repo_link_file --timeout 600 --joblog job.log python3 src/diagnose_github_repo.py --repo_link {} --config $config_file --output_dir $output_dir
17 | 


--------------------------------------------------------------------------------
/runParallelGitReposDocker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | repo_link_file=$1
 4 | config_file=$2
 5 | 
 6 | if [ ! -f $config_file ] || [ ! $config_file ]; then 
 7 | 	config_file="configs/QL_output_config.json"
 8 | fi
 9 | 
10 | if [ ! -d local_mount ]; then
11 |         mkdir local_mount
12 | fi
13 | 
14 | # copy config files to a shared volume with the container
15 | if [ ! -d npm_filter_parallel_docker_results ]; then
16 | 	mkdir npm_filter_parallel_docker_results
17 | fi
18 | cp $repo_link_file npm_filter_parallel_docker_results/repo_links.txt
19 | cp $config_file npm_filter_parallel_docker_results/config.json
20 | 
21 | docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \
22 |                    --volume `pwd`/npm_filter_parallel_docker_results:/home/npm-filter/results \
23 |                    -w /home/npm-filter \
24 |                    -it emarteca/npm-filter:latest \
25 |                    bash -c "nohup parallel -j 20 -a results/repo_links.txt --timeout 600 --joblog job.log python3 src/diagnose_github_repo.py --repo_link {} --config results/config.json --output_dir results"
26 | 
27 | rm -r local_mount
28 | rm npm_filter_parallel_docker_results/repo_links.txt npm_filter_parallel_docker_results/config.json
29 | 
30 | 


--------------------------------------------------------------------------------
/run_verbose_for_repo_and_config.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # run npm-filter on a specified repo with verbose, at an optional commit
 4 | # output to the "results" directory
 5 | 
 6 | # usage: ./run_for_repo_and_config.sh repo_link repo_commit
 7 | 
 8 | repo_link=$1
 9 | config_file=configs/verbose_only.json
10 | repo_commit=$2
11 | 
12 | if [ ! -z "$repo_link" ] && [ ! -z "$config_file" ]; then
13 |     if [ ! -z "$repo_commit" ]; then
14 |         python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config $config_file --output_dir results
15 |     else 
16 |         python3 src/diagnose_github_repo.py --repo_link $repo_link --config $config_file --output_dir results
17 |     fi
18 | fi


--------------------------------------------------------------------------------
/src/TestInfo.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import output_parsing.test_output_proc as TestOutputProc
  3 | 
  4 | class TestInfo:
  5 | 	OUTPUT_CHECKERS = {
  6 | 		"mocha": 
  7 | 			{
  8 | 				"output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '.*',
  9 | 				"passing": ("passing", -1),
 10 | 				"failing": ("failing", -1)
 11 | 			},
 12 | 		"jest": 
 13 | 			{
 14 | 				"output_regex_fct" : lambda condition: r'Tests:.*\d+ ' + condition,
 15 | 				"passing": ("passed", -1),
 16 | 				"failing": ("failed", -1)
 17 | 			},
 18 | 		"tap": {
 19 | 				"output_regex_fct" : lambda condition: r'# ' + condition + '.*\d+',
 20 | 				"passing": ("pass", 1),
 21 | 				"failing": ("fail", 1)
 22 | 			},
 23 | 		"tap_raw": {
 24 | 				"output_regex_fct" : lambda condition: r'' + condition + ' \d+ - (?!.*time=).*$',
 25 | 				"passing": (r'^.*(?!not )ok', None), # this "passing" is a regex: count "ok" but not "not ok"
 26 | 				"failing":  (r'^.*not ok', None)
 27 | 			},
 28 | 		"ava": 
 29 | 		{
 30 | 			"output_regex_fct": lambda condition: r'.*\d+ tests? ' + condition,
 31 | 			"passing": ("passed", -2), 
 32 | 			"failing": ("failed", -2)
 33 | 		},
 34 | 		"ava_2": 
 35 | 			{
 36 | 				"output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '$',
 37 | 				"passing": ("passed", -1),
 38 | 				"failing": ("failed", -1)
 39 | 			},
 40 | 	}
 41 | 	# extra args, their position in the arg list, and any post-processing required
 42 |     # post-processing is a function that takes 2 arguments: input file and output file
 43 | 	# CAUTION: DO NOT PUT ANY MORE ARGS AFTER PLACEHOLDER_OUTPUT_FILE_NAME. THE CODE THAT
 44 | 	# PARSES THE OUTPUT RELIES ON THIS BEING THE *LAST* ARGUMENT
 45 | 	VERBOSE_TESTS_EXTRA_ARGS = {
 46 | 		"jest": {
 47 | 			"args": " --verbose --json -i --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$",
 48 | 			"position":  -1,
 49 | 			"post_processing": TestOutputProc.parse_jest_json_to_csv
 50 | 		},
 51 | 		"mocha": {
 52 | 			"args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$",
 53 | 			"position": -1,
 54 | 			"post_processing": TestOutputProc.parse_mocha_json_to_csv
 55 | 		}
 56 | 	}
 57 | 	TRACKED_INFRAS = {
 58 | 		"mocha": {
 59 | 			"name": "mocha", 
 60 | 			"output_checkers": [ "mocha", "tap" ],
 61 | 			"verbose_tests_extra_args": [ "mocha" ]
 62 | 		},
 63 | 		"jest": {
 64 | 			"name": "jest", 
 65 | 			"output_checkers": [ "jest" ],
 66 | 			"verbose_tests_extra_args": [ "jest" ]
 67 | 		},
 68 | 		"jasmine": {
 69 | 			"name": "jasmine", 
 70 | 			"output_checkers": [ "mocha" ]
 71 | 		},
 72 | 		"tap": {
 73 | 			"name": "tap", 
 74 | 			"output_checkers": [ "tap", "tap_raw" ]
 75 | 		},
 76 | 		"lab": {
 77 | 			"name": "lab", 
 78 | 			"output_checkers": []
 79 | 		},
 80 | 		"ava": {
 81 | 			"name": "ava", 
 82 | 			"output_checkers": [ "ava", "ava_2" ]
 83 | 		},
 84 | 		"gulp": {
 85 | 			"name": "gulp", 
 86 | 			"output_checkers": [ "mocha" ]
 87 | 		},
 88 | 	}
 89 | 	TRACKED_COVERAGE = {
 90 | 		"istanbul": "istanbul -- coverage testing",
 91 | 		"nyc": "nyc -- coverage testing",
 92 | 		"coveralls": "coveralls -- coverage testing",
 93 | 		"c8": "c8 -- coverage testing"
 94 | 	}
 95 | 	TRACKED_LINTERS = {
 96 | 		"eslint": "eslint -- linter",
 97 | 		"tslint": "tslint -- linter",
 98 | 		"xx": "xx -- linter",
 99 | 		"standard": "standard -- linter",
100 | 		"prettier": "prettier -- linter",
101 | 		"gulp lint": "gulp lint -- linter"
102 | 	}
103 | 
104 | 	TRACKED_RUNNERS = [ "node", "babel-node", "grunt", "lerna" ]
105 | 
106 | 	def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE):
107 | 		self.success = success
108 | 		self.error_stream = error_stream
109 | 		self.output_stream = output_stream
110 | 		self.manager = manager
111 | 		# start all other fields as None
112 | 		self.test_infras = None
113 | 		self.test_covs = None
114 | 		self.test_lints = None
115 | 		self.nested_test_commands = None
116 | 		self.num_passing = None
117 | 		self.num_failing = None
118 | 		self.timed_out = False
119 | 		self.VERBOSE_MODE = VERBOSE_MODE
120 | 		self.test_verbosity_output = None
121 | 		self.startTime = 0
122 | 		self.endTime = 0
123 | 
124 | 	def set_test_command( self, test_command):
125 | 		self.test_command = test_command
126 | 
127 | 	def set_test_verbosity_output( self, verbose_output):
128 | 		self.test_verbosity_output = verbose_output
129 | 
130 | 	def get_test_infras_list( test_command, manager):
131 | 		test_infras = []
132 | 		test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, test_command, manager) ]
133 | 		test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, test_command, manager) ]
134 | 		return( test_infras)
135 | 
136 | 	def compute_test_infras( self):
137 | 		self.test_infras = []
138 | 		self.test_covs = []
139 | 		self.test_lints = []
140 | 		self.nested_test_commands = []
141 | 		if self.test_command:
142 | 			self.test_infras += TestInfo.get_test_infras_list(self.test_command, self.manager)
143 | 			self.test_covs += [ TestInfo.TRACKED_COVERAGE[ti] for ti in TestInfo.TRACKED_COVERAGE if called_in_command(ti, self.test_command, self.manager) ]
144 | 			self.test_lints += [ TestInfo.TRACKED_LINTERS[ti] for ti in TestInfo.TRACKED_LINTERS if called_in_command(ti, self.test_command, self.manager) ]
145 | 		self.test_infras = list(set(self.test_infras))
146 | 		self.test_covs = list(set(self.test_covs))
147 | 		self.test_lints = list(set(self.test_lints))
148 | 		# TODO: maybe we can also figure it out from the output stream
149 | 
150 | 	def compute_nested_test_commands( self, test_commands):
151 | 		# one might think that we should only check the package's own manager
152 | 		# however, it's common to mix and match (esp. to run commands with "npm run" even if the package manager is yarn)
153 | 		self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "npm run " + tc, self.test_command, self.manager) ]
154 | 		self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "yarn " + tc, self.test_command, self.manager) ]
155 | 
156 | 	def compute_test_stats( self):
157 | 		if not self.test_infras or self.test_infras == []:
158 | 			return
159 | 		test_output = self.output_stream.decode('utf-8') + self.error_stream.decode('utf-8')
160 | 		ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
161 | 		test_output = ansi_escape.sub('', test_output)
162 | 		self.num_passing = 0
163 | 		self.num_failing = 0
164 | 		self.timed_out = (self.error_stream.decode('utf-8') == "TIMEOUT ERROR")
165 | 		for infra in self.test_infras:
166 | 			output_checker_names = TestInfo.TRACKED_INFRAS.get(infra, {}).get("output_checkers", [])
167 | 			if infra in TestInfo.TRACKED_RUNNERS and output_checker_names == []:
168 | 				output_checker_names = self.OUTPUT_CHECKERS.keys() # all the checkers
169 | 			for checker_name in output_checker_names:
170 | 				div_factor = 2 if checker_name == "ava_2" else 1
171 | 				checker = self.OUTPUT_CHECKERS[ checker_name]
172 | 				self.num_passing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["passing"][0], checker["passing"][1]) / div_factor)
173 | 				self.num_failing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["failing"][0], checker["failing"][1]) / div_factor)
174 | 
175 | 	def get_json_rep( self):
176 | 		json_rep = {}
177 | 		if self.VERBOSE_MODE:
178 | 			json_rep["test_debug"] = ""
179 | 		if not self.success:
180 | 			json_rep["ERROR"] = True
181 | 			if self.VERBOSE_MODE:
182 | 				json_rep["test_debug"] += "\nError output: " + self.error_stream.decode('utf-8')
183 | 		if self.num_passing is not None and self.num_failing is not None:
184 | 			json_rep["num_passing"] = self.num_passing
185 | 			json_rep["num_failing"] = self.num_failing
186 | 		if self.VERBOSE_MODE:
187 | 			json_rep["test_debug"] += "\nOutput stream: " + self.output_stream.decode('utf-8')
188 | 		if self.test_infras and self.test_infras != []:
189 | 			json_rep["test_infras"] = [TestInfo.TRACKED_INFRAS.get(infra, {}).get("name", "Custom Testing: " + infra) for infra in self.test_infras]
190 | 		if self.test_covs and self.test_covs != []:
191 | 			json_rep["test_coverage_tools"] = self.test_covs
192 | 		if self.test_lints and self.test_lints != []:
193 | 			json_rep["test_linters"] = self.test_lints
194 | 		if self.nested_test_commands and self.nested_test_commands != []:
195 | 			json_rep["nested_test_commands"] = self.nested_test_commands
196 | 		if "test_infras" not in json_rep:
197 | 			json_rep["RUNS_NEW_USER_TESTS"] = False
198 | 		if self.test_verbosity_output:
199 | 			json_rep["test_verbosity_output"] = self.test_verbosity_output
200 | 		json_rep["timed_out"] = self.timed_out
201 | 		json_rep["start_time"] = self.start_time
202 | 		json_rep["end_time"] = self.end_time
203 | 		return( json_rep)
204 | 
205 | 	def __str__(self):
206 | 		to_ret = ""
207 | 		if not self.success:
208 | 			to_ret += "ERROR"
209 | 			if self.VERBOSE_MODE:
210 | 				to_ret += "\nError output: " + self.error_stream.decode('utf-8')
211 | 		else:
212 | 			to_ret += "SUCCESS"
213 | 		if self.num_passing is not None and self.num_failing is not None:
214 | 			to_ret += "\nPassing tests: " + str(self.num_passing) + "\nFailing tests: " + str(self.num_failing)
215 | 		if self.VERBOSE_MODE:
216 | 			to_ret += "\nOutput stream: " + self.output_stream.decode('utf-8')
217 | 		if self.test_infras and self.test_infras != []:
218 | 			to_ret += "\nTest infras: " + str([TestInfo.TRACKED_INFRAS[infra]["name"] for infra in self.test_infras])
219 | 		if self.test_covs and self.test_covs != []:
220 | 			to_ret += "\nCoverage testing: " + str(self.test_covs)
221 | 		if self.test_lints and self.test_lints != []:
222 | 			to_ret += "\nLinter: " + str(self.test_lints)
223 | 		if self.nested_test_commands and self.nested_test_commands != []:
224 | 			to_ret += "\nNested test commands: " + str(self.nested_test_commands)
225 | 		to_ret += "\nTimed out: " + str(self.timed_out)
226 | 		return( to_ret)
227 | 
228 | def called_in_command( str_comm, command, manager):
229 | 	# command ends with command terminator (this list includes \0 end-of-string, 
230 | 	# but this is not available to check in Python so we use endswith)
231 | 	post_command_chars = [ "" ] if command.endswith(str_comm) else [ " ", "\t", ";"]
232 | 	for pcc in post_command_chars:
233 | 		check_comm = str_comm + pcc
234 | 		if command.find( check_comm) == 0:
235 | 			return( True)
236 | 		if command.find( "&&" + check_comm) > -1 or command.find( "&& " + check_comm) > -1:
237 | 			return( True)
238 | 		if command.find( "cross-env NODE_ENV=test " + check_comm) > -1 or command.find( "cross-env NODE_ENV=production " + check_comm) > -1:
239 | 			return( True)
240 | 		if command.find( "cross-env CI=true " + check_comm) > -1:
241 | 			return( True)
242 | 		if command.find( "cross-env TZ=utc " + check_comm) > -1:
243 | 			return( True)
244 | 		if command.find( "opener " + check_comm) > -1:
245 | 			return( True)
246 | 		if command.find( "gulp " + check_comm) > -1:
247 | 			return( True)
248 | 		if command.find( "nyc " + check_comm) > -1:
249 | 			return( True)
250 | 	return( False)
251 | 
252 | def test_cond_count( test_output, regex_fct, condition, offset):
253 | 	ptrn = re.compile( regex_fct(condition), re.MULTILINE)
254 | 	results = ptrn.findall( test_output)
255 | 	if offset is None:
256 | 		return( len( results)) # just count the number of hits, each hit is an individual test (example: tap "ok" vs "not ok")
257 | 	num_cond = 0
258 | 	for r in results:
259 | 		temp = r.split()
260 | 		try:
261 | 			num_cond += int( temp[temp.index(condition) + offset])  
262 | 		except ValueError:
263 | 			num_cond += 0
264 | 	return( num_cond)


--------------------------------------------------------------------------------
/src/diagnose_github_repo.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import re
  3 | import subprocess
  4 | import os
  5 | import argparse
  6 | from test_JS_repo_lib import *
  7 | import get_repo_links as GetLinks
  8 | 
  9 | # expecting links to look like :
 10 | # https://github.com/user/reponame [optional commit SHA]
 11 | def get_name_from_link(link): 
 12 | 	# split first on whitespace and take the first word
 13 | 	# to make sure we ignore the optional commit SHA
 14 | 	return( link.split()[0].split("/")[-1])
 15 | 
 16 | def get_repo_and_SHA_from_repo_link(repo):
 17 | 	split_res = repo.split()
 18 | 	commit_SHA = None
 19 | 	if len(split_res) > 1:
 20 | 		commit_SHA = split_res[1]
 21 | 	return(split_res[0], commit_SHA)
 22 | 
 23 | # same format as getting the name from the repo link: we want the name of the dir, 
 24 | # so after the last slash (and if there's no slash the whole name is returned)
 25 | def get_name_from_path(repo_local_path):
 26 | 	return( repo_local_path.split("/")[-1])
 27 | 
 28 | 
 29 | class RepoWalker():
 30 | 	name = "npm-pkgs"
 31 | 	VERBOSE_MODE = False
 32 | 	RM_AFTER_CLONING = False
 33 | 	SCRIPTS_OVER_CODE = []
 34 | 	CUSTOM_SETUP_SCRIPTS = []
 35 | 	CUSTOM_LOCK_FILES = []
 36 | 	QL_QUERIES = []
 37 | 
 38 | 	DO_INSTALL = True
 39 | 	INCLUDE_DEV_DEPS = False
 40 | 	COMPUTE_DEP_LISTS = False
 41 | 	TRACK_BUILD = True
 42 | 	TRACK_TESTS = True
 43 | 	TEST_VERBOSE_ALL_OUTPUT = False
 44 | 	TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json"
 45 | 	TEST_COMMAND_REPEATS = 1
 46 | 
 47 | 	TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 
 48 | 							 "mocha", "jest", "ava", "tap", "jasmine"]
 49 | 	IGNORED_COMMANDS = ["watch", "debug"]
 50 | 	IGNORED_SUBSTRINGS = ["--watch", "nodemon"]
 51 | 	TRACKED_BUILD_COMMANDS = ["build", "compile", "init"]
 52 | 
 53 | 	# timeouts for stages, in seconds
 54 | 	INSTALL_TIMEOUT = 10800 # 3 hours
 55 | 	# note: these are timeouts per *script* in the stage of the process
 56 | 	BUILD_TIMEOUT = 10800 # 3 hours
 57 | 	TEST_TIMEOUT = 10800 # 3 hours
 58 | 
 59 | 	QL_CUTOFF = 5 # ignore if there are < 5 results
 60 | 	
 61 | 	def __init__(self, config_file="", output_dir = "."):
 62 | 		self.set_up_config( config_file)
 63 | 		self.output_dir = os.path.abspath(output_dir)
 64 | 
 65 | 	def set_repo_links(self, repo_links):
 66 | 		self.repo_links = repo_links
 67 | 
 68 | 	def set_local_repo_path(self, repo_local_dir):
 69 | 		self.repo_local_dir = repo_local_dir
 70 | 
 71 | 	def set_up_config( self, config_file):
 72 | 		if not os.path.exists(config_file):
 73 | 			if config_file != "":
 74 | 				print("Could not find config file: " + config_file + " --- using defaults")
 75 | 			return
 76 | 
 77 | 		config_json = {}
 78 | 		try:
 79 | 			with open( config_file, 'r') as f:
 80 | 				config_json = json.loads(f.read())
 81 | 		except:
 82 | 			print("Error reading config file: " + config_file + " --- using defaults")
 83 | 
 84 | 		# now, read the relevant config info from the file
 85 | 		cf_dict = config_json.get( "meta_info", {})
 86 | 		self.VERBOSE_MODE = cf_dict.get("VERBOSE_MODE", self.VERBOSE_MODE)
 87 | 		self.IGNORED_COMMANDS = cf_dict.get( "ignored_commands", self.IGNORED_COMMANDS)
 88 | 		self.IGNORED_SUBSTRINGS = cf_dict.get( "ignored_substrings", self.IGNORED_SUBSTRINGS)
 89 | 		self.RM_AFTER_CLONING = cf_dict.get( "rm_after_cloning", self.RM_AFTER_CLONING)
 90 | 		# scripts and query file location is relative to the config file
 91 | 		self.SCRIPTS_OVER_CODE = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 92 | 											for p in cf_dict.get( "scripts_over_code", self.SCRIPTS_OVER_CODE)]
 93 | 		self.QL_QUERIES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 94 | 											for p in cf_dict.get( "QL_queries", self.QL_QUERIES)]
 95 | 		self.CUSTOM_SETUP_SCRIPTS = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 96 | 											for p in cf_dict.get( "custom_setup_scripts", self.CUSTOM_SETUP_SCRIPTS)]
 97 | 
 98 | 		cf_dict = config_json.get( "dependencies", {})
 99 | 		self.INCLUDE_DEV_DEPS = cf_dict.get("include_dev_deps", self.INCLUDE_DEV_DEPS)
100 | 		self.COMPUTE_DEP_LISTS = cf_dict.get("track_deps", self.COMPUTE_DEP_LISTS)
101 | 
102 | 		cf_dict = config_json.get( "install", {})
103 | 		self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL)
104 | 		self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT)
105 | 		self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
106 | 											for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)]
107 | 
108 | 		cf_dict = config_json.get( "build", {})
109 | 		self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD)
110 | 		self.BUILD_TIMEOUT = cf_dict.get("timeout", self.BUILD_TIMEOUT)
111 | 		self.TRACKED_BUILD_COMMANDS = cf_dict.get("tracked_build_commands", self.TRACKED_BUILD_COMMANDS)
112 | 
113 | 		cf_dict = config_json.get("test", {})
114 | 		self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT)
115 | 		self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS)
116 | 		self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS)
117 | 		self.TEST_COMMAND_REPEATS = cf_dict.get("test_command_repeats", self.TEST_COMMAND_REPEATS)
118 | 		test_verbose_config = cf_dict.get("test_verbose_all_output", {})
119 | 		self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT)
120 | 		self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON)
121 | 
122 | 		cf_dict = config_json.get("QL_output", {})
123 | 		self.QL_CUTOFF = cf_dict.get("QL_cutoff", self.QL_CUTOFF)
124 | 
125 | 	def iterate_over_repos( self):
126 | 		for repo in self.repo_links:
127 | 			[repo_link, commit_SHA] = get_repo_and_SHA_from_repo_link(repo)
128 | 			package_name = get_name_from_link( repo_link)
129 | 			json_results = diagnose_package( repo_link, self, commit_SHA)
130 | 			json_results["metadata"] = {}
131 | 			json_results["metadata"]["repo_link"] = repo_link
132 | 			# if not None
133 | 			if commit_SHA:
134 | 				json_results["metadata"]["repo_commit_SHA"] = commit_SHA
135 | 			with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f:
136 | 				json.dump( json_results, f, indent=4)
137 | 		if self.repo_local_dir:
138 | 			package_name = get_name_from_path( self.repo_local_dir)
139 | 			json_results = diagnose_local_dir(self.repo_local_dir, self)
140 | 			json_results["metadata"] = {}
141 | 			json_results["metadata"]["repo_local_dir"] = repo_local_dir
142 | 			with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f:
143 | 				json.dump( json_results, f, indent=4)
144 | 
145 | 
146 | argparser = argparse.ArgumentParser(description="Diagnose github repos, from a variety of sources")
147 | argparser.add_argument("--repo_list_file", metavar="rlistfile", type=str, nargs='?', help="file with list of github repo links")
148 | argparser.add_argument("--repo_link", metavar="rlink", type=str, nargs='?', help="single repo link")
149 | argparser.add_argument("--repo_local_dir", metavar="rlocallink", type=str, nargs='?', help="path to local directory that has the repo code")
150 | argparser.add_argument("--repo_link_and_SHA", metavar="rlink_and_SHA", type=str, nargs='*', help="single repo link, with optional commit SHA")
151 | argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file")
152 | argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to")
153 | args = argparser.parse_args()
154 | 
155 | config = args.config if args.config else ""
156 | output_dir = args.output_dir if args.output_dir else "."
157 | 
158 | walker = RepoWalker(config_file=config, output_dir=output_dir)
159 | 
160 | repo_local_dir = None
161 | if args.repo_local_dir:
162 | 	repo_local_dir = os.path.abspath(args.repo_local_dir)
163 | 
164 | repo_links = []
165 | if args.repo_list_file:
166 | 	try:
167 | 		repo_links += GetLinks.from_list_of_repos(args.repo_list_file)
168 | 	except:
169 | 		print("Error reading list of repos file: " + args.repo_list_file + " --- no repos to try")
170 | 		repo_links += []
171 | 
172 | 
173 | if args.repo_link:
174 | 	repo_links += [args.repo_link]
175 | 
176 | if args.repo_link_and_SHA:
177 | 	# repo_link_and_SHA can have an optional commit SHA: if so it's space delimited
178 | 	# so we join all the repo_link args into a space-delimited string
179 | 	repo_links += [' '.join(args.repo_link_and_SHA)]
180 | walker.set_repo_links( repo_links)
181 | walker.set_local_repo_path(repo_local_dir)
182 | walker.iterate_over_repos()
183 | 	
184 | 
185 | 


--------------------------------------------------------------------------------
/src/diagnose_npm_package.py:
--------------------------------------------------------------------------------
  1 | import scrapy
  2 | from scrapy import signals
  3 | from scrapy.crawler import CrawlerProcess
  4 | from scrapy.selector import Selector
  5 | from bs4 import BeautifulSoup
  6 | import json
  7 | import re
  8 | import subprocess
  9 | import os
 10 | import logging
 11 | import argparse
 12 | from test_JS_repo_lib import *
 13 | import middlewares
 14 | 
 15 | logging.getLogger('scrapy').propagate = False
 16 | 
 17 | class NPMSpider(scrapy.Spider):
 18 | 	name = "npm-pkgs"
 19 | 	VERBOSE_MODE = False
 20 | 	RM_AFTER_CLONING = False
 21 | 	SCRIPTS_OVER_CODE = []
 22 | 	CUSTOM_SETUP_SCRIPTS = []
 23 | 	CUSTOM_LOCK_FILES = []
 24 | 	QL_QUERIES = []
 25 | 
 26 | 	DO_INSTALL = True
 27 | 	INCLUDE_DEV_DEPS = False
 28 | 	COMPUTE_DEP_LISTS = False
 29 | 	TRACK_BUILD = True
 30 | 	TRACK_TESTS = True
 31 | 	TEST_VERBOSE_ALL_OUTPUT = False
 32 | 	TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json"
 33 | 	TEST_COMMAND_REPEATS = 1
 34 | 
 35 | 	TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", 
 36 | 							 "mocha", "jest", "ava", "tap", "jasmine"]
 37 | 	IGNORED_COMMANDS = ["watch", "debug"]
 38 | 	IGNORED_SUBSTRINGS = ["--watch", "nodemon"]
 39 | 	TRACKED_BUILD_COMMANDS = ["build", "compile", "init"]
 40 | 
 41 | 	# timeouts for stages, in seconds
 42 | 	INSTALL_TIMEOUT = 1000
 43 | 	# note: these are timeouts per *script* in the stage of the process
 44 | 	BUILD_TIMEOUT = 1000
 45 | 	TEST_TIMEOUT = 1000
 46 | 	
 47 | 	def __init__(self, packages=None, config_file="", output_dir=".", *args, **kwargs):
 48 | 		if packages is not None:
 49 | 			self.packages = packages
 50 | 		self.start_urls = ['https://www.npmjs.com/package/' + pkg for pkg in self.packages]
 51 | 		self.set_up_config( config_file)
 52 | 		self.output_dir = os.path.abspath(output_dir)
 53 | 		super(NPMSpider, self).__init__(*args, **kwargs)
 54 | 
 55 | 	def set_up_config( self, config_file):
 56 | 		if not os.path.exists(config_file):
 57 | 			if config_file != "":
 58 | 				print("Could not find config file: " + config_file + " --- using defaults")
 59 | 			return
 60 | 
 61 | 		config_json = {}
 62 | 		try:
 63 | 			with open( config_file, 'r') as f:
 64 | 				config_json = json.loads(f.read())
 65 | 		except:
 66 | 			print("Error reading config file: " + config_file + " --- using defaults")
 67 | 
 68 | 		# now, read the relevant config info from the file
 69 | 		cf_dict = config_json.get( "meta_info", {})
 70 | 		self.VERBOSE_MODE = cf_dict.get("VERBOSE_MODE", self.VERBOSE_MODE)
 71 | 		self.IGNORED_COMMANDS = cf_dict.get( "ignored_commands", self.IGNORED_COMMANDS)
 72 | 		self.IGNORED_SUBSTRINGS = cf_dict.get( "ignored_substrings", self.IGNORED_SUBSTRINGS)
 73 | 		self.RM_AFTER_CLONING = cf_dict.get( "rm_after_cloning", self.RM_AFTER_CLONING)
 74 | 		# script and query file location is relative to the config file
 75 | 		self.SCRIPTS_OVER_CODE = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 76 | 											for p in cf_dict.get( "scripts_over_code", self.SCRIPTS_OVER_CODE)]
 77 | 		self.QL_QUERIES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 78 | 											for p in cf_dict.get( "QL_queries", self.QL_QUERIES)]
 79 | 		self.CUSTOM_SETUP_SCRIPTS = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 80 | 											for p in cf_dict.get( "custom_setup_scripts", self.CUSTOM_SETUP_SCRIPTS)]
 81 | 
 82 | 		cf_dict = config_json.get( "dependencies", {})
 83 | 		self.INCLUDE_DEV_DEPS = cf_dict.get("include_dev_deps", self.INCLUDE_DEV_DEPS)
 84 | 		self.COMPUTE_DEP_LISTS = cf_dict.get("track_deps", self.COMPUTE_DEP_LISTS)
 85 | 
 86 | 		cf_dict = config_json.get( "install", {})
 87 | 		self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL)
 88 | 		self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT)
 89 | 		self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p 
 90 | 											for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)]
 91 | 
 92 | 		cf_dict = config_json.get( "build", {})
 93 | 		self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD)
 94 | 		self.BUILD_TIMEOUT = cf_dict.get("timeout", self.BUILD_TIMEOUT)
 95 | 		self.TRACKED_BUILD_COMMANDS = cf_dict.get("tracked_build_commands", self.TRACKED_BUILD_COMMANDS)
 96 | 
 97 | 		cf_dict = config_json.get("test", {})
 98 | 		self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT)
 99 | 		self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS)
100 | 		self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS)
101 | 		self.TEST_COMMAND_REPEATS = cf_dict.get("test_command_repeats", self.TEST_COMMAND_REPEATS)
102 | 		test_verbose_config = cf_dict.get("test_verbose_all_output", {})
103 | 		self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT)
104 | 		self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON)
105 | 
106 | 	def parse(self, response):
107 | 		# TODO should we handle specific response codes?
108 | 		# successful responses are those in the 200s
109 | 		# source: https://doc.scrapy.org/en/latest/topics/spider-middleware.html#module-scrapy.spidermiddlewares.httperror
110 | 		if response.status > 299 or response.status < 200:
111 | 			json_results = { "http_error_code": response.status, "message": "Could not analyze url: " + response.url }
112 | 			with open( response.url[ len("https://www.npmjs.com/package/"):] + '__results.json', 'w') as f:
113 | 				json.dump( json_results, f, indent=4)
114 | 			return
115 | 		package_name = self.parse_process(response.body)
116 | 		with open(self.output_dir + "/" + package_name + '__page_data.html', 'wb') as f:
117 | 			f.write(response.body)
118 | 		
119 | 	def parse_process( self, html_text):	
120 | 		soup = BeautifulSoup(html_text, 'html.parser')
121 | 		# print(soup.prettify())
122 | 		script = soup.find('script', text=re.compile('window\.__context__'))
123 | 		json_text = re.search(r'^\s*window\.__context__\s*=\s*({.*?})\s*$',
124 | 		                      script.string, flags=re.DOTALL | re.MULTILINE).group(1)
125 | 		data = json.loads(json_text)
126 | 		
127 | 		num_dependents = data['context']['dependents']['dependentsCount']
128 | 		repo_link = data['context']['packument']['repository']
129 | 		package_name = data['context']['packument']['name']
130 | 
131 | 		json_results = diagnose_package( repo_link, self)
132 | 
133 | 		json_results["metadata"] = {}
134 | 		json_results["metadata"]["package_name"] = package_name
135 | 		json_results["metadata"]["repo_link"] = repo_link
136 | 		json_results["metadata"]["num_dependents"] = num_dependents
137 | 		
138 | 		with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f:
139 | 			json.dump( json_results, f, indent=4)
140 | 		return(package_name)
141 | 
142 | 	def iterate_over_pkgs_from_files( self):
143 | 		for pkg_name in self.packages:
144 | 			with open(pkg_name + '__page_data.html', 'rb') as f:
145 | 				html_text = f.read()
146 | 			self.parse_process(html_text)
147 | 
148 | process = CrawlerProcess(settings={
149 | 	"FEEDS": {
150 | 		"items.json": {"format": "json"},
151 | 	},
152 | 	"HTTPERROR_ALLOW_ALL": True,
153 |         "RETRY_HTTP_CODES" : [429],
154 |         "DOWNLOADER_MIDDLEWARES": {
155 |             "scrapy.downloadermiddlewares.retry.RetryMiddleware": None,
156 |             "middlewares.TooManyRequestsRetryMiddleware": 543,
157 |         }
158 |     })
159 | 
160 | 
161 | argparser = argparse.ArgumentParser(description="Diagnose npm packages")
162 | argparser.add_argument("--packages", metavar="package", type=str, nargs='+', help="a package to be diagnosed")
163 | argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file")
164 | argparser.add_argument("--html", metavar="html_file", type=bool, nargs='?', help="read from existing html instead of scraping")
165 | argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to")
166 | args = argparser.parse_args()
167 | 
168 | output_dir = args.output_dir if args.output_dir else "."
169 | 
170 | config = args.config if args.config else ""
171 | html = args.html if args.html else False
172 | 
173 | if not args.html:
174 | 	process.crawl(NPMSpider, packages=args.packages, config_file=config, output_dir=output_dir)
175 | 	process.start() # the script will block here until the crawling is finished
176 | else:
177 | 	# reading from a config file
178 | 	spider = NPMSpider(args.packages, config_file=config, output_dir=output_dir)
179 | 	spider.iterate_over_pkgs_from_files()
180 | 	
181 | 
182 | 


--------------------------------------------------------------------------------
/src/get_repo_links.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # set of functions for extracting lists of repos to clone 
 3 | # from a variety of sources
 4 | 
 5 | # from a file that's just a list of github repos 
 6 | # of the form: https://github.com/username/reponame
 7 | # optionally, users can specify a particular commit SHA to run over
 8 | # this should be separated from the repo by some whitespace
 9 | def from_list_of_repos( filename):
10 | 	with open(filename) as f:
11 | 		file_lines = f.read().split("\n")
12 | 	# filter out empty lines and return
13 | 	return( [ f for f in file_lines if len(f) > 0]) 


--------------------------------------------------------------------------------
/src/middlewares.py:
--------------------------------------------------------------------------------
 1 | from scrapy.downloadermiddlewares.retry import RetryMiddleware
 2 | from scrapy.utils.response import response_status_message
 3 | 
 4 | import time
 5 | 
 6 | class TooManyRequestsRetryMiddleware(RetryMiddleware):
 7 | 
 8 |     def __init__(self, crawler):
 9 |         super(TooManyRequestsRetryMiddleware, self).__init__(crawler.settings)
10 |         self.crawler = crawler
11 | 
12 |     @classmethod
13 |     def from_crawler(cls, crawler):
14 |         return cls(crawler)
15 | 
16 |     def process_response(self, request, response, spider):
17 |         if request.meta.get('dont_retry', False):
18 |             return response
19 |         elif response.status == 429:
20 |             self.crawler.engine.pause()
21 |             time.sleep(60) # If the rate limit is renewed in a minute, put 60 seconds, and so on.
22 |             self.crawler.engine.unpause()
23 |             reason = response_status_message(response.status)
24 |             return self._retry(request, reason, spider) or response
25 |         elif response.status in self.retry_http_codes:
26 |             reason = response_status_message(response.status)
27 |             return self._retry(request, reason, spider) or response
28 |         return response 
29 | 


--------------------------------------------------------------------------------
/src/output_parsing/test_output_proc.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import xmltodict
 3 | import pandas as pd
 4 |  
 5 | # parse the output of mocha xunit reporter to a csv
 6 | # does not delete the original xunit output file
 7 | # outputs include, per test (in this order):
 8 | # - test suite it's a part of
 9 | # - name of the test itself
10 | # - runtime of the test
11 | # - stdout of the test (if any)
12 | # - pass/fail status (could also be "pending")
13 | def parse_mocha_json_to_csv(output_file, new_output_file=None):
14 |     if new_output_file is None:
15 |         new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension
16 |     # convert an xml file to json
17 |     # used to convert the xunit reporter output from mocha into json 
18 |     # code from https://www.geeksforgeeks.org/python-xml-to-json/
19 |     data_dict = {}
20 |     try:
21 |         with open(output_file) as xml_file:
22 |             data_dict = xmltodict.parse(xml_file.read()).get("testsuite", {})
23 |     except:
24 |         data_dict = {}
25 |     # the format: all the tests are in a top-level list called "testcase"
26 |     test_suites = []
27 |     test_names = []
28 |     test_runtimes = []
29 |     test_stdout = []
30 |     test_pass_fail = []
31 |     for test in data_dict.get("testcase", []):
32 |         test_suites += [test.get("@classname", "").strip()]
33 |         test_names += [test.get("@name", "").strip()]
34 |         test_runtimes += [float(test.get("@time", "NaN"))]
35 |         if test.get("failure", False):
36 |             test_stdout += [test["failure"]]
37 |             test_pass_fail += ["failed"]
38 |         else:
39 |             test_stdout += [""]
40 |             test_pass_fail += ["passed"]
41 |     res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail)))
42 |     try:
43 |         res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
44 |         with open(new_output_file, 'w') as csv_file:
45 |             csv_file.write(res_df.to_csv())
46 |     except:
47 |         print("ERROR in data for file " + new_output_file + " -- no output printed. skipping to next step...")
48 | 
49 | # parse the output of jest xunit reporter to a csv
50 | # this does the same thing as for mocha, to produce the same data fields
51 | # does not delete the original xunit output file
52 | # outputs include, per test (in this order):
53 | # - test suite it's a part of
54 | # - name of the test itself
55 | # - runtime of the test
56 | # - stdout of the test (if any)
57 | # - pass/fail status (could also be "pending")
58 | def parse_jest_json_to_csv(output_file, new_output_file=None):
59 |     if new_output_file is None:
60 |         new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension
61 |     data_dict = {}
62 |     try:
63 |         with open(output_file) as json_file:
64 |             data_dict = json.loads(json_file.read())
65 |     except:
66 |         data_dict = {}
67 |     # the format: all tests are in a top level list called "testResults"
68 |     # this is a list of objects that have "assertionResults" representing the test suites
69 |     # "assertionResults" is a list of objects that have the test data
70 |     test_suites = []
71 |     test_names = []
72 |     test_runtimes = []
73 |     test_stdout = []
74 |     test_pass_fail = []
75 |     for test_suite in data_dict.get("testResults", []):
76 |         test_suite_results = test_suite.get("assertionResults", [])
77 |         test_suite_name = test_suite.get("name", "")
78 |         for test_results in test_suite_results:
79 |             test_status = test_results.get("status", "failed")
80 |             test_duration = test_results.get("duration")
81 |             # if it can't convert to a string, could be missing/nonetype (None duration for pending tests)
82 |             try:
83 |                 test_duration = float(test_duration)
84 |             except:
85 |                 test_duration = float("NaN")
86 |             test_suites += [test_suite_name]
87 |             test_names += [test_results.get("fullName", "")]
88 |             test_runtimes += [test_duration]
89 |             test_stdout += [";".join(test_results.get("failureMessages", []))]
90 |             test_pass_fail += [test_status] # passed/failed/pending -- if not present assume failed
91 |     res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail)))
92 |     try:
93 |         res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
94 |         with open(new_output_file, 'w') as csv_file:
95 |             csv_file.write(res_df.to_csv())
96 |     except:
97 |         print("ERROR in data for file " + new_output_file + " -- no output printed. skipping to next step...")


--------------------------------------------------------------------------------
/src/runQuery.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | projRoot=$1
 4 | projName=$2
 5 | query=$3
 6 | outputDir="."
 7 | 
 8 | if [ $# == 4 ]; then
 9 | 	outputDir=$4
10 | fi
11 | 
12 | # if there is no QLDBs folder yet, create it
13 | if [ ! -d "QLDBs" ]; then
14 | 	mkdir QLDBs
15 | fi
16 | 
17 | # make the QL DB and upgrade it, if it doesnt already exist
18 | 
19 | if [ ! -d "QLDBs/$projName" ]; then
20 | 	#export LGTM_INDEX_FILTERS='include:/'
21 | 	codeql database create --language=javascript --source-root $projRoot QLDBs/$projName
22 | 	codeql database upgrade QLDBs/$projName
23 | fi
24 | 
25 | # run the query
26 | codeql query run --database QLDBs/${projName} --output=${projName}_tempOut.bqrs $query
27 | codeql bqrs decode --format=csv ${projName}_tempOut.bqrs > $outputDir/${projName}__`basename $query .ql`__results.csv
28 | rm ${projName}_tempOut.bqrs


--------------------------------------------------------------------------------
/src/test_JS_repo_lib.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import subprocess
  3 | import json
  4 | import os
  5 | import time
  6 | from TestInfo import *
  7 | 
  8 | def run_command( commands, timeout=None):
  9 | 	for command in commands.split(";"):
 10 | 		try:
 11 | 			process = subprocess.run( command.split(), stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout)
 12 | 		except subprocess.TimeoutExpired:
 13 | 			error_string = "TIMEOUT ERROR: for user-specified timeout " + str(timeout) + " seconds"
 14 | 			error = "TIMEOUT ERROR"
 15 | 			return( error.encode('utf-8'), error_string.encode('utf-8'), 1) # non-zero return code
 16 | 	return( process.stderr, process.stdout, process.returncode)
 17 | 
 18 | def run_installation( pkg_json, crawler):
 19 | 	installation_command = ""
 20 | 	installation_debug = "Running Installation\n"
 21 | 	manager = ""
 22 | 
 23 | 	# if there is a yarn lock file use yarn
 24 | 	# if there is a package-lock, use npm
 25 | 	# if there is neither, try npm first, and if that fails use yarn
 26 | 	if os.path.exists( "yarn.lock"):
 27 | 		installation_debug += "\nyarn detected -- installing using yarn"
 28 | 		manager = "yarn "
 29 | 		installation_command = "yarn"
 30 | 		error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT)
 31 | 	elif os.path.exists( "package-lock.json"):
 32 | 		installation_debug += "\npackage-lock detected -- installing using npm"
 33 | 		manager = "npm run "
 34 | 		installation_command = "npm install"
 35 | 		error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT)
 36 | 	else:
 37 | 		installation_debug += "\nNo installer detected -- trying npm"
 38 | 		manager = "npm run "
 39 | 		installation_command = "npm install"
 40 | 		error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT)
 41 | 		if retcode != 0:
 42 | 			installation_debug += "No installer detected -- tried npm, error, now trying yarn"
 43 | 			manager = "yarn "
 44 | 			installation_command = "yarn"
 45 | 			error, output, retcode = run_command( installation_command, crawler.INSTALL_TIMEOUT)
 46 | 	return( (manager, retcode, installation_command, installation_debug))
 47 | 
 48 | def get_deps():
 49 |     deps = []
 50 |     for d in os.listdir("node_modules"):
 51 | 	# if a folder's name starts with '.', ignore it.
 52 |         if d[0] == '.':
 53 |             continue
 54 | 	# if a folder's name starts with '@', count subfolders in it.
 55 |         if d[0] == '@':
 56 |             subFolder = os.path.join("node_modules/", d)
 57 |             for f in os.listdir(subFolder):
 58 |                 deps.append(d + '/' + f)
 59 | 
 60 |         else:
 61 |             deps.append(d)
 62 | 
 63 |     return deps
 64 | 
 65 | # note: no timeout option for get_dependencies, so "None" is passed as a default timeout argument to run_command
 66 | def get_dependencies( pkg_json, manager, include_dev_deps):
 67 | 	if pkg_json.get("devDependencies", None) and not include_dev_deps:
 68 | 		run_command( "rm -r node_modules")
 69 | 		run_command( "mv package.json TEMP_package.json_TEMP")
 70 | 		dev_deps = pkg_json["devDependencies"]
 71 | 		pkg_json["devDependencies"] = {}
 72 | 		with open("package.json", 'w') as f:
 73 | 			json.dump( pkg_json, f)
 74 | 		run_command( "npm install" if manager == "npm run " else manager)
 75 | 		pkg_json["devDependencies"] = dev_deps
 76 | 	# get the list of deps, excluding hidden directories
 77 | 	deps = [] if not os.path.isdir("node_modules") else get_deps()
 78 | 	# then, reset the deps (if required)
 79 | 	if pkg_json.get("devDependencies", None) and not include_dev_deps:
 80 | 		run_command( "rm -r node_modules")
 81 | 		run_command( "mv TEMP_package.json_TEMP package.json")
 82 | 		run_command( "npm install" if manager == "npm run " else manager)
 83 | 	return( deps)
 84 | 
 85 | 
 86 | def run_build( manager, pkg_json, crawler):
 87 | 	build_debug = ""
 88 | 	build_script_list = []
 89 | 	retcode = 0
 90 | 	if len(crawler.TRACKED_BUILD_COMMANDS) == 0:
 91 | 		return(retcode, build_script_list, build_debug)
 92 | 	build_scripts = [b for b in pkg_json.get("scripts", {}).keys() if not set([ b.find(b_com) for b_com in crawler.TRACKED_BUILD_COMMANDS]) == {-1}]
 93 | 	build_scripts = [b for b in build_scripts if set([b.find(ig_com) for ig_com in crawler.IGNORED_COMMANDS]) == {-1}]
 94 | 	build_scripts = [b for b in build_scripts if set([pkg_json.get("scripts", {})[b].find(ig_sub) for ig_sub in crawler.IGNORED_SUBSTRINGS]) == {-1}]
 95 | 	for b in build_scripts:
 96 | 		build_debug += "Running: " + manager + b
 97 | 		error, output, retcode = run_command( manager + b, crawler.BUILD_TIMEOUT)
 98 | 		if retcode != 0 and build_scripts.count(b) < 2:
 99 | 			build_debug += "ERROR running command: " + b
100 | 			build_scripts += [b] # re-add it onto the end of the list, and try running it again after the other build commands
101 | 		elif retcode == 0:
102 | 			build_script_list += [b]
103 | 	return( retcode, build_script_list, build_debug)
104 | 
105 | def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."):
106 | 	test_json_summary = {}
107 | 	retcode = 0
108 | 	if len(crawler.TRACKED_TEST_COMMANDS) == 0:
109 | 		return(retcode, test_json_summary)
110 | 	test_scripts = [t for t in pkg_json.get("scripts", {}).keys() if not set([ t.find(t_com) for t_com in crawler.TRACKED_TEST_COMMANDS]) == {-1}]
111 | 	test_scripts = [t for t in test_scripts if set([t.find(ig_com) for ig_com in crawler.IGNORED_COMMANDS]) == {-1}]
112 | 	test_scripts = [t for t in test_scripts if set([pkg_json.get("scripts", {})[t].find(ig_sub) for ig_sub in crawler.IGNORED_SUBSTRINGS]) == {-1}]
113 | 	for test_index, t in enumerate(test_scripts):
114 | 		test_output_rep = {}
115 | 		for test_rep_index in range(crawler.TEST_COMMAND_REPEATS):
116 | 			test_rep_id = "" if crawler.TEST_COMMAND_REPEATS == 1 else "testrep_" + str(test_rep_index)
117 | 			print("Running rep " + str(test_rep_index) + " of " + str(crawler.TEST_COMMAND_REPEATS - 1) + ": " + manager + t)
118 | 			test_command = pkg_json.get("scripts", {})[t]
119 | 			test_infras = TestInfo.get_test_infras_list(test_command, manager)
120 | 			test_verbosity_output = {}
121 | 			# initialize these variables for timing; they'll be set before/after running test commands (resp)
122 | 			start_time = 0
123 | 			end_time = 0
124 | 			# if we're in verbose testing mode (i.e. getting all timing info for each test, etc)
125 | 			# then, we rerun the test commands with all the commands for adding verbose_mode to 
126 | 			# each of the test infras involved (individually)
127 | 			if crawler.TEST_VERBOSE_ALL_OUTPUT:
128 | 				# we're gonna be adding our new custom scripts for verbosity testing
129 | 				run_command( "mv package.json TEMP_package.json_TEMP")
130 | 				for verbosity_index, test_infra in enumerate(test_infras):
131 | 					verbose_test_json = crawler.output_dir + "/" \
132 | 										+ "repo_" + repo_name + "_" \
133 | 										+ "test_" + str(test_index) + "_"\
134 | 										+ "infra_" + str(verbosity_index) + "_" \
135 | 										+ ("" if test_rep_id == "" else test_rep_id + "_") \
136 | 										+ crawler.TEST_VERBOSE_OUTPUT_JSON
137 | 					infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS.get(test_infra)
138 | 					if not infra_verbosity_config: # checks if it's an empty object
139 | 						print("TEST VERBOSE MODE: unsupported test infra " + test_infra)
140 | 						test_verbosity_output[test_infra] = { "error": True }
141 | 						continue
142 | 					infra_verbosity_args = infra_verbosity_config.get("args", "")
143 | 					infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end
144 | 					infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None)
145 | 					infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_command, test_infra, infra_verbosity_args, 
146 | 																					verbose_test_json, infra_verbosity_args_pos)
147 | 					verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index)
148 | 					pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command
149 | 					with open("package.json", 'w') as f:
150 | 						json.dump( pkg_json, f)
151 | 					print("Running verbosity: " + manager + infra_verbosity_command)
152 | 					# time how long the next line takes
153 | 					start_time = time.time()
154 | 					error, output, retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT)
155 | 					end_time = time.time()
156 | 					# if there's post-processing to be done
157 | 					if not infra_verbosity_post_proc is None:
158 | 						for out_file_obj in out_files:
159 | 							infra_verbosity_post_proc(out_file_obj["output_file"])
160 | 					verbosity_index += 1
161 | 					# get the output
162 | 					test_verbosity_infra = {}
163 | 					test_verbosity_infra["command"] = infra_verbosity_command
164 | 					test_verbosity_infra["output_files"] = out_files
165 | 					if crawler.VERBOSE_MODE:
166 | 						test_verbosity_infra["test_debug"] = "\nError output: " + error.decode('utf-8') \
167 | 															+ "\nOutput stream: " + output.decode('utf-8')
168 | 					test_verbosity_output[test_infra] = test_verbosity_infra
169 | 				# put the package.json back
170 | 				run_command( "mv TEMP_package.json_TEMP package.json")
171 | 			# not verbose test mode -- just run the normal test command
172 | 			# if start and end time are both still zero then no instrumented test commands ran
173 | 			# and so we also rerun here
174 | 			if (not crawler.TEST_VERBOSE_ALL_OUTPUT) or (start_time == 0 and end_time == 0): 
175 | 				start_time = time.time()
176 | 				error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT)
177 | 				end_time = time.time()
178 | 			test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE)
179 | 			# the below info on the test infras etc is independent of verbose mode: just based on the command itself
180 | 			test_info.set_test_command( test_command)
181 | 			test_info.compute_test_infras()
182 | 			test_info.compute_nested_test_commands( test_scripts)
183 | 			test_info.start_time = start_time
184 | 			test_info.end_time = end_time
185 | 			# note: if we're running in verbose mode, then the stats will be that of the last executed verbose mode 
186 | 			# instrumented version of the test command
187 | 			test_info.compute_test_stats()
188 | 			if crawler.TEST_VERBOSE_ALL_OUTPUT:
189 | 				test_info.set_test_verbosity_output(test_verbosity_output)
190 | 			# if we're not doing any repeats then don't make another layer of jsons
191 | 			if crawler.TEST_COMMAND_REPEATS == 1:
192 | 				test_output_rep = test_info.get_json_rep()
193 | 			else:
194 | 				test_output_rep[test_rep_id] = test_info.get_json_rep()
195 | 		test_json_summary[t] = test_output_rep
196 | 	return( retcode, test_json_summary)
197 | 
198 | # instrument the test command specified to make it produce verbose output to a file
199 | def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos):
200 | 	# replace the output file name with the custom output filename
201 | 	# add an index to the filename for the 2nd,+ time the filename shows up
202 | 	# so as to avoid overwriting the files
203 | 	num_files = 0
204 | 	new_infra_verbosity_args = ""
205 | 	output_files = []
206 | 	for i, sub in enumerate(infra_verbosity_args.split("$PLACEHOLDER_OUTPUT_FILE_NAME$")):
207 | 		out_file_object = { "test_script": test_script, "test_infra": test_infra }
208 | 		# not the file name
209 | 		if sub != "": 
210 | 			new_infra_verbosity_args += sub
211 | 		else:
212 | 			path_index = verbose_test_json.rfind("/")
213 | 			if path_index == -1:
214 | 				output_file = "out_" + str(num_files) + "_" + verbose_test_json 
215 | 				new_infra_verbosity_args += output_file
216 | 				out_file_object["output_file"] = output_file
217 | 			else:
218 | 				output_file = verbose_test_json[:path_index] + "/out_" + str(num_files) + "_" + verbose_test_json[path_index + 1:]
219 | 				print(output_file)
220 | 				new_infra_verbosity_args += output_file
221 | 				out_file_object["output_file"] = output_file
222 | 			output_files += [ out_file_object ]
223 | 			num_files += 1
224 | 	infra_verbosity_args = new_infra_verbosity_args
225 | 	# split into sub-commands
226 | 	command_split_chars = [ "&&", ";"]
227 | 	infra_calls = test_script.split(test_infra)
228 | 	real_calls = []
229 | 	for maybe_call in infra_calls:
230 | 		# if the last char in the string is not whitespace and not a command delimiter,
231 | 		# and it's not the last string in the split
232 | 		# then it's a string that is appended to the front of the name of the infra (e.g., "\"jest\"") 
233 | 		# and not a call 
234 | 		# rebuild it
235 | 		if i < len(infra_calls) - 1 and maybe_call != "" and (not maybe_call[-1].isspace()) and (not any([maybe_call.endswith(s) for s in command_split_chars])):
236 | 			if len(real_calls) > 0:
237 | 				real_calls[-1] += test_infra + maybe_call
238 | 				continue
239 | 		# if the first char in the string is not whitespace and not a command delimiter,
240 | 		# and it's not the first string in the split
241 | 		# then it's a string that is appended to the back of the name of the infra (e.g., jest".config.js")
242 | 		# and not a call either
243 | 		# rebuild it
244 | 		if i > 0 and maybe_call != "" and (not maybe_call[0].isspace()) and (not any([maybe_call.startswith(s) for s in command_split_chars])):
245 | 			if len(real_calls) > 0:
246 | 				real_calls[-1] += test_infra + maybe_call
247 | 				continue
248 | 		real_calls += [ maybe_call ]
249 | 	infra_calls = real_calls
250 | 	instrumented_test_command = []
251 | 	for i, infra_call in enumerate(infra_calls):
252 | 		# if the current call is empty string
253 | 		# then this is the call to the testing infra and the next is the arguments 
254 | 		# so, skip this one
255 | 		# if there are no args (i.e. no next string), then just instrument this one
256 | 		if infra_call == "" and i < len(infra_calls) - 1:
257 | 			instrumented_test_command += [ "" ]
258 | 			continue
259 | 		# if the first call is non-empty and there's more than one call, then it's pre-test-infra and we skip it too
260 | 		elif len(infra_calls) > 1 and infra_call != "" and i == 0:
261 | 			instrumented_test_command += [ "" ]
262 | 			continue
263 | 		# get the arguments, splitting off from any other non-test commands that might be
264 | 		# in this command (note: we know all the commands started with test_infra)
265 | 		end_command_pos = re.search(r'|'.join(command_split_chars), infra_call)
266 | 		end_command_pos = end_command_pos.start() if not end_command_pos is None else -1
267 | 		sub_command_args = (infra_call[0:end_command_pos] if end_command_pos > -1 else infra_call).split(" ")
268 | 		if infra_verbosity_args_pos != -1:
269 | 			sub_command_args.insert(infra_verbosity_args_pos, infra_verbosity_args)
270 | 		else:
271 | 			sub_command_args.append(infra_verbosity_args)
272 | 		# rebuild the command, re-attaching any extra sub-commands
273 | 		instrumented_test_command += [ " ".join(sub_command_args) + (infra_call[end_command_pos:] if end_command_pos > -1 else "") ]
274 | 	return(test_infra.join(instrumented_test_command), output_files)
275 | 
276 | def on_diagnose_exit( json_out, crawler, cur_dir, repo_name):
277 | 	# if we still have the temp package.json, restore it
278 | 	if os.path.isfile("TEMP_package.json_TEMP"):
279 | 		run_command( "mv TEMP_package.json_TEMP package.json")
280 | 	# move back to the original working directory
281 | 	if repo_name != "":
282 | 		os.chdir( cur_dir)
283 | 		if crawler.RM_AFTER_CLONING:
284 | 			run_command( "rm -rf TESTING_REPOS/" + repo_name)
285 | 	return( json_out)
286 | 
287 | def diagnose_package( repo_link, crawler, commit_SHA=None):
288 | 	json_out = {}
289 | 
290 | 	repo_name = ""
291 | 	cur_dir = os.getcwd()
292 | 	try: 
293 | 		repo_name = repo_link[len(repo_link) - (repo_link[::-1].index("/")):]
294 | 	except: 
295 | 		print("ERROR cloning the repo -- malformed repo link. Exiting now.")
296 | 		json_out["setup"] = {}
297 | 		json_out["setup"]["repo_cloning_ERROR"] = True
298 | 		return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
299 | 
300 | 	print("Diagnosing: " + repo_name + " --- from: " + repo_link)
301 | 
302 | 	if not os.path.isdir("TESTING_REPOS"):
303 | 		os.mkdir("TESTING_REPOS")
304 | 	os.chdir("TESTING_REPOS")
305 | 
306 | 	# first step: cloning the package's repo
307 | 
308 | 	# if the repo already exists, dont clone it
309 | 	if not os.path.isdir( repo_name):
310 | 		print( "Cloning package repository")
311 | 		error, output, retcode = run_command( "git clone " + repo_link)
312 | 		if retcode != 0:
313 | 			print("ERROR cloning the repo. Exiting now.")
314 | 			json_out["setup"] = {}
315 | 			json_out["setup"]["repo_cloning_ERROR"] = True
316 | 			return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
317 | 	else:
318 | 		print( "Package repository already exists. Using existing directory: " + repo_name)
319 | 	
320 | 	# diagnose the repo dir
321 | 	return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=commit_SHA))
322 | 
323 | def diagnose_local_dir(repo_dir, crawler):
324 | 	json_out = {}
325 | 	repo_name = ""
326 | 	cur_dir = os.getcwd()
327 | 	repo_name = repo_dir.split("/")[-1]
328 | 	if not os.path.isdir(repo_dir):
329 | 		print("ERROR using local directory: " + repo_dir + " invalid directory path")
330 | 		json_out["setup"] = {}
331 | 		json_out["setup"]["local_dir_ERROR"] = True
332 | 		return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
333 | 	
334 | 	print("Diagnosing: " + repo_name + " --- from: " + repo_dir)
335 | 	if not os.path.isdir("TESTING_REPOS"):
336 | 		os.mkdir("TESTING_REPOS")
337 | 	os.chdir("TESTING_REPOS")
338 | 
339 | 	# if the repo already exists, dont clone it
340 | 	if not os.path.isdir( repo_name):
341 | 		print( "Copying package directory")
342 | 		error, output, retcode = run_command( "cp -r " + repo_dir + " " + repo_name)
343 | 		if retcode != 0:
344 | 			print("ERROR copying the directory. Exiting now.")
345 | 			json_out["setup"] = {}
346 | 			json_out["setup"]["local_dir_ERROR"] = True
347 | 			return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
348 | 	else:
349 | 		print( "Package directory already exists. Using existing directory: " + repo_name)
350 | 	# diagnose the repo dir
351 | 	return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir))
352 | 
353 | def diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=None):
354 | 	# move into the repo and begin testing
355 | 	os.chdir( repo_name)
356 | 
357 | 	# checkout the specified commit if needed
358 | 	if commit_SHA:
359 | 		print("Checking out specified commit: " + commit_SHA)
360 | 		error, output, retcode = run_command( "git checkout " + commit_SHA)
361 | 		if retcode != 0:
362 | 			print("ERROR checking out specified commit. Exiting now.")
363 | 			json_out["setup"] = {}
364 | 			json_out["setup"]["repo_commit_checkout_ERROR"] = True
365 | 			return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
366 | 	
367 | 
368 | 	pkg_json = None
369 | 	try:
370 | 		with open('package.json') as f:
371 | 			pkg_json = json.load(f)
372 | 	except:
373 | 		print("ERROR reading the package.json. Exiting now.")
374 | 		json_out["setup"] = {}
375 | 		json_out["setup"]["pkg_json_ERROR"] = True
376 | 		return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
377 | 
378 | 	manager = ""
379 | 	# if there's custom lock files, copy them into the repo (repo is "." since we're in the repo currently)
380 | 	if crawler.CUSTOM_LOCK_FILES != []:
381 | 		for custom_lock in crawler.CUSTOM_LOCK_FILES:
382 | 			run_command("cp " + custom_lock + " .")
383 | 
384 | 	# first, check if there is a custom install
385 | 	# this runs custom scripts the same way as the scripts_over_code below; only 
386 | 	# difference is it's before the npm-filter run
387 | 	if crawler.CUSTOM_SETUP_SCRIPTS != []:
388 | 		json_out["custom_setup_scripts"] = {}
389 | 		for script in crawler.CUSTOM_SETUP_SCRIPTS:
390 | 			print("Running custom setup script script over code: " + script)
391 | 			json_out["custom_setup_scripts"][script] = {}
392 | 			error, output, retcode = run_command( script)
393 | 			script_output = output.decode('utf-8') + error.decode('utf-8')
394 | 			ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
395 | 			script_output = ansi_escape.sub('', script_output)
396 | 			json_out["custom_setup_scripts"][script]["output"] = script_output
397 | 			if retcode != 0:
398 | 				json_out["custom_setup_scripts"][script]["ERROR"] = True
399 | 
400 | 	# check if the install is done (check if there is a node_modules folder)
401 | 	already_installed = os.path.isdir("node_modules")
402 | 
403 | 	# then, the install
404 | 	if crawler.DO_INSTALL:
405 | 		(new_manager, retcode, installer_command, installer_debug) = run_installation( pkg_json, crawler)
406 | 		if manager == "":
407 | 			manager = new_manager
408 | 		json_out["installation"] = {}
409 | 		json_out["installation"]["installer_command"] = installer_command
410 | 		if crawler.VERBOSE_MODE:
411 | 			json_out["installation"]["installer_debug"] = installer_debug
412 | 		if retcode != 0:
413 | 			print("ERROR -- installation failed")
414 | 			json_out["installation"]["ERROR"] = True
415 | 			if not already_installed:
416 | 				return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
417 | 	else:
418 | 		json_out["installation"] = { "do_install": False }
419 | 
420 | 	if manager == "": # default the manager to npm if it wasn't already IDd
421 | 		manager = "npm run "
422 | 
423 | 	if crawler.COMPUTE_DEP_LISTS:
424 | 		json_out["dependencies"] = {}
425 | 		if not crawler.DO_INSTALL:
426 | 			print("Can't get dependencies without installing (do_install: false) -- skipping")
427 | 		else:
428 | 			print("Getting dependencies")
429 | 			dep_list = get_dependencies( pkg_json, manager, crawler.INCLUDE_DEV_DEPS)
430 | 			json_out["dependencies"]["dep_list"] = dep_list
431 | 			json_out["dependencies"]["includes_dev_deps"] = crawler.INCLUDE_DEV_DEPS
432 | 
433 | 	# now, proceed with the build
434 | 	if crawler.TRACK_BUILD:
435 | 		json_out["build"] = {}
436 | 		if not crawler.DO_INSTALL and not already_installed:
437 | 			print("Can't do build without installing (do_install: false and not already installed) -- skipping")
438 | 		else:
439 | 			(retcode, build_script_list, build_debug) = run_build( manager, pkg_json, crawler)
440 | 			json_out["build"]["build_script_list"] = build_script_list
441 | 			if crawler.VERBOSE_MODE:
442 | 				json_out["build"]["build_debug"] = build_debug
443 | 			if retcode != 0:
444 | 				print("ERROR -- build failed. Continuing anyway...")
445 | 				json_out["build"]["ERROR"] = True
446 | 	else:
447 | 		json_out["build"] = { "track_build": False }
448 | 
449 | 	# then, the testing
450 | 	if crawler.TRACK_TESTS:
451 | 		json_out["testing"] = {}
452 | 		if not crawler.DO_INSTALL and not already_installed:
453 | 			print("Can't run tests without installing (do_install: false and not already installed) -- skipping")
454 | 		else:
455 | 			(retcode, test_json_summary) = run_tests( manager, pkg_json, crawler, repo_name, cur_dir)
456 | 			json_out["testing"] = test_json_summary
457 | 	else:
458 | 		json_out["testing"] = { "track_tests": False }
459 | 
460 | 	if crawler.SCRIPTS_OVER_CODE != []:
461 | 		json_out["scripts_over_code"] = {}
462 | 		for script in crawler.SCRIPTS_OVER_CODE:
463 | 			print("Running script over code: " + script)
464 | 			json_out["scripts_over_code"][script] = {}
465 | 			error, output, retcode = run_command( script)
466 | 			script_output = output.decode('utf-8') + error.decode('utf-8')
467 | 			ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
468 | 			script_output = ansi_escape.sub('', script_output)
469 | 			json_out["scripts_over_code"][script]["output"] = script_output
470 | 			if retcode != 0:
471 | 				json_out["scripts_over_code"][script]["ERROR"] = True
472 | 	if crawler.QL_QUERIES != []:
473 | 		# first, move back out of the repo
474 | 		os.chdir(cur_dir)
475 | 		json_out["QL_queries"] = {}
476 | 		for query in crawler.QL_QUERIES:
477 | 			print("Running QL query: " + query)
478 | 			json_out["QL_queries"][query] = {}
479 | 			# runQuery.sh does the following:
480 | 			# - create QL database (with name repo_name)
481 | 			# - save the result of the query.ql in repo_name__query__results.csv
482 | 			# - clean up: delete the bqrs file
483 | 			error, output, retcode = run_command( "src/runQuery.sh TESTING_REPOS/" + repo_name + " " 
484 | 													+ repo_name + " " + query + " " + crawler.output_dir)
485 | 			if crawler.VERBOSE_MODE:
486 | 				query_output = output.decode('utf-8') + error.decode('utf-8')
487 | 				ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
488 | 				query_output = ansi_escape.sub('', query_output)
489 | 				json_out["QL_queries"][query]["output"] = query_output
490 | 			if retcode != 0:
491 | 				json_out["QL_queries"][query]["ERROR"] = True
492 | 		if crawler.RM_AFTER_CLONING:
493 | 			run_command( "rm -rf QLDBs/" + repo_name)
494 | 		os.chdir( "TESTING_REPOS/" + repo_name)
495 | 
496 | 
497 | 	return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
498 | 


--------------------------------------------------------------------------------
/tests/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | RUN apt-get update \
 4 | 	&& DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends python3 python3-pip git vim curl wget
 5 | 
 6 | RUN mkdir -p /home/playground
 7 | 
 8 | COPY . /home/playground/
 9 | 
10 | WORKDIR /home/playground
11 | 
12 | RUN ./build.sh
13 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | ## Testing
 2 | 
 3 | The tests run on a specific commit SHA of `memfs`.
 4 | This docker also has specific, hardcoded versions of nodejs, npm, and yarn to ensure consistency of results.
 5 | 
 6 | ```
 7 | # setup the docker container to run the tests
 8 | ./prepTestDocker.sh
 9 | 
10 | # actually run the tests
11 | ./runTestDocker.sh
12 | ```
13 | 
14 | The test docker is constructed using the version of the npm-filter source code in the `src` directory. This test should be run on any updates to the source code, to ensure that the functionality is preserved.
15 | 
16 | The tests run `src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905`, inside the constructed test docker. The output JSON file produced is `diff`ed against the expected output file; any difference would case the test to fail.
17 | 
18 | If the tests pass, you should see the following output:
19 | ```
20 | memfs: test passed
21 | ```
22 | If the tests fail, then the `diff` will be printed to the terminal.
23 | 
24 | If you extend the npm-filter functionality, then [the expected JSON output file](https://github.com/emarteca/npm-filter/blob/master/tests/memfs__results_expected.json) will need to be updated accordingly.
25 | 


--------------------------------------------------------------------------------
/tests/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # specific versions to lock at 
 4 | # note that nodejs comes with npm
 5 | YARN_VERSION="1.22.17"
 6 | NODEJS_VERSION="16.10.0"
 7 | 
 8 | apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential
 9 | curl -sL https://deb.nodesource.com/setup_12.x | bash -
10 | apt-get update
11 | 
12 | curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add -
13 | echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list
14 | apt-get update
15 | 
16 | curl https://sh.rustup.rs -sSf | sh -s -- -y
17 | source $HOME/.cargo/env
18 | 
19 | pip3 install --upgrade setuptools setuptools_rust wheel
20 | pip3 install scrapy bs4
21 | 
22 | rm build.sh
23 | 
24 | mkdir TESTING_REPOS
25 | 
26 | # install specific version of node
27 | # https://askubuntu.com/questions/957439/how-to-install-a-specific-version-of-node-on-ubuntu-server
28 | wget https://nodejs.org/dist/v${NODEJS_VERSION}/node-v${NODEJS_VERSION}-linux-x64.tar.gz
29 | mkdir -p /opt/nodejs
30 | tar -xvzf node-v${NODEJS_VERSION}-linux-x64.tar.gz -C /opt/nodejs
31 | cd /opt/nodejs
32 | mv node-v${NODEJS_VERSION}-linux-x64 ${NODEJS_VERSION}
33 | ln -s ${NODEJS_VERSION} current
34 | ln -s /opt/nodejs/current/bin/node /usr/bin/node
35 | 
36 | # link npm and use it to install yarn and common testing packages
37 | ln -s /opt/nodejs/current/bin/npm /usr/bin/npm
38 | 
39 | npm install -g yarn@${YARN_VERSION}
40 | npm install -g jest mocha tap ava nyc 
41 | 
42 | echo PATH=/opt/nodejs/current/bin/:$PATH >> /root/.bashrc
43 | 


--------------------------------------------------------------------------------
/tests/memfs__results_expected.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "installation": {
 3 |         "installer_command": "yarn"
 4 |     },
 5 |     "build": {
 6 |         "build_script_list": [
 7 |             "build"
 8 |         ]
 9 |     },
10 |     "testing": {
11 |         "test": {
12 |             "num_passing": 265,
13 |             "num_failing": 0,
14 |             "test_infras": [
15 |                 "jest"
16 |             ],
17 |             "timed_out": false
18 |         },
19 |         "test:coverage": {
20 |             "num_passing": 265,
21 |             "num_failing": 0,
22 |             "test_infras": [
23 |                 "jest"
24 |             ],
25 |             "timed_out": false
26 |         },
27 |         "tslint": {
28 |             "test_linters": [
29 |                 "tslint -- linter"
30 |             ],
31 |             "RUNS_NEW_USER_TESTS": false,
32 |             "timed_out": false
33 |         }
34 |     },
35 |     "metadata": {
36 |         "repo_link": "https://github.com/streamich/memfs",
37 |         "repo_commit_SHA": "863f373185837141504c05ed19f7a253232e0905"
38 |     }
39 | }


--------------------------------------------------------------------------------
/tests/prepTestDocker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | cp -r ../src ../configs/default_filter_config.json .
4 | docker build -t npm-filter .
5 | 
6 | rm -r src
7 | rm default_filter_config.json
8 | 


--------------------------------------------------------------------------------
/tests/runTestDocker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ ! -d local_mount ]; then
 4 | 	mkdir local_mount
 5 | fi
 6 | 
 7 | docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \
 8 | 	-it npm-filter:latest \
 9 | 	bash -c "./runTests.sh"
10 | rm -r local_mount
11 | 


--------------------------------------------------------------------------------
/tests/runTests.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # if we have a custom version of node, add it to the PATH
 4 | if [ -d /opt/nodejs/current/bin ]; then
 5 | 	PATH=/opt/nodejs/current/bin/:$PATH
 6 | fi
 7 | 
 8 | # memfs
 9 | if [[ -d TESTING_REPOS/memfs ]]; then
10 |         rm -rf TESTING_REPOS/memfs
11 | fi
12 | 
13 | python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/streamich/memfs 863f373185837141504c05ed19f7a253232e0905 >/dev/null 2>&1
14 | 
15 | pkg_diff=`diff memfs__results.json memfs__results_expected.json`
16 | if [ "$pkg_diff" = "" ]; then
17 | 	echo "memfs: test passed"
18 | else
19 | 	echo "memfs: test failed"
20 | 	echo "memfs failing diff: " $pkg_diff
21 | fi
22 | 


--------------------------------------------------------------------------------