├── nbs ├── .gitignore └── Evaluation.ipynb ├── plugins └── vscode │ ├── .gitignore │ ├── CodeCompass.png │ ├── Code-Compass-Screenshot.png │ ├── README.txt │ ├── CHANGELOG.md │ ├── tslint.json │ ├── tsconfig.json │ ├── src │ ├── test │ │ ├── extension.test.ts │ │ └── index.ts │ └── extension.ts │ ├── webpack.config.js │ ├── LICENSE.txt │ ├── errorPage.html │ ├── package.json │ ├── README.md │ ├── codeCompass.css │ └── codeCompass.html ├── assets ├── showcase.gif ├── vscode_usage.gif └── java_ecosystem_3d_ann.png ├── CONTRIBUTORS ├── scripts ├── docker-build.sh ├── Dockerfile ├── docker-run.sh ├── preprocess.sh ├── setup.sh ├── crawl.sh ├── run.sh ├── create_import_dataset.py ├── README.md ├── extract_imports.sh ├── filter_import_dataset.py ├── create_crawl_scripts.py ├── gitgrab.py └── extract_imports.py ├── docs ├── buildApiDoc.sh ├── code-compass-openapi.yaml └── code-compass.apib ├── LICENSE └── README.md /nbs/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /plugins/vscode/.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | node_modules 3 | .vscode-test/ 4 | *.vsix 5 | -------------------------------------------------------------------------------- /assets/showcase.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nokia/code-compass/HEAD/assets/showcase.gif -------------------------------------------------------------------------------- /assets/vscode_usage.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nokia/code-compass/HEAD/assets/vscode_usage.gif -------------------------------------------------------------------------------- /assets/java_ecosystem_3d_ann.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nokia/code-compass/HEAD/assets/java_ecosystem_3d_ann.png -------------------------------------------------------------------------------- /plugins/vscode/CodeCompass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nokia/code-compass/HEAD/plugins/vscode/CodeCompass.png -------------------------------------------------------------------------------- /plugins/vscode/Code-Compass-Screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nokia/code-compass/HEAD/plugins/vscode/Code-Compass-Screenshot.png -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Bart Theeten 2 | Frederik Vandeputte 3 | Tom Van Cutsem 4 | -------------------------------------------------------------------------------- /plugins/vscode/README.txt: -------------------------------------------------------------------------------- 1 | To package the extension, run: vsce package 2 | To load the extension, select 'Extensions: Install from VSIX...' from the VSCode command palette. 3 | Configure the extension through VSCode > Preferences > Settings 4 | -------------------------------------------------------------------------------- /scripts/docker-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | docker build -t codecompass_crawler --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy . 8 | -------------------------------------------------------------------------------- /scripts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.2 2 | RUN pip install pygithub tqdm pandas dis 3 | #RUN apt-get update && apt-get install -y nodejs 4 | #RUN npm install decomment 5 | WORKDIR /app 6 | COPY *.sh *.py ./ 7 | ENV SKIP_SETUP 1 8 | ENTRYPOINT ["/bin/bash", "./run.sh"] 9 | -------------------------------------------------------------------------------- /plugins/vscode/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to the "code-compass" extension will be documented in this file. 3 | 4 | Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. 5 | 6 | ## [Unreleased] 7 | - Initial release -------------------------------------------------------------------------------- /plugins/vscode/tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "rules": { 3 | "no-string-throw": true, 4 | "no-unused-expression": true, 5 | "no-duplicate-variable": true, 6 | "curly": true, 7 | "class-name": true, 8 | "semicolon": [ 9 | true, 10 | "always" 11 | ], 12 | "triple-equals": true 13 | }, 14 | "defaultSeverity": "warning" 15 | } -------------------------------------------------------------------------------- /scripts/docker-run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | [ -f apikey.txt ] || { echo >&2 "Please provide your GitHub API key in file apikey.txt. Aborting. " ; exit 1; } 8 | 9 | mkdir -p ../datasets 10 | docker run --rm -it -u $UID:$(id -g $UID) --env http_proxy=$http_proxy --env https_proxy=$https_proxy -v $PWD/apikey.txt:/app/apikey.txt -v $PWD/../datasets:/datasets codecompass_crawler $* 11 | -------------------------------------------------------------------------------- /docs/buildApiDoc.sh: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019, Nokia 2 | # Licensed under the BSD 3-Clause License 3 | 4 | 5 | # Builds the Swagger/OpenAPI/API Blueprint API documentation 6 | 7 | # Requirements: 8 | # npm install -g aglio 9 | 10 | 11 | aglio --theme-full-width -i code-compass.apib -o index.html 12 | #aglio --theme-full-width --theme-variables slate -i code-compass.apib -o index.html 13 | #aglio --theme-variables slate -i code-compass.apib --theme-template triple -o index.html 14 | open index.html 15 | -------------------------------------------------------------------------------- /scripts/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | language=${1:-python} 8 | 9 | basedir=../datasets/$language 10 | 11 | 12 | echo "[PREPROCESS: EXTRACTING IMPORTS]" 13 | ./extract_imports.sh $basedir $language || exit 1 14 | echo ; echo 15 | 16 | 17 | echo "[PREPROCESS: CREATING RAW IMPORT DATASET]" 18 | ./create_import_dataset.py $basedir || exit 1 19 | echo ; echo 20 | 21 | echo "[PREPROCESS: CREATING PROCESSED PROJECTFILEIMPORT DATASET]" 22 | ./filter_import_dataset.py $basedir || exit 1 23 | echo ; echo 24 | -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | PYTHON=python3 8 | PIP=pip 9 | JUPYTER=jupyter 10 | 11 | #enable sudo in case pip requires root privileges 12 | SUDO= 13 | #SUDO=sudo 14 | 15 | #### 16 | 17 | command -v $PYTHON >/dev/null 2>&1 || { echo >&2 "Cannot find $PYTHON. Aborting." ; exit 1; } 18 | command -v $PIP >/dev/null 2>&1 || { echo >&2 "Cannot find $PIP. Aborting." ; exit 1; } 19 | 20 | 21 | $SUDO $PIP install pygithub tqdm pandas dis 22 | echo 23 | 24 | [ -f apikey.txt ] || { echo >&2 "Please provide your GitHub API key in file apikey.txt. Aborting. " ; exit 1; } 25 | 26 | echo "Setup complete." 27 | -------------------------------------------------------------------------------- /scripts/crawl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | language=${1:-python} 8 | maxprojects=${2:-0} 9 | minstars=${3:-2} 10 | maxsize=${4:-0} #in kb 11 | usecache=${5:-0} 12 | 13 | basedir=../datasets/$language 14 | mkdir -p $basedir 15 | 16 | if [ $usecache -eq 0 ] ; then 17 | rm -rf $basedir/dataset*/ 18 | fi 19 | 20 | #check for gitGrab file 21 | if [ $usecache -eq 0 -o ! -f $basedir/gitGrab.json ] ; then 22 | ./gitgrab.py $basedir $language fast $maxprojects $minstars $maxsize 23 | fi 24 | 25 | #check for download scripts 26 | if [ $usecache -eq 0 -o ! -f $basedir/crawl_dataset01.sh ] ; then 27 | ./create_crawl_scripts.py $basedir 28 | fi 29 | 30 | #download all 31 | cd $basedir 32 | for f in $(ls ./crawl_dataset*.sh) ; do 33 | $f 34 | done 35 | -------------------------------------------------------------------------------- /plugins/vscode/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es6", 5 | "outDir": "out", 6 | "lib": [ 7 | "es6" 8 | ], 9 | "sourceMap": true, 10 | "rootDir": "src", 11 | /* Strict Type-Checking Option */ 12 | "strict": false, /* enable all strict type-checking options */ 13 | /* Additional Checks */ 14 | "noUnusedLocals": false /* Report errors on unused locals. */ 15 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 16 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 17 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 18 | }, 19 | "exclude": [ 20 | "node_modules", 21 | ".vscode-test" 22 | ] 23 | } -------------------------------------------------------------------------------- /plugins/vscode/src/test/extension.test.ts: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019, Nokia 2 | // Licensed under the BSD 3-Clause License 3 | 4 | // 5 | // Note: This example test is leveraging the Mocha test framework. 6 | // Please refer to their documentation on https://mochajs.org/ for help. 7 | // 8 | 9 | // The module 'assert' provides assertion methods from node 10 | import * as assert from 'assert'; 11 | 12 | // You can import and use all API from the 'vscode' module 13 | // as well as import your extension to test it 14 | // import * as vscode from 'vscode'; 15 | // import * as myExtension from '../extension'; 16 | 17 | // Defines a Mocha test suite to group tests of similar kind together 18 | suite("Extension Tests", function () { 19 | 20 | // Defines a Mocha unit test 21 | test("Something 1", function() { 22 | assert.equal(-1, [1, 2, 3].indexOf(5)); 23 | assert.equal(-1, [1, 2, 3].indexOf(0)); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /plugins/vscode/src/test/index.ts: -------------------------------------------------------------------------------- 1 | // 2 | // PLEASE DO NOT MODIFY / DELETE UNLESS YOU KNOW WHAT YOU ARE DOING 3 | // 4 | // Copyright (C) 2019, Nokia 5 | // Licensed under the BSD 3-Clause License 6 | // 7 | // This file is providing the test runner to use when running extension tests. 8 | // By default the test runner in use is Mocha based. 9 | // 10 | // You can provide your own test runner if you want to override it by exporting 11 | // a function run(testRoot: string, clb: (error:Error) => void) that the extension 12 | // host can call to run the tests. The test runner is expected to use console.log 13 | // to report the results back to the caller. When the tests are finished, return 14 | // a possible error to the callback or null if none. 15 | 16 | import * as testRunner from 'vscode/lib/testrunner'; 17 | 18 | // You can directly control Mocha options by uncommenting the following lines 19 | // See https://github.com/mochajs/mocha/wiki/Using-mocha-programmatically#set-options for more info 20 | testRunner.configure({ 21 | ui: 'tdd', // the TDD UI is being used in extension.test.ts (suite, test, etc.) 22 | useColors: true // colored output from test results 23 | }); 24 | 25 | module.exports = testRunner; 26 | -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | SCRIPTPATH=$(cd "$(dirname "${BASH_SOURCE[0]}" )" && pwd -P) 7 | cd $SCRIPTPATH 8 | 9 | if [ $# -eq 0 -o "$1" = "-h" -o "$1" = "--help" ] ; then 10 | echo "Usage: $0 [] [] [] []" 11 | echo 12 | echo " Programming language. Supported languages:" 13 | echo " python, java, javascript, csharp, php, ruby" 14 | echo " Maximum GitHub projects (default = 0: all)" 15 | echo " Min GitHub stars (default = 2)" 16 | echo " Max project size (in kb) (default = 0)" 17 | echo " Cache intermediate files/scripts (default = 0)" 18 | exit 1 19 | fi 20 | 21 | language=${1:-python} 22 | maxprojects=${2:-0} 23 | minstars=${3:-2} 24 | maxsize=${4:-0} 25 | usecache=${5:-0} 26 | 27 | if [ -z "$SKIP_SETUP" ] ; then 28 | echo "[RUNNING SETUP]" 29 | ./setup.sh || exit 1 30 | echo ; echo 31 | fi 32 | 33 | echo "[RUNNING CRAWL]" 34 | ./crawl.sh $language $maxprojects $minstars $maxsize $usecache || exit 1 35 | echo ; echo 36 | 37 | echo "[RUNNING PREPROCESS]" 38 | ./preprocess.sh $language || exit 1 39 | echo ; echo 40 | 41 | echo "DONE!" 42 | -------------------------------------------------------------------------------- /scripts/create_import_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | import gzip 8 | import json 9 | import glob 10 | from tqdm import tqdm 11 | import re 12 | import sys 13 | import os 14 | 15 | basedir = sys.argv[1] if len(sys.argv) > 1 else '../datasets/python' 16 | 17 | os.makedirs(basedir+'/raw', exist_ok=True) 18 | 19 | def dump_ds(projectfileimports, dsidx): 20 | with gzip.open(basedir+'/raw/raw-import-ds%02d.json.gz'%dsidx, 'w') as fd: 21 | fd.write(json.dumps(projectfileimports, sort_keys=True, indent=2).encode('utf-8')) 22 | 23 | 24 | def load_ds(dsidx): 25 | projectfileimports = {} 26 | for p in tqdm(glob.iglob(basedir+'/dataset%02d/**/*.json'%dsidx, recursive=True)): 27 | projectname = p[:-5] 28 | try: 29 | with open(p, 'r') as fd: 30 | projectfileimports[projectname] = json.loads(fd.read()) 31 | except: 32 | print('Problem with', p) 33 | return projectfileimports 34 | 35 | 36 | dsidx = 1 37 | while True: 38 | if not os.path.isdir(basedir+"/dataset%02d/"%dsidx): 39 | #print("Folder dataset%02d does not exist"%dsidx, file=sys.stderr) 40 | sys.exit(0) 41 | 42 | print("Loading ds",dsidx) 43 | projectfileimports = load_ds(dsidx) 44 | print("Dumping ds",dsidx) 45 | dump_ds(projectfileimports, dsidx) 46 | 47 | dsidx += 1 48 | 49 | -------------------------------------------------------------------------------- /plugins/vscode/webpack.config.js: -------------------------------------------------------------------------------- 1 | //@ts-check 2 | 3 | // Copyright (C) 2019, Nokia 4 | // Licensed under the BSD 3-Clause License 5 | 6 | 'use strict'; 7 | 8 | const path = require('path'); 9 | 10 | /**@type {import('webpack').Configuration}*/ 11 | const config = { 12 | target: 'node', // vscode extensions run in a Node.js-context 📖 -> https://webpack.js.org/configuration/node/ 13 | 14 | entry: './src/extension.ts', // the entry point of this extension, 📖 -> https://webpack.js.org/configuration/entry-context/ 15 | output: { 16 | // the bundle is stored in the 'dist' folder (check package.json), 📖 -> https://webpack.js.org/configuration/output/ 17 | path: path.resolve(__dirname, 'dist'), 18 | filename: 'extension.js', 19 | libraryTarget: 'commonjs2', 20 | devtoolModuleFilenameTemplate: '../[resource-path]' 21 | }, 22 | devtool: 'source-map', 23 | externals: { 24 | vscode: 'commonjs vscode' // the vscode-module is created on-the-fly and must be excluded. Add other modules that cannot be webpack'ed, 📖 -> https://webpack.js.org/configuration/externals/ 25 | }, 26 | resolve: { 27 | // support reading TypeScript and JavaScript files, 📖 -> https://github.com/TypeStrong/ts-loader 28 | extensions: ['.ts', '.js'] 29 | }, 30 | module: { 31 | rules: [ 32 | { 33 | test: /\.ts$/, 34 | exclude: /node_modules/, 35 | use: [ 36 | { 37 | loader: 'ts-loader' 38 | } 39 | ] 40 | } 41 | ] 42 | } 43 | }; 44 | module.exports = config; 45 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | This folder contains the necessary scripts to crawl and process the raw import datasets. 4 | 5 | ## Prerequisites 6 | 7 | Before starting, you should create a GitHub API key, and put it into apikey.txt. 8 | 9 | ## Running the scripts 10 | 11 | The scripts can be executed both locally, or in a Docker container. Note that in the latter case, the crawled and processed datasets will be stored locally in this folder. 12 | 13 | To run it in a container, you first need to locally build the container, by running `./docker-build.sh`. Note that 14 | 15 | To run the scripts locally, you need to execute `./run.sh`; to run it as a container, you need to execute `./docker-run.sh`. 16 | 17 | The usage of the run-scripts is as follows. You currently need to provide at least the programming language. Other positional options include the maximum number of projects to crawl (default = all projects), the minimum number of github stars (default = 2), the maximum size of the projects in kb (default=0), and whether to reuse the cached files (default = 0). 18 | 19 | ``` 20 | Usage: ./run.sh [] [] [] [] 21 | 22 | Programming language. Supported languages: 23 | python, java, javascript, csharp, php, ruby 24 | Maximum GitHub projects (default = 0: all) 25 | Min GitHub stars (default = 2) 26 | Max project size (in kb) (default = 0) 27 | Cache intermediate files/scripts (default = 0) 28 | ``` 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Nokia 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /plugins/vscode/LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Nokia 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /scripts/extract_imports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | basedir=${1:-../datasets/python} 8 | language=${2:-python} 9 | shift 2 10 | 11 | uname=$(uname) 12 | 13 | cwd=$PWD 14 | 15 | cd $basedir 16 | 17 | # Note: you can also run this in parallel per dataset to speed up 18 | 19 | dslist=$* 20 | if [ -z "$dslist" ] ; then 21 | dslist=$(ls -d dataset*/|sed "s|dataset\([0-9]*\)/|\1|") 22 | fi 23 | 24 | case $language in 25 | python) fileextensions="*.py" ;; 26 | java) fileextensions="*.java" ;; 27 | javascript) fileextensions="*.js *.ts" ;; 28 | csharp) fileextensions="*.cs" ;; 29 | php) fileextensions="*.php" ;; 30 | ruby) fileextensions="*.rb" ;; 31 | *) echo "Unsupported language $language" ; exit 1 ;; 32 | esac 33 | 34 | for d in $dslist ; do 35 | ds=$(printf "%02d" $d) 36 | echo "[Dataset $ds]" 37 | 38 | find dataset$ds -name "*.tgz" | while read f ; do 39 | if echo $f|grep -q ".src.tgz" ; then continue ; fi 40 | srctar=${f%.tgz}.src.tgz 41 | jsonfile=${f%.tgz}.json 42 | if [ -f $jsonfile ] ; then echo "Skipping $f" >&2 ; continue ; fi 43 | echo $f 44 | tmpdir=$(mktemp -d /tmp/extractsources_XXXXX) 45 | if [ $uname = "Darwin" ] ; then 46 | tar -C $tmpdir -xzf $f $(eval echo "$fileextensions") 2>/dev/null 47 | else 48 | tar --wildcards --ignore-case -C $tmpdir -xzf $f $(eval echo "$fileextensions") 2>/dev/null 49 | fi 50 | $cwd/extract_imports.py $tmpdir $language >$jsonfile 51 | if [ ! -s $jsonfile ] ; then 52 | rm -f $jsonfile 53 | fi 54 | tar -C $tmpdir -czf $srctar . 55 | rm -rf $tmpdir 56 | done | tqdm >/dev/null 57 | done 58 | -------------------------------------------------------------------------------- /plugins/vscode/errorPage.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Code Compass 7 | 23 | 24 | 25 | 26 |

Code Compass -- Oops...

27 |

Failed to communicate with server. Please verify that you can reach $${serverURL}

28 |

You may need to configure a proxy in Code > Preferences > Settings. 29 |

Search for configuration key http.proxy

30 | 31 |

32 | 33 | -------------------------------------------------------------------------------- /plugins/vscode/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "code-compass", 3 | "displayName": "Code-Compass (Nokia Bell-Labs)", 4 | "description": "Code Compass suggests libraries given your current development context", 5 | "version": "1.0.4", 6 | "publisher": "NokiaBellLabs", 7 | "repository": { 8 | "type": "github", 9 | "url": "https://github.com/nokia/code-compass.git" 10 | }, 11 | "engines": { 12 | "vscode": "^1.24.0" 13 | }, 14 | "categories": [ 15 | "Programming Languages" 16 | ], 17 | "keywords": [ 18 | "code", 19 | "libraries", 20 | "compass", 21 | "context", 22 | "search" 23 | ], 24 | "activationEvents": [ 25 | "onCommand:extension.showCodeCompass" 26 | ], 27 | "main": "./dist/extension", 28 | "contributes": { 29 | "commands": [ 30 | { 31 | "command": "extension.showCodeCompass", 32 | "title": "Code Compass" 33 | } 34 | ], 35 | "configuration": { 36 | "type": "object", 37 | "title": "Code-Compass settings", 38 | "properties": { 39 | "code-compass.url": { 40 | "type": "string", 41 | "default": "https://www.code-compass.com", 42 | "description": "URL of the backend server" 43 | } 44 | } 45 | } 46 | }, 47 | "scripts": { 48 | "vscode:prepublish": "webpack --mode production", 49 | "compile": "webpack --mode none", 50 | "watch": "webpack --mode none --watch", 51 | "postinstall": "node ./node_modules/vscode/bin/install", 52 | "test": "npm run compile && node ./node_modules/vscode/bin/test", 53 | "test-compile": "tsc -p ./" 54 | }, 55 | "dependencies": { 56 | "follow-redirects": "^1.5.10", 57 | "https-proxy-agent": "^2.2.1", 58 | "maven": "^4.4.1", 59 | "xml2js": "^0.4.19" 60 | }, 61 | "devDependencies": { 62 | "@types/mocha": "^2.2.42", 63 | "@types/node": "^7.10.2", 64 | "ts-loader": "^5.4.4", 65 | "tslint": "^5.8.0", 66 | "typescript": "^2.6.1", 67 | "vscode": "^1.1.26", 68 | "webpack": "^4.30.0", 69 | "webpack-cli": "^3.3.1" 70 | }, 71 | "icon": "CodeCompass.png" 72 | } 73 | -------------------------------------------------------------------------------- /plugins/vscode/README.md: -------------------------------------------------------------------------------- 1 | # code-compass README 2 | 3 | Code Compass analyzes your workspace in order to provide software library suggestions based on your current context. Just tell Code Compass what you are looking for (intent) and it will suggest libraries that are compatible with your current development context. 4 | 5 | ## Features 6 | 7 | Here is a screenshot of Code Compass in action: 8 | 9 | ![Screenshot](https://www.code-compass.com/images/code-compass-screenshot.png "Code-Compass in action") 10 | 11 | Simply open a source file (.java, .py, .js) or a requirements file (pom.xml, requirements.txt, package.json), then launch Code-Compass by entering 'code-compass' in the command palette. 12 | 13 | Code-Compass will keep track of your active file editor and continuously analyse its current dependencies. You can then search for libraries that are a good fit with your current development context, by entering a keyword (tag, intent) in the searchbox on the left of the screen. 14 | 15 | ## Requirements 16 | 17 | The extension supports Java, JavaScript and Python projects. 18 | 19 | This extension contributes the following settings: 20 | 21 | * `code-compass.url`: URL of the backend server (default: www.code-compass.com). 22 | 23 | ## Known Issues 24 | 25 | When upgrading to a new release, sometimes you can get into a strange behaviour. To resolve this, please uninstall the extension, reload vscode and then remove the directory ~/.vscode/extension/nokia-bell-labs.code-compass-x.x.x . Finally, re-install the extension from VSIX. 26 | 27 | ## Release Notes 28 | 29 | ### 0.0.1 30 | Initial release of code-compass 31 | ### 0.1.2 32 | Internal Trial version 33 | ### 0.1.3 34 | User preference changes are dynamically taken into account. This allows for example to point to a different server. 35 | ### 0.2.0 36 | Support for python and javascript added. 37 | ### 0.2.1 38 | 39 | - Anonymous user identification (to detect return-users only) 40 | - Dynamic intent quick-picks, relevant to your project context 41 | - Automatic loading of nearest libs to your context when no intent specified 42 | - Defense against unknown libs (show them as not recognized) 43 | - Turn off visualization-related messages by default (causing too much unnecessary load) 44 | - Convert package level java libs to module level java 45 | - Disabled experimental test-drive and snippet insertion features 46 | 47 | ### 0.2.2 48 | - Allow default ecosystem exploration when no active editor contains a supported file type. 49 | 50 | ### 0.2.3 51 | - Bug fix - duplicate requests sent to server 52 | - Bug fix - negative and error token detection 53 | 54 | ### 1.0.0 55 | - First public release 56 | 57 | ### 1.0.1 58 | - Fixed rendering issue 59 | 60 | ### 1.0.2 61 | - Added interactive elements to suggested items (feedback, documentation, add/remove to/from context) 62 | 63 | -------------------------------------------------------------------------------- /plugins/vscode/codeCompass.css: -------------------------------------------------------------------------------- 1 | .vscode-dark p { 2 | color: white; 3 | } 4 | 5 | .vscode-light h2 { 6 | color: rgb(25,71,145); 7 | } 8 | 9 | .vscode-light h3 { 10 | color: rgb(25,71,145); 11 | } 12 | 13 | .vscode-dark h2 { 14 | color: rgb(43,200,251); 15 | } 16 | 17 | .vscode-dark h3 { 18 | color: rgb(43,200,251); 19 | } 20 | 21 | .vscode-light .nokia-blue { 22 | background-color: rgb(25,71,145); 23 | color: white; 24 | } 25 | 26 | .vscode-dark .nokia-blue { 27 | background-color: rgb(43,200,251); 28 | color: black; 29 | } 30 | 31 | .vscode-light .hover-nokia-blue:hover { 32 | color:#fff!important; 33 | background-color:rgb(25,71,145)!important; 34 | } 35 | 36 | .vscode-dark .hover-nokia-blue:hover { 37 | color:black !important; 38 | background-color:rgb(43,200,251)!important; 39 | } 40 | 41 | .vscode-dark .quick-pick-button { 42 | border-color: rgb(43,200,251); 43 | } 44 | 45 | .suggest-item p { 46 | color: black; 47 | } 48 | 49 | .scroll { 50 | width: calc(100%-40px); 51 | margin: 40px; 52 | padding: 0px; 53 | padding-top: 0px; 54 | } 55 | 56 | .scroll ul { 57 | /* max-height: 400px; */ 58 | /* overflow:hidden; */ 59 | /* overflow-y:scroll; */ 60 | } 61 | 62 | .scroll-horizontal ul { 63 | width: 100%; 64 | overflow: hidden; 65 | overflow-x: scroll; 66 | } 67 | 68 | .horizontal-list li { 69 | float: left; 70 | display: block; 71 | padding-right: 10px; 72 | padding-top: 5px; 73 | padding-bottom: 5px; 74 | } 75 | 76 | td .button-col { 77 | width: 135px; 78 | } 79 | 80 | .license { 81 | color:steelblue; 82 | font-size:7pt; 83 | border-color:steelblue; 84 | border-style:solid; 85 | border-width:0.5pt; 86 | border-radius:2pt; 87 | margin-left: 5pt; 88 | } 89 | 90 | .description { 91 | text-overflow: ellipsis; 92 | overflow: hidden; 93 | /* white-space: nowrap; */ 94 | } 95 | 96 | .tooltip { 97 | position: relative; 98 | display: inline-block; 99 | } 100 | 101 | .tooltip .tooltiptext { 102 | visibility: hidden; 103 | width: 120px; 104 | background-color: black; 105 | color: #fff; 106 | text-align: center; 107 | border-radius: 6px; 108 | padding: 5px 0; 109 | position: absolute; 110 | z-index: 1; 111 | bottom: 150%; 112 | left: 50%; 113 | margin-left: -60px; 114 | } 115 | 116 | .tooltip .tooltiptext::after { 117 | content: ""; 118 | position: absolute; 119 | top: 100%; 120 | left: 50%; 121 | margin-left: -5px; 122 | border-width: 5px; 123 | border-style: solid; 124 | border-color: black transparent transparent transparent; 125 | } 126 | 127 | .tooltip:hover .tooltiptext { 128 | visibility: visible; 129 | } 130 | 131 | -------------------------------------------------------------------------------- /scripts/filter_import_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | import gzip 8 | import json 9 | import glob 10 | from tqdm import tqdm 11 | import re 12 | import sys 13 | import os 14 | import hashlib 15 | 16 | 17 | # minimum # imports per source file 18 | minsrcfileimports = 1 19 | # minimum # remaining source files per project 20 | minsrcfiles = 1 21 | #filter import-duplicates? 22 | filterduplicates = True 23 | 24 | ########## 25 | 26 | basedir = sys.argv[1] if len(sys.argv) > 1 else '../datasets/python' 27 | 28 | os.makedirs(basedir+'/processed', exist_ok=True) 29 | 30 | 31 | def load_json_gz(filename): 32 | with gzip.open(filename, 'r') as fd: 33 | return json.loads(fd.read()) 34 | 35 | def store_json_gz(filename, data, sort=False): 36 | with gzip.open(filename, 'w') as fd: 37 | fd.write(json.dumps(data, sort_keys=sort, indent=2).encode('utf-8')) 38 | 39 | def create_hash(fileimports): 40 | fileimportstring = "|".join(sorted(["|".join(sorted(imports)) for imports in fileimports.values()])) 41 | m = hashlib.md5() 42 | m.update(fileimportstring.encode()) 43 | return m.hexdigest() 44 | 45 | def dedup_projects(dedupprojectfileimports, projectfileimports): 46 | newprojectfileimports = {} 47 | refhashes = set(dedupprojectfileimports.values()) 48 | 49 | for project, fileimports in projectfileimports.items(): 50 | projecthash = create_hash(fileimports) 51 | if projecthash not in refhashes: 52 | dedupprojectfileimports[project] = projecthash 53 | refhashes.update(projecthash) 54 | newprojectfileimports[project] = fileimports 55 | 56 | return newprojectfileimports 57 | 58 | 59 | 60 | dedupprojectfileimports = {} 61 | nrawprojects = 0 62 | ndedupprojects = 0 63 | 64 | 65 | 66 | for rawfname in tqdm(glob.glob(basedir+'/raw/raw-import-ds*.json.gz')): 67 | projectfileimports = load_json_gz(rawfname) 68 | #print(len(projectfileimports), "projects in raw dataset", rawfname.split(' /')[-1]) 69 | 70 | #filter out srcfiles with too few imports 71 | projectfileimports = {projectname:{filename:imports for filename, imports in fileimports.items() if len(imports) >= minsrcfileimports} for projectname, fileimports in projectfileimports.items()} 72 | 73 | #filter out empty projects 74 | projectfileimports = {projectname:fileimports for projectname, fileimports in projectfileimports.items() if len(fileimports) >= minsrcfiles} 75 | 76 | nrawprojects += len(projectfileimports) 77 | #filter import-duplicate projects 78 | if filterduplicates: 79 | projectfileimports = dedup_projects(dedupprojectfileimports, projectfileimports) 80 | ndedupprojects += len(projectfileimports) 81 | 82 | #print(len(projectfileimports), "projects after filtering") 83 | store_json_gz(basedir+'/processed/projectfileimports.'+rawfname.split('-')[-1][2:], projectfileimports) 84 | 85 | print("Import deduplication:", nrawprojects, "->", ndedupprojects, "projects") 86 | -------------------------------------------------------------------------------- /scripts/create_crawl_scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | import json 8 | import os 9 | import sys 10 | 11 | basedir = sys.argv[1] if len(sys.argv) > 1 else '../datasets/python' 12 | 13 | 14 | maxprojects=10000 15 | gitclone = False 16 | 17 | with open('apikey.txt', 'r') as fd: 18 | APIKEY = fd.read().rstrip() 19 | 20 | 21 | if not APIKEY: 22 | print("Please first paste your API key in file apikey.txt!") 23 | sys.exit(1) 24 | 25 | print("APIKEY =", APIKEY) 26 | 27 | 28 | 29 | print('Loading gitGrab.json...') 30 | with open(basedir+'/gitGrab.json', 'rt') as fd: 31 | gitgrabs = sorted(json.loads(fd.read()), key=lambda x:-x['stars']) 32 | ngrabs = len(gitgrabs) 33 | lastds = 0 34 | downfd = None 35 | for i, gitgrab in enumerate(gitgrabs): 36 | ds = 1 + i//maxprojects 37 | if ds != lastds: 38 | print('Creating ds', ds) 39 | if downfd != None: downfd.close() 40 | lastds = ds 41 | fname = basedir+'/crawl_dataset%02d.sh'%ds 42 | downfd = open(fname, 'w') 43 | os.chmod(fname, 0o755) 44 | downfd.write('#!/bin/bash\n\n') 45 | #downfd.write('[ -f apikey.txt ] || { echo >&2 "Please provide your GitHub API key in file apikey.txt. Aborting. " ; exit 1; }\n\n') 46 | downfd.write('APIKEY='+APIKEY+'\n') 47 | downfd.write('echo APIKEY=$APIKEY\n\n') 48 | downfd.write('function download_git_project\n') 49 | downfd.write('{\n') 50 | downfd.write('\tlocal ds=$1\n') 51 | downfd.write('\tlocal projdir=$2\n') 52 | downfd.write('\tlocal projname=$3\n') 53 | downfd.write('\tmkdir -p dataset$ds/$projdir\n') 54 | if gitclone: 55 | downfd.write('\tmkdir -p /tmp/dataset$ds/$projdir\n') 56 | downfd.write('\tlocal currdir=$(pwd)\n') 57 | downfd.write('\tcd /tmp/dataset$ds/$projdir\n') 58 | downfd.write('\tgit clone git://github.com/$projdir/$projname.git &>> $currdir/dataset$ds/git-download.log\n') 59 | downfd.write('\ttar -czf $currdir/dataset$ds/$projdir/$projname.tar.gz $projname/\n') 60 | downfd.write('\tcd $currdir\n') 61 | downfd.write('\trm -rf /tmp/dataset$ds/$projdir\n') 62 | downfd.write('\tsleep 1\n') 63 | else: 64 | downfd.write('\t\tfname=dataset$ds/$projdir/$projname.tgz\n') 65 | downfd.write('\twhile [ ! -f $fname -a ! -f $fname.error ] ; do\n') 66 | downfd.write('\t\tcurl -L "https://api.github.com/repos/$projdir/$projname/tarball?access_token=$APIKEY" 2>/dev/null > $fname\n') 67 | downfd.write('\t\tif ! gzip -t $fname 2>/dev/null ; then\n') 68 | downfd.write('\t\t\tif grep -q "API Rate Limit Exceeded" $fname ; then\n') 69 | downfd.write('\t\t\t\trm $fname\n') 70 | downfd.write('\t\t\t\techo "COOLING OFF"\n') 71 | downfd.write('\t\t\t\tsleep 60 #cool off\n') 72 | downfd.write('\t\t\telse\n') 73 | downfd.write('\t\t\t\tmv $fname $fname.error\n') 74 | downfd.write('\t\t\t\techo "Skipping project $projdir/$projname"\n') 75 | downfd.write('\t\t\t\tbreak\n') 76 | downfd.write('\t\t\tfi\n') 77 | downfd.write('\t\tfi\n') 78 | downfd.write('\t\tsleep 1\n') 79 | downfd.write('\tdone\n') 80 | downfd.write('}\n\n') 81 | fullprojectname = gitgrab['full_name'] 82 | projectdir = fullprojectname.split('/')[0] 83 | projectname = "/".join(fullprojectname.split('/')[1:]) 84 | downfd.write('echo "#%d [%d stars] - %s" ; download_git_project %02d %s %s\n' % (i % maxprojects, gitgrab['stars'], projectname,ds,projectdir,projectname)) 85 | downfd.close() 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Marketplace Version](https://vsmarketplacebadge.apphb.com/version/NokiaBellLabs.code-compass.svg)](https://marketplace.visualstudio.com/items?itemName=NokiaBellLabs.code-compass) 2 | 3 | 4 | [Code Compass](https://www.bell-labs.com/code-compass) is a contextual search engine for software packages developed at [Nokia Bell Labs](https://www.bell-labs.com). It supercharges code reuse by recommending the best possible software libraries for your specific software project. See for yourself: 5 | 6 | ![showcase](assets/showcase.gif) 7 | 8 | Code Compass is available as a [website](https://www.bell-labs.com/code-compass), a [REST API](https://www.code-compass.com/apidoc/) and as an [IDE plug-in](https://marketplace.visualstudio.com/items?itemName=NokiaBellLabs.code-compass) for vscode. 9 | 10 | We index packages hosted on [NPM](https://www.npmjs.com) for JavaScript, [PyPI](https://pypi.python.org) for Python and [Maven Central](https://mvnrepository.com/) for Java. 11 | 12 | If you're looking for the similarly named code comprehension tool from Ericsson to explore large codebases, [look here](https://github.com/Ericsson/CodeCompass). Apart from the name, there is no relationship (formal or informal) between that project and this one. 13 | 14 | # Why? 15 | 16 | Modern software development is founded on code reuse through open source libraries and frameworks. These libraries are published in software package repositories, which [are growing](https://www.modulecounts.com) at an exponential rate. By building better software package search tools we aim to stimulate more code reuse and make software packages in the "long tail" more discoverable. 17 | 18 | A gentle introduction to the why, what and how of Code Compass can be found in this [introductory blog post](https://www.code-compass.com/blog/intro). 19 | 20 | # What? 21 | 22 | Code Compass is a contextual search engine for software packages. 23 | 24 | Code Compass differs from other package search engines in that you can "seed" the search with names of libraries that you already know or use. We call these "context libraries". Code Compass then uses these context libraries to "anchor" the search in those technology stacks that are most relevant to your code. 25 | 26 | When using the Visual Studio Code IDE extension there is no need to manually enter context libraries: Code Compass will automatically extract the import dependencies of the active source file to anchor its search. 27 | 28 | Note that Code Compass will never send your code to the server. Only the names of third-party modules imported in your code are sent. 29 | 30 | # How? 31 | 32 | Code Compass uses unsupervised machine learning to learn how to cluster similar software packages by their context of use, as determined by how libraries get imported alongside other libraries in large open source codebases. 33 | 34 | Software packages are represented as vectors which we call "library vectors" by analogy with [word vectors](https://blog.acolyer.org/2016/04/21/the-amazing-power-of-word-vectors/). Just like [word2vec](https://code.google.com/archive/p/word2vec/) turns words into vectors by analyzing how words co-occur in large text corpora, our "import2vec" turns libraries into vectors by analyzing how import statements co-occur in large codebases. 35 | 36 | You can read the details in [our MSR 2019 paper](https://arxiv.org/abs/1904.03990). Supplementary material including trained library embeddings for Java, JavaScript and Python is available on [Zenodo](https://zenodo.org/record/2546488). 37 | 38 | As an example, for Java we looked at a large number of open source projects on [GitHub](https://github.com) and libraries on [Maven Central](https://mvnrepository.com/) and studied how libraries are imported across these projects. We identified large clusters of projects related to web frameworks, cloud computing, network services and big data analytics. Well-known projects such as [Apache Hadoop](http://hadoop.apache.org/), [Spark](https://spark.apache.org/) and [Kafka](https://kafka.apache.org/) were all clustered into the same region because they are commonly used together to support big data analytics. 39 | 40 | Below is a 3D visualization (a [t-SNE](https://lvdmaaten.github.io/tsne/) plot) of the learned vector space for Java. Each dot represents a Java library and the various colored clusters correspond to different niche areas that were discovered in the data. We highlighted the names of [Apache projects](https://projects-new.apache.org/projects.html). 41 | 42 | ![3dviz](assets/java_ecosystem_3d_ann.png) 43 | 44 | # What's in this repo? 45 | 46 | * `docs/`: REST API docs for the Code Compass search service 47 | * `plugins/vscode/`: Visual Studio Code extension to integrate Code Compass into the IDE 48 | * `scripts/`: data extraction scripts to generate library import co-occurrences from source code 49 | * `nbs/`: Jupyter notebooks with TensorFlow models to train library embeddings from import co-occurrence data 50 | 51 | # Team 52 | 53 | Code Compass is developed by a research team in the [Application Platforms and Software Systems](https://www.bell-labs.com/our-research/areas/applications-and-platforms/) Lab of [Nokia Bell Labs](https://www.bell-labs.com). 54 | 55 | See [CONTRIBUTORS](./CONTRIBUTORS) for an alphabetic list of contributors to Code Compass. 56 | 57 | # Contributing 58 | 59 | If you would like to train embeddings for other languages, have a look at the scripts under `import2vec` to get an idea of what data is required. 60 | 61 | If you have suggestions for improvement, user feedback or want to report a bug, please open an issue in this repository. 62 | 63 | # License 64 | 65 | BSD3 66 | -------------------------------------------------------------------------------- /scripts/gitgrab.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | import json 8 | from github import Github 9 | from tqdm import tqdm 10 | from datetime import date 11 | import pandas as pd 12 | import time 13 | #import traceback 14 | import sys 15 | 16 | 17 | basedir = sys.argv[1] if len(sys.argv) > 1 else '.' 18 | language = sys.argv[2] if len(sys.argv) > 2 else 'python' 19 | full = True if len(sys.argv) > 3 and sys.argv[3].lower() == 'full' else False 20 | maxprojects = int(sys.argv[4]) if len(sys.argv) > 4 else 0 21 | minstars = int(sys.argv[5]) if len(sys.argv) > 5 else 2 22 | # maxsize (in kb) 23 | maxsize = int(sys.argv[6]) if len(sys.argv) > 6 else 0 24 | maxstars = 1000000 25 | 26 | 27 | basequery = 'language:'+language 28 | 29 | if language == 'all': 30 | basequery = '' 31 | elif language == 'javascript': 32 | basequery = basequery + ' language:typescript' 33 | 34 | gitgrab = [] 35 | 36 | daterange = pd.date_range(date(2008, 1,1), date.today()) 37 | 38 | outfilename = basedir+('/gitGrabFull.json' if full else '/gitGrab.json') 39 | 40 | ################### 41 | 42 | 43 | 44 | with open('apikey.txt', 'r') as fd: 45 | APIKEY = fd.read().rstrip() 46 | 47 | 48 | if not APIKEY: 49 | print("Please first paste your API key in file apikey.txt!") 50 | sys.exit(1) 51 | 52 | #print("APIKEY =", APIKEY) 53 | # Access by token 54 | g = Github(APIKEY, per_page=100) 55 | 56 | ################### 57 | 58 | 59 | def cooldown(g): 60 | first = True 61 | remainingthreshold = 100 if full else 50 62 | while True: 63 | try: 64 | core = g.get_rate_limit().core 65 | search = g.get_rate_limit().search 66 | if core.remaining >= remainingthreshold and search.remaining >= 5: return 67 | #TODO: check with resettime 68 | if first: 69 | print("Cooling down until:", core.reset.isoformat(), 'UTC', core, search) 70 | first = False 71 | time.sleep(60) 72 | except: 73 | #traceback.print_exc() 74 | time.sleep(5) 75 | 76 | def search_repos(g, query): 77 | repos = None 78 | cooldown(g) 79 | while repos == None: 80 | try: 81 | repos = g.search_repositories(query=query) 82 | except: 83 | print("RETRYING REPOS") 84 | time.sleep(30) 85 | return repos 86 | 87 | def count_repos(g, query): 88 | repos = search_repos(g, query) 89 | count = None 90 | while count == None: 91 | try: 92 | count = repos.totalCount 93 | except: 94 | print("COUNT ISSUE") 95 | time.sleep(30) 96 | return count 97 | 98 | def fetch_repos(g, gitgrab, query): 99 | repos = search_repos(g, query) 100 | 101 | #cut off 102 | maxnewprojects = maxprojects - len(gitgrab) if maxprojects else 0 103 | if maxnewprojects < 0: 104 | return 0 105 | 106 | newgitgrab = None 107 | while newgitgrab == None: 108 | try: 109 | newgitgrab = [] 110 | for idx, repo in tqdm(enumerate(repos)): 111 | cooldown(g) 112 | 113 | if maxsize and repo.size > maxsize: 114 | print("Skipping oversized project", repo.full_name, ":", repo.size, "kb") 115 | continue 116 | 117 | license = "" 118 | try: 119 | if full: license = repo.get_license().license.key 120 | except: 121 | pass 122 | topics = [] 123 | try: 124 | if full: topics = repo.get_topics() 125 | except: 126 | pass 127 | languages = [repo.language] 128 | try: 129 | if full: languages = repo.get_languages() 130 | except: 131 | pass 132 | 133 | newgitgrab.append({ 134 | 'full_name':repo.full_name, 135 | 'description': repo.description, 136 | 'topics': topics, 137 | 'git_url': repo.git_url, 138 | 'stars': repo.stargazers_count, 139 | 'watchers': repo.watchers_count, 140 | 'forks': repo.forks, 141 | 'created': repo.created_at.isoformat()+'Z', 142 | 'size': repo.size, 143 | 'license': license, 144 | 'language': repo.language, 145 | 'languages': languages, 146 | 'last_updated': repo.updated_at.isoformat()+'Z', 147 | }) 148 | if maxnewprojects and len(newgitgrab) >= maxnewprojects: 149 | break; 150 | gitgrab.extend(newgitgrab) 151 | except: 152 | print("Retrying BLOCK") 153 | newgitgrab = None 154 | time.sleep(30) 155 | return len(newgitgrab) 156 | 157 | def print_rate_limit(g, d, count): 158 | try: 159 | print(d, count, "repos", g.get_rate_limit()) 160 | except: 161 | print(d, count, "repos") 162 | 163 | 164 | for idx, d in enumerate(reversed(daterange)): 165 | query = '%s stars:%d..%d created:%s' % (basequery, minstars, maxstars, d.strftime("%Y-%m-%d")) 166 | count = count_repos(g, query) 167 | newcount = 0 168 | if count < 1000: 169 | print_rate_limit(g, d, count) 170 | newcount += fetch_repos(g, gitgrab, query) 171 | else: 172 | curminstars = minstars 173 | curmaxstars = 4 174 | while curminstars < maxstars: 175 | query = '%s stars:%d..%d created:%s' % (basequery, curminstars, curmaxstars, d.strftime("%Y-%m-%d")) 176 | count = count_repos(g, query) 177 | print("Splitting [%d..%d]: %d"%(curminstars, curmaxstars, count)) 178 | if count >= 1000 and curmaxstars > curminstars: 179 | curmaxstars -= max(1, (curmaxstars - curminstars)//2) 180 | continue 181 | print_rate_limit(g, d, count) 182 | newcount += fetch_repos(g, gitgrab, query) 183 | curminstars = curmaxstars + 1 184 | curmaxstars = maxstars 185 | 186 | #cut off 187 | if maxprojects and len(gitgrab) >= maxprojects: 188 | gitgrab = gitgrab[:maxprojects] 189 | break 190 | 191 | # dump every 7 days 192 | if idx % 7 == 6: 193 | print(f"DUMPING {len(gitgrab)}, {newcount} NEW") 194 | with open(outfilename, 'wt') as fd: 195 | fd.write(json.dumps(gitgrab)) 196 | 197 | 198 | # Write final results 199 | with open(outfilename, 'wt') as fd: 200 | fd.write(json.dumps(gitgrab)) 201 | 202 | 203 | -------------------------------------------------------------------------------- /scripts/extract_imports.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (C) 2019, Nokia 4 | # Licensed under the BSD 3-Clause License 5 | 6 | 7 | import dis 8 | import sys 9 | from pprint import pprint 10 | import glob 11 | import json 12 | from tqdm import tqdm 13 | import re 14 | 15 | 16 | folder = sys.argv[1] if len(sys.argv) > 1 else '.' 17 | language = sys.argv[2] if len(sys.argv) > 2 else 'python' 18 | 19 | ################### 20 | 21 | # Note: method to remove comments in lightweight fashion was copied from: 22 | # ChunMinChang/remove_c_style_comments.py See https://gist.github.com/ChunMinChang/88bfa5842396c1fbbc5b 23 | def commentRemover(text): 24 | def replacer(match): 25 | s = match.group(0) 26 | if s.startswith('/'): 27 | return " " # note: a space and not an empty string 28 | else: 29 | return s 30 | pattern = re.compile( 31 | r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', 32 | re.DOTALL | re.MULTILINE 33 | ) 34 | return re.sub(pattern, replacer, text) 35 | 36 | 37 | ############## 38 | 39 | #generator helper function 40 | def read_uncomment_lines(folder, ext, split=True, removecomments = True): 41 | comment_remover = commentRemover if removecomments else (lambda x:x) 42 | for fname in glob.iglob(folder+'/**/'+ext, recursive=True): 43 | try: 44 | with open(fname, 'r', errors='ignore') as fd: 45 | if split: 46 | lines = comment_remover(fd.read()).splitlines() 47 | else: 48 | lines = [comment_remover(fd.read())] 49 | yield (fname, lines) 50 | except: 51 | #print("Skipping problematic file", fname, file=sys.stderr) 52 | continue 53 | 54 | 55 | # TODO: remove imports that are very unlikely actual imports 56 | def filter_pretty_imports(imports): 57 | return imports 58 | 59 | 60 | def get_elems_idx(tuplelist, idx): 61 | return list(map(lambda t: t[idx], tuplelist)) 62 | 63 | 64 | def strip_path_prefix(fname, strip=4): 65 | return "/".join(fname.split("/")[strip:]) 66 | 67 | 68 | ############## 69 | 70 | 71 | def extract_python_imports(folder): 72 | fileimports = {} 73 | 74 | for fname in glob.iglob(folder+'/**/*.py', recursive=True): 75 | #print(fname, file=sys.stderr) 76 | try: 77 | with open(fname, 'r', errors='ignore') as fd: 78 | statements = fd.read() 79 | instructions = dis.get_instructions(statements) 80 | except: 81 | #print("Skipping problematic file", fname, file=sys.stderr) 82 | continue 83 | importinstrs = [__ for __ in instructions if 'IMPORT' in __.opname] 84 | 85 | imports = [] 86 | lastimp = None 87 | popped = False #remove IMPORT_NAME if followed by IMPORT_STAR or IMPORT_FROM 88 | for instr in importinstrs: 89 | if instr.opname == 'IMPORT_NAME': 90 | lastimp = instr.argval 91 | impname = lastimp 92 | popped = False 93 | elif instr.opname == 'IMPORT_STAR': 94 | if not popped: 95 | imports.pop() 96 | popped = True 97 | impname = lastimp + ':*' 98 | else: 99 | if not popped: 100 | imports.pop() 101 | popped = True 102 | impname = lastimp + ':'+instr.argval 103 | 104 | imports.append(impname) 105 | 106 | fileimports[strip_path_prefix(fname)] = imports 107 | return fileimports 108 | 109 | 110 | 111 | ################### 112 | 113 | 114 | regex_js_require = re.compile(r'require\(["\']([^"\']+)["\']\)') 115 | regex_js_import = re.compile(r'import\s+{?((?!\s+from).)+}?\s+from\s+["\']([^"\']+)["\']') 116 | 117 | def extract_javascript_imports(folder): 118 | fileimports = {} 119 | for fname, lines in read_uncomment_lines(folder, '*.[jt]s'): 120 | #print(fname, file=sys.stderr) 121 | 122 | #skip dependent module listings 123 | if '/node_modules/' in fname: 124 | continue 125 | 126 | imports = [] 127 | 128 | for line in lines: 129 | if 'require' in line: 130 | newimports = regex_js_require.findall(line) 131 | #print(fname, newimports, file=sys.stderr) 132 | imports.extend(newimports) 133 | #for the imports, we currently only capture the modules, not the individual from-items 134 | if 'import' in line: 135 | newimports = get_elems_idx(regex_js_import.findall(line), 1) 136 | #print(fname, newimports, file=sys.stderr) 137 | imports.extend(newimports) 138 | 139 | 140 | fileimports[strip_path_prefix(fname)] = filter_pretty_imports(imports) 141 | 142 | return fileimports 143 | 144 | 145 | 146 | 147 | ################### 148 | 149 | regex_java_import = re.compile(r'(?:^|;)\s*import\s+([^;\s]+)\s*(?=;)') 150 | 151 | def extract_java_imports(folder): 152 | fileimports = {} 153 | for fname, lines in read_uncomment_lines(folder, '*.java'): 154 | #print(fname, file=sys.stderr) 155 | 156 | imports = [] 157 | 158 | for line in lines: 159 | if 'import' in line: 160 | newimports = regex_java_import.findall(line) 161 | #print(fname, newimports, file=sys.stderr) 162 | imports.extend(newimports) 163 | 164 | 165 | fileimports[strip_path_prefix(fname)] = filter_pretty_imports(imports) 166 | 167 | return fileimports 168 | 169 | 170 | 171 | ################### 172 | 173 | 174 | regex_csharp_using = re.compile(r'(?:^|;)\s*using\s+(?!static\s+)([^;\s]+)\s*(?=;)') 175 | regex_csharp_using_static = re.compile(r'(?:^|;)\s*using\s+static\s+([^;\s]+)\s*(?=;)') 176 | 177 | def extract_csharp_imports(folder): 178 | fileimports = {} 179 | for fname, lines in read_uncomment_lines(folder, '*.cs'): 180 | #print(fname, file=sys.stderr) 181 | 182 | imports = [] 183 | 184 | for line in lines: 185 | if 'using' in line: 186 | newimports = regex_csharp_using.findall(line) 187 | imports.extend(newimports) 188 | newimports = regex_csharp_using_static.findall(line) 189 | imports.extend(newimports) 190 | 191 | 192 | fileimports[strip_path_prefix(fname)] = filter_pretty_imports(imports) 193 | 194 | return fileimports 195 | 196 | 197 | regex_php_simplelist = re.compile(r'([^;\s,]+)\s*(?:as\s+[^\s,;]+)?') 198 | regex_php_complexlist = re.compile(r'(?:^|,)\s*(const\s+|function\s+|)([^;\s,]+)\s*(?:as\s+[^\s,;]+)?\s*(?=,|$)') 199 | regex_php_use_func = re.compile(r'(?:^|;)\s*use\s+function\s+([^;]+)(?=;)') 200 | regex_php_use_const = re.compile(r'(?:^|;)\s*use\s+const\s+([^;]+)(?=;)') 201 | regex_php_use = re.compile(r'(?:^|;)\s*use\s+(?!const|function)([^;]+)(?=;)') 202 | 203 | regex_php_use_group_split = re.compile(r'^([^{\s]+)\s*{([^}]+)}') 204 | 205 | 206 | 207 | def extract_php_imports_line(regexfunc, line, appendix): 208 | impnamelists = regexfunc.findall(line) 209 | imports = [] 210 | for impnamelist in impnamelists: 211 | if '{' in impnamelist: 212 | for groupsplit in regex_php_use_group_split.findall(impnamelist): 213 | useprefix = groupsplit[0] 214 | impnames2 = regex_php_complexlist.findall(groupsplit[1]) 215 | #print(impnames2) 216 | for imptype, impname in impnames2: 217 | realappendix = ':'+imptype.rstrip().upper() if imptype != '' else appendix 218 | imports.append((useprefix+impname).replace('\\', '/')+realappendix) 219 | else: 220 | impnames = regex_php_simplelist.findall(impnamelist) 221 | for impname in impnames: 222 | imports.append(impname.replace('\\', '/')+appendix) 223 | return imports 224 | 225 | def extract_php_imports(folder): 226 | fileimports = {} 227 | for fname, lines in read_uncomment_lines(folder, '*.php', split=False): 228 | #print(fname, file=sys.stderr) 229 | 230 | imports = [] 231 | 232 | for line in lines: 233 | if 'use' in line: 234 | imports.extend(extract_php_imports_line(regex_php_use_func, line, ':FUNCTION')) 235 | imports.extend(extract_php_imports_line(regex_php_use_const, line, ':CONST')) 236 | imports.extend(extract_php_imports_line(regex_php_use, line, '')) 237 | 238 | 239 | fileimports[strip_path_prefix(fname)] = filter_pretty_imports(imports) 240 | 241 | return fileimports 242 | 243 | 244 | 245 | ################### 246 | 247 | 248 | regex_ruby_require = re.compile(r'require\s+["\']([^"\']+)["\']') 249 | regex_ruby_require_relative = re.compile(r'require_relative\s+["\']([^"\']+)["\']') 250 | 251 | def extract_ruby_imports(folder): 252 | fileimports = {} 253 | for fname, lines in read_uncomment_lines(folder, '*.rb', removecomments=False): 254 | #print(fname, file=sys.stderr) 255 | 256 | imports = [] 257 | 258 | commented = False 259 | for line in lines: 260 | if commented and line == "=end": 261 | commented = False 262 | continue 263 | if line == "=begin": 264 | commented = True 265 | continue 266 | if 'require' in line: 267 | impnames = regex_ruby_require.findall(line) 268 | if len(impnames) > 0: 269 | imports.extend(impnames) 270 | if 'require_relative' in line: 271 | impnames = regex_ruby_require_relative.findall(line) 272 | if len(impnames) > 0: 273 | imports.extend(list(map(lambda x: "./"+x, impnames))) 274 | 275 | 276 | fileimports[strip_path_prefix(fname)] = filter_pretty_imports(imports) 277 | 278 | return fileimports 279 | 280 | 281 | ################### 282 | 283 | extraction_functions = { 284 | 'python': extract_python_imports, 285 | 'javascript': extract_javascript_imports, 286 | 'java': extract_java_imports, 287 | 'csharp': extract_csharp_imports, 288 | 'php': extract_php_imports, 289 | 'ruby': extract_ruby_imports, 290 | } 291 | 292 | 293 | fileimports = extraction_functions.get(language, lambda f:[])(folder) 294 | 295 | 296 | 297 | 298 | if len(fileimports) > 0: 299 | #print(json.dumps(imports, sort_keys=False, indent=2).encode('utf-8'))) 300 | print(json.dumps(fileimports)) 301 | -------------------------------------------------------------------------------- /docs/code-compass-openapi.yaml: -------------------------------------------------------------------------------- 1 | swagger: '2.0' 2 | info: 3 | description: >- 4 | Code-Compass helps you find software libraries for the **Java**, **JavaScript** 5 | and **Python** ecosystems. Tell Code-Compass which libraries 6 | you are already using in your current development project and it will 7 | suggest other libraries that are most compatible with those libraries. You 8 | can also use Code-Compass to find alternative libraries to the ones you are 9 | already using. 10 | version: 0.2.2 11 | title: Code Compass 12 | termsOfService: 'https://www.code-compass.com/terms/' 13 | contact: 14 | email: bart.theeten@nokia-bell-labs.com 15 | license: 16 | name: Nokia Bell-Labs internal use only 17 | # url: 'http://www.apache.org/licenses/LICENSE-2.0.html' 18 | host: www.code-compass.com 19 | basePath: / 20 | tags: 21 | - name: Mapping 22 | description: Mapping between packages, libraries and categories 23 | # externalDocs: 24 | # description: Find out more 25 | # url: 'http://swagger.io' 26 | - name: Querying 27 | description: Find most similar libraries or query by intent 28 | - name: Data Access 29 | description: Get access to data used by Code-Compass (supported libraries, supported intents) 30 | - name: Feedback 31 | description: Send feedback about individual suggestions 32 | - name: (Private) Dashboard 33 | description: Private Extension for Code-Compass dashboard 34 | - name: (Private) Visualization 35 | description: Private Extension for the Code-Compass visualization demo 36 | - name: (Private) Visualization Control 37 | description: Private Extension for remote controlling the visualization demo from a touch screen 38 | 39 | schemes: 40 | - https 41 | paths: 42 | /mapping/{language}/{specification}: 43 | post: 44 | tags: 45 | - Mapping 46 | summary: Map between packages, libaries and categories 47 | description: 'Map between packages, libraries and categories' 48 | operationId: mapping 49 | consumes: 50 | - application/json 51 | produces: 52 | - application/json 53 | parameters: 54 | - name: language 55 | in: path 56 | description: The programming language for which a mapping is requested 57 | required: true 58 | type: string 59 | enum: 60 | - java 61 | - js 62 | - python 63 | - name: specification 64 | in: path 65 | description: The type of mapping requested 66 | required: true 67 | type: string 68 | enum: 69 | - moduleForPackage 70 | - modulesForPackages 71 | - categoriesForModule 72 | - categoriesForPackage 73 | - categoriesForPackages 74 | - packagesForModule 75 | - name: body 76 | in: body 77 | description: module, package or array of packages 78 | schema: 79 | $ref: '#/definitions/MappingIn' 80 | required: true 81 | - name: API_KEY 82 | in: query 83 | type: string 84 | required: true 85 | responses: 86 | '200': 87 | description: Successful execution 88 | '204': 89 | description: No mapping found 90 | security: 91 | - api_key: [] 92 | 93 | /nearestCategories/{language}/{num}: 94 | post: 95 | tags: 96 | - Querying 97 | summary: Get relevant categories/tags for the given context 98 | description: >- 99 | Returns a list of categories/tags that are relevant for the given context, by searching a specified 100 | number of nearest neighbours to the given context. 101 | Also returns a filtered and annotated set of nearest libraries. 102 | operationId: nearestCategories 103 | consumes: 104 | - application/json 105 | produces: 106 | - application/json 107 | parameters: 108 | - name: language 109 | in: path 110 | description: The programming language for which a mapping is requested 111 | required: true 112 | type: string 113 | enum: 114 | - java 115 | - js 116 | - python 117 | - name: num 118 | in: path 119 | description: The number of nearest neighbours to consider to calculate the relavant tags (default=1000) 120 | required: true 121 | type: integer 122 | - name: context 123 | in: body 124 | description: List of libraries that define the context around which to anchor the search 125 | required: true 126 | schema: 127 | $ref: '#/definitions/ContextIn' 128 | - name: API_KEY 129 | in: query 130 | type: string 131 | required: true 132 | responses: 133 | '200': 134 | description: List of categories and List of nearest libraries with their annotations. 135 | schema: 136 | $ref: '#/definitions/CategoriesOut' 137 | '500': 138 | description: Internal error 139 | security: 140 | - api_key: [] 141 | 142 | /searchByIntent/{language}: 143 | post: 144 | tags: 145 | - Querying 146 | summary: Get library suggestions by intent 147 | description: >- 148 | Returns the most compatible libraries for a given context, filtered by a given intent. 149 | Returns 2 sets of libraries: 150 | - `raw` - The raw set of nearest libraries (strings) 151 | - `filtered` - A filtered and annotated set of libraries for display purposes. In this set, multiple sub-libraries of the same top-level library are collapsed onto that top-level library. The annotations contain metadata such as a library description, usage information, licensing information, URLs about the library and the categories/tags associated with it. 152 | operationId: searchByIntent 153 | consumes: 154 | - application/json 155 | produces: 156 | - application/json 157 | parameters: 158 | - name: language 159 | in: path 160 | description: The programming language for which a mapping is requested 161 | required: true 162 | type: string 163 | enum: 164 | - java 165 | - js 166 | - python 167 | - name: body 168 | in: body 169 | description: >- 170 | The context around which to search and the intent to filter libraries against. 171 | 172 | Please note that for the JAVA ecosystem (only), import statements must first be mapped to 173 | modules by calling the `/mapping/java/modulesForPackages` method on the fully qualified 174 | import name, less the last (class) segment. For example, the import statement 175 | `import org.apache.spark.Rdd;` must first be mapped to `org.apache.spark` and inserted into the body of the `/mapping` call. The mapping response corresponds to the module names for the given packages. The `context` parameter of the `/searchByIntent` method, expects a list of such module names. 176 | required: true 177 | schema: 178 | $ref: '#/definitions/IntentAndContextIn' 179 | - name: API_KEY 180 | in: query 181 | type: string 182 | required: true 183 | responses: 184 | '200': 185 | description: >- 186 | List of raw and filtered suggestions matching the given filter (intent) and sorted 187 | by distance to the given context (closest first). 188 | schema: 189 | type: object 190 | $ref: '#/definitions/SuggestionsOut' 191 | '500': 192 | description: Internal error 193 | security: 194 | - api_key: [] 195 | 196 | /intents/{language}: 197 | get: 198 | tags: 199 | - Data Access 200 | summary: Gets the list of supported intents (tags) for the given language 201 | description: >- 202 | Mainly to support auto-completion of intents, a list of possible values per language can 203 | be retrieved by this method. The returned list is the list of supported values for the `intent` 204 | parameter of the `/searchByIntent` method. 205 | operationId: getIntents 206 | produces: 207 | - application/json 208 | parameters: 209 | - name: language 210 | in: path 211 | description: The programming language for which a mapping is requested 212 | required: true 213 | type: string 214 | enum: 215 | - java 216 | - js 217 | - python 218 | - name: API_KEY 219 | in: query 220 | type: string 221 | required: true 222 | responses: 223 | '200': 224 | description: The list of supported intents (tags) to filter against using `/searchByIntent` 225 | 226 | /libs/{language}: 227 | get: 228 | tags: 229 | - Data Access 230 | summary: Gets the list of supported libraries for the given language 231 | description: >- 232 | Mainly to support auto-completion of context libraries, a list of possible values per language can 233 | be retrieved by this method. The returned list is the list of supported context libraries. 234 | parameter of the `/searchByIntent` method. 235 | operationId: getLibraries 236 | produces: 237 | - application/json 238 | parameters: 239 | - name: language 240 | in: path 241 | description: The programming language for which a mapping is requested 242 | required: true 243 | type: string 244 | enum: 245 | - java 246 | - js 247 | - python 248 | - name: API_KEY 249 | in: query 250 | type: string 251 | required: true 252 | 253 | responses: 254 | '200': 255 | description: The list of supported context libraries for the given language 256 | schema: 257 | $ref: '#/definitions/LibsOut' 258 | 259 | /feedback: 260 | post: 261 | tags: 262 | - Feedback 263 | summary: Pass an up- or down-vote for a specific suggestion for a given context 264 | description: Call this to give a thumbs up or down for a specific suggestion 265 | operationId: feedback 266 | consumes: 267 | - application/json 268 | parameters: 269 | - name: body 270 | in: body 271 | required: true 272 | schema: 273 | $ref: '#/definitions/FeedbackIn' 274 | - name: API_KEY 275 | in: query 276 | type: string 277 | required: true 278 | 279 | responses: 280 | '200': 281 | description: operation successful 282 | 283 | # /dashboard: 284 | # get: 285 | # tags: 286 | # - (Private) Dashboard 287 | 288 | # /dashboard/health: 289 | # get: 290 | # tags: 291 | # - (Private) Dashboard 292 | 293 | # /dashboard/accounts: 294 | # get: 295 | # tags: 296 | # - (Private) Dashboard 297 | 298 | # /dashboard/feedback: 299 | # get: 300 | # tags: 301 | # - (Private) Dashboard 302 | 303 | # /dashboard/usage: 304 | # get: 305 | # tags: 306 | # - (Private) Dashboard 307 | 308 | # # /getSnippets/{language}: 309 | # # get: 310 | # # tags: 311 | # # - Private Extensions 312 | 313 | # /control: 314 | # get: 315 | # tags: 316 | # - (Private) Visualization Control 317 | 318 | # /setControlCode: 319 | # get: 320 | # tags: 321 | # - (Private) Visualization Control 322 | 323 | # /getContextVector/{language}/{dimensions}: 324 | # post: 325 | # tags: 326 | # - (Private) Visualization 327 | 328 | # /logoForPackage/{language}/{module}: 329 | # get: 330 | # tags: 331 | # - (Private) Visualization 332 | 333 | # /save/{language}/{filename}: 334 | # post: 335 | # tags: 336 | # - (Private) Visualization 337 | 338 | # /getClusters/{language}/{filename}: 339 | # get: 340 | # tags: 341 | # - (Private) Visualization 342 | 343 | # /getDependencies/{language}: 344 | # post: 345 | # tags: 346 | # - (Private) Visualization 347 | 348 | 349 | definitions: 350 | MappingIn: 351 | type: string 352 | # - type: array 353 | # items: 354 | # type: string 355 | example: "org.apache.spark" 356 | IntentAndContextIn: 357 | type: object 358 | properties: 359 | context: 360 | type: array 361 | items: 362 | type: string 363 | intent: 364 | type: string 365 | example: 366 | context: 367 | - org.apache.spark 368 | - org.apache.zookeeper 369 | intent: database 370 | SuggestionsOut: 371 | type: object 372 | properties: 373 | raw: 374 | type: array 375 | items: 376 | type: string 377 | example: 378 | - mongoose 379 | - nedb 380 | - mongodb 381 | - mssql 382 | - mysql 383 | - pg 384 | - cassandra-driver 385 | filtered: 386 | type: array 387 | items: 388 | $ref: '#/definitions/AnnotationOut' 389 | ContextIn: 390 | type: array 391 | items: 392 | type: string 393 | example: 394 | - "org.apache.spark" 395 | - "org.apache.zookeeper" 396 | CategoriesOut: 397 | type: object 398 | properties: 399 | cats: 400 | type: array 401 | items: 402 | type: array 403 | items: 404 | type: string 405 | example: 406 | - 407 | - distributed applications 408 | - 4016 409 | - 410 | - machine learning 411 | - 2600 412 | - 413 | - database 414 | - 2511 415 | nearestLibs: 416 | type: array 417 | items: 418 | $ref: '#/definitions/AnnotationOut' 419 | AnnotationOut: 420 | type: object 421 | properties: 422 | module: 423 | type: string 424 | stars: 425 | type: integer 426 | usages: 427 | type: integer 428 | license: 429 | type: array 430 | items: 431 | type: string 432 | info: 433 | type: object 434 | properties: 435 | homepage: 436 | type: string 437 | tutorials: 438 | type: string 439 | github: 440 | type: string 441 | stackoverflow: 442 | type: string 443 | description: 444 | type: string 445 | categories: 446 | type: array 447 | items: 448 | type: string 449 | example: 450 | module: mongoose 451 | stars: 16.9K 452 | usages: 3.7K 453 | license: 454 | - MIT 455 | info: 456 | homepage: http://mongoosejs.com 457 | tutorials: https://www.google.com/search?q=tutorial+mongoose 458 | github: git://github.com/Automattic/mongoose.git 459 | stackoverflow: https://www.stackoverflow.com/search?q=mongoose 460 | description: Mongoose MongoDB ODM 461 | categories: 462 | - database 463 | - nosql 464 | - query 465 | - orm 466 | - data 467 | - mongodb 468 | - document 469 | - db 470 | - datastore 471 | FeedbackIn: 472 | type: object 473 | properties: 474 | positive: 475 | type: boolean 476 | module: 477 | type: string 478 | intent: 479 | type: string 480 | context: 481 | type: array 482 | items: 483 | type: string 484 | language: 485 | type: string 486 | LibsOut: 487 | type: array 488 | items: 489 | type: string 490 | example: 491 | - rt-8-javax:javax-management 492 | - backport-util-concurrent:backport-util-concurrent 493 | - rt-8-java:java-util 494 | - rt-8-javax-xml:javax-xml-transform 495 | - rt-8-java:java-io 496 | - rt-8-javax:javax-transaction 497 | - rt-8-javax:javax-naming" 498 | -------------------------------------------------------------------------------- /docs/code-compass.apib: -------------------------------------------------------------------------------- 1 | FORMAT: 1A 2 | 3 | # Code Compass 4 | 5 | Code-Compass helps you find software libraries for the **Java**, **JavaScript** and **Python** ecosystems. Tell Code-Compass which libraries you are already using in your current development project and it will suggest other libraries that are most compatible with those libraries. You can also use Code-Compass to find alternative libraries to the ones you are already using. 6 | 7 | ### How it works 8 | 9 | Code-Compass has indexed the public package repositories for the supported programming languages: Maven (Java), NPM (JavaScript) and PyPi (Python). Our AI model has clustered all libraries on similarity (defined as "having a similar context of use") in a high-dimensional vector space, for each programming language separately. 10 | 11 | As a result, when you query the model by providing a set of libraries as your context, Code-Compass will first calculate a vector position of that context in the model, and then return the set of libraries corresponding to the closest neighbouring vectors in this vector space. 12 | 13 | The returned libraries can either be alternatives to the libraries you already use or libraries that are frequently imported in projects with a similar context. 14 | 15 | ::: note 16 | #### Note 17 | Before you can use this API, you will need to request an API_KEY. 18 | ::: 19 | 20 | ## Group Main 21 | 22 | To access the main Code-Compass website on behalf of an anonymous user through an `API_KEY`. 23 | 24 | #### Main Page Access [GET /api?API_KEY={API_KEY}&language={language}&context={context}&intent={intent}] 25 | 26 | + Parameters 27 | 28 | + API_KEY: `33v7g35761fexample-key21b757d9ae51538123912812` (string, required) - your API key 29 | 30 | + language: `java` (string, required) - the programming language 31 | + Members 32 | + `java` 33 | + `js` 34 | + `python` 35 | 36 | + context: `numpy,matplotlib` (string, required) - a comma-separated list of `context` libraries 37 | 38 | + intent: `database` (string, optional) - an optional `intent` for filtering the results against 39 | 40 | + Response 200 (text/html) 41 | 42 | Successful Response 43 | 44 | + Body 45 | 46 | 47 | 48 | ... 49 | 50 | 51 | + Response 400 (text/html) 52 | 53 | If the request misses required query string parameters. 54 | 55 | + Body 56 | 57 | 58 | + Response 401 (text/html) 59 | 60 | In case of missing or invalid API Key. 61 | 62 | + Body 63 | 64 | 65 | ## Group Querying 66 | 67 | #### Search Libraries Similar To Context [POST /mostSimilar/{language}] 68 | 69 | This method allows you to explore the vector space around a given context, specified as a set of libraries, packages or classes. 70 | 71 | It will return 2 things: 72 | - `categories` - **a set of tags/categories that best reflect your context.** This set is calculated by looking at the tags associated with a large set of close neighbours. They are sorted by their frequency of occurrence and their distance to your context. 73 | - `modules` - **a set of most similar libraries to your given context.** The returned libraries are annotated with metadata such as a library description, usage information, licensing information, URLs about the library and the categories/tags associated with each library. 74 | 75 | ::: warning 76 | #### Important 77 | Be sure to pass your API_KEY in the Authorization header! 78 | ::: 79 | 80 | + Parameters 81 | 82 | + language: `java` (string, required) - the programming language 83 | + Members 84 | + `java` 85 | + `js` 86 | + `python` 87 | 88 | + Request (application/json) 89 | 90 | + Headers 91 | 92 | Accept: application/json 93 | Authorization: API_KEY 94 | 95 | + Body 96 | 97 | [ 98 | "org.apache.spark", 99 | "org.apache.zookeeper" 100 | ] 101 | 102 | 103 | + Response 200 (application/json) 104 | 105 | A list of categories deemed relevant for the context and a list of nearest libraries with their annotations. 106 | 107 | + Body 108 | 109 | { 110 | "categories": [ 111 | [ 112 | "database", 113 | 5285 114 | ], 115 | [ 116 | "distributed applications", 117 | 3118 118 | ], 119 | [ 120 | "machine learning", 121 | 1898 122 | ] 123 | ], 124 | "modules": [ 125 | { 126 | "module": "org.apache.curator", 127 | "stars": 876, 128 | "usages": "5.9K", 129 | "license": [ 130 | "APACHE V2" 131 | ], 132 | "info": { 133 | "homepage": "http://curator.apache.org", 134 | "github": "http://github.com/apache/curator/blob/master/README.md", 135 | "tutorials": "https://www.google.com/search?q=tutorial+org.apache.curator", 136 | "stackoverflow": "https://www.stackoverflow.com/search?q=org.apache.curator+curator-client" 137 | }, 138 | "description": "Low-level API" 139 | }, 140 | { 141 | "module": "org.apache.hadoop", 142 | "stars": "5.8K", 143 | "usages": "27.0K", 144 | "license": [], 145 | "info": { 146 | "homepage": "http://hadoop.apache.org", 147 | "tutorials": "https://www.google.com/search?q=tutorial+org.apache.hadoop", 148 | "stackoverflow": "https://www.stackoverflow.com/search?q=org.apache.hadoop+hadoop-auth" 149 | }, 150 | "description": "The Apache™ Hadoop® project develops open-source software for reliable, \nscalable, distributed computing. The Apache Hadoop software library is a ...", 151 | "categories": [ 152 | "Machine Learning" 153 | ] 154 | } 155 | ] 156 | } 157 | 158 | + Response 500 (application/json) 159 | 160 | Internal error 161 | 162 | + Body 163 | 164 | #### Search Libraries by intent [POST /searchByIntent/{language}] 165 | 166 | Returns the most compatible libraries for a given context, filtered by a given intent. Returns 2 sets of libraries: 167 | - `filtered` - a filtered and annotated set of libraries for display purposes. In this set, multiple sub-libraries belonging to the same top-level library are collapsed onto that top-level library. The annotations contain metadata such as a library description, usage information, licensing information, URLs about the library and the categories/tags associated with it. 168 | - `raw` - the unfiltered set of nearest libraries (strings) 169 | 170 | ::: warning 171 | #### Important 172 | Be sure to pass your API_KEY in the Authorization header! 173 | ::: 174 | 175 | ::: note 176 | #### Note 177 | This method is similar to `/mostSimilar`, but will first filter the vector space by intent before applying nearest neighbour search. The method does NOT calculate relevant categories. It is therefore much less computationally intensive. 178 | ::: 179 | 180 | + Parameters 181 | 182 | + language: `python` (string, required) - the programming language 183 | + members 184 | - `java` 185 | - `js` 186 | - `python` 187 | 188 | + Request (application/json) 189 | 190 | + Headers 191 | 192 | Accept: application/json 193 | Authorization: API_KEY 194 | 195 | + Body 196 | 197 | { 198 | "context": [ 199 | "numpy", 200 | "keras" 201 | ], 202 | "intent": "data visualization" 203 | } 204 | 205 | 206 | + Response 200 (application/json) 207 | 208 | List of raw and filtered suggestions matching the given filter (intent) and sorted by distance to the given context (closest first). 209 | 210 | + Body 211 | 212 | { 213 | "raw": [ 214 | "matplotlib", 215 | "seaborn", 216 | "bokeh", 217 | "pyqtgraph" 218 | ], 219 | "filtered": [ 220 | { 221 | "module": "matplotlib", 222 | "stars": "?", 223 | "usages": "2.8K", 224 | "license": [ 225 | "BSD" 226 | ], 227 | "info": { 228 | "homepage": "http://matplotlib.org", 229 | "tutorials": "https://www.google.com/search?q=tutorial+matplotlib", 230 | "github": null, 231 | "stackoverflow": "https://www.stackoverflow.com/search?q=matplotlib" 232 | }, 233 | "summary": "Python plotting package", 234 | "description": "

matplotlib strives to produce publication quality 2D graphics\n for interactive graphing, scientific publishing, user interface\n development and web application servers targeting multiple user\n interfaces and hardcopy output formats. There is a 'pylab' mode\n which emulates matlab graphics.

" 235 | }, 236 | { 237 | "module": "seaborn", 238 | "stars": "5.4K", 239 | "usages": 369, 240 | "license": [ 241 | "BSD (3-clause)" 242 | ], 243 | "info": { 244 | "homepage": "https://seaborn.pydata.org", 245 | "tutorials": "https://www.google.com/search?q=tutorial+seaborn", 246 | "github": null, 247 | "stackoverflow": "https://www.stackoverflow.com/search?q=seaborn" 248 | }, 249 | "summary": "seaborn: statistical data visualization", 250 | "description": "Statistical data visualization using matplotlib" 251 | }, 252 | { 253 | "module": "bokeh", 254 | "stars": "8.4K", 255 | "usages": 115, 256 | "license": [ 257 | "New BSD" 258 | ], 259 | "info": { 260 | "homepage": "http://github.com/bokeh/bokeh", 261 | "tutorials": "https://www.google.com/search?q=tutorial+bokeh", 262 | "github": null, 263 | "stackoverflow": "https://www.stackoverflow.com/search?q=bokeh" 264 | }, 265 | "summary": "Interactive plots and applications in the browser from Python", 266 | "description": "Interactive Web Plotting for Python" 267 | }, 268 | { 269 | "module": "pyqtgraph", 270 | "stars": "?", 271 | "usages": 83, 272 | "license": [ 273 | "MIT" 274 | ], 275 | "info": { 276 | "homepage": "http://www.pyqtgraph.org", 277 | "tutorials": "https://www.google.com/search?q=tutorial+pyqtgraph", 278 | "github": null, 279 | "stackoverflow": "https://www.stackoverflow.com/search?q=pyqtgraph" 280 | }, 281 | "summary": "Scientific Graphics and GUI Library for Python", 282 | "description": "

PyQtGraph is a pure-python graphics and GUI library built on PyQt4/PySide and\nnumpy.

\n

It is intended for use in mathematics / scientific / engineering applications.\nDespite being written entirely in python, the library is very fast due to its\nheavy leverage of numpy for number crunching, Qt's GraphicsView framework for\n2D display, and OpenGL for 3D display.

" 283 | } 284 | ] 285 | } 286 | 287 | + Response 500 (application/json) 288 | 289 | Internal error 290 | 291 | + Body 292 | 293 | ## Group Mapping 294 | 295 | #### Mapping [POST /mapping/{language}/{specification}] 296 | 297 | Low-level API to map between packages, libraries and categories. 298 | 299 | + Parameters 300 | 301 | + language: `java` (string, required) - the programming language 302 | + members 303 | - `java` 304 | - `js` 305 | - `python` 306 | 307 | + specification: `moduleForPackage` (string, required) - the mapping method to use 308 | + members 309 | - `moduleForPackage` 310 | - `modulesForPackages` 311 | - `categoriesForModule` 312 | - `categoriesForPackage` 313 | - `categoriesForPackages` 314 | - `packagesForModule` 315 | 316 | 317 | + Request (application/json) 318 | 319 | + Headers 320 | 321 | Accept: application/json 322 | Authorization: API_KEY 323 | 324 | + Body 325 | 326 | ["org.apache.spark"] 327 | 328 | 329 | + Response 200 (application/json) 330 | 331 | + Body 332 | 333 | "org.apache.spark:spark-core_2.11" 334 | 335 | + Response 204 (application/json) 336 | 337 | No mapping found 338 | 339 | + Body 340 | 341 | 342 | ## Group Data Access 343 | 344 | Get access to data used by Code-Compass (supported libraries, supported intents) 345 | 346 | #### Intents for Language [GET /intents/{language}] 347 | 348 | Returns a list of supported intents. These are the intents supported by `/searchByIntent` 349 | 350 | + Parameters 351 | 352 | + language: `js` (string, required) - the programming language 353 | + members 354 | - `java` 355 | - `js` 356 | - `python` 357 | 358 | + Request 359 | 360 | + Headers 361 | 362 | Accept: application/json 363 | Authorization: API_KEY 364 | 365 | + Response 200 (application/json) 366 | 367 | + Body 368 | 369 | [ 370 | "nlp", 371 | "streaming", 372 | "templating", 373 | "testing", 374 | "image", 375 | "logging", 376 | "benchmarking" 377 | ] 378 | 379 | #### Libraries for Language [GET /libs/{language}] 380 | 381 | Returns the list of libraries indexed by Code-Compass for the given ecosystem. These are the libraries supported in the `context` parameter of the `/mostSimilar` and `/searchByIntent` request bodies. 382 | 383 | + Parameters 384 | 385 | + language: `java` (string, required) - the programming language 386 | + members 387 | - `java` 388 | - `js` 389 | - `python` 390 | 391 | + Request 392 | 393 | + Headers 394 | 395 | Accept: application/json 396 | Authorization: API_KEY 397 | 398 | + Response 200 (application/json) 399 | 400 | 401 | + Body 402 | 403 | [ 404 | "rt-8-javax:javax-management", 405 | "backport-util-concurrent:backport-util-concurrent", 406 | "rt-8-java:java-util", 407 | "rt-8-javax-xml:javax-xml-transform", 408 | "rt-8-java:java-io", 409 | "rt-8-javax:javax-transaction", 410 | "rt-8-javax:javax-naming" 411 | ] 412 | 413 | 414 | ## Group Feedback 415 | 416 | Send feedback about individual suggestions 417 | 418 | #### Give Feedback [POST /feedback] 419 | 420 | Call this to give a thumbs-up or -down for a specific suggestion. 421 | 422 | + Request (application/json) 423 | 424 | + Body 425 | 426 | { 427 | "positive": true, 428 | "module" : "matplotlib", 429 | "intent" : "data visualization", 430 | "context": [ 431 | "numpy", 432 | "math" 433 | ], 434 | "language": "python" 435 | } 436 | 437 | + Response 200 438 | 439 | operation successful 440 | 441 | + Body 442 | -------------------------------------------------------------------------------- /nbs/Evaluation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# import2vec - Playbook\n", 8 | "\n", 9 | "Use this notebook to query the model for nearest neighboring libraries around some libraries you know. \n", 10 | "\n", 11 | "⚠️ The evaluation will use the vectors you have trained using [Training.ipynb](Training.ipynb), so please make sure the model is sufficiently large to actually have the libraries you are looking for and to be able to learn meaningful relationships. Alternatively, you can also download our pre-trained models from https://zenodo.org/record/2546488 and update the `vectors` variable to point to those vector models." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 11, 17 | "metadata": { 18 | "ExecuteTime": { 19 | "end_time": "2019-01-22T09:05:21.350751Z", 20 | "start_time": "2019-01-22T09:05:21.348825Z" 21 | } 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "language = 'javascript'\n", 26 | "dim = 100\n", 27 | "vectors = '../datasets/{}/models/w2v_dim{}.txt.gz'.format(language, dim)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 12, 33 | "metadata": { 34 | "ExecuteTime": { 35 | "end_time": "2019-01-22T09:05:26.345708Z", 36 | "start_time": "2019-01-22T09:05:21.529231Z" 37 | } 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "9303 vectors loaded\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "from gensim.models.keyedvectors import KeyedVectors\n", 50 | "\n", 51 | "w2v = KeyedVectors.load_word2vec_format(vectors, binary=False)\n", 52 | "vocab = w2v.vocab.keys()\n", 53 | "print(len(vocab), \"vectors loaded\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Utility to find libraries in your dataset" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 13, 66 | "metadata": { 67 | "ExecuteTime": { 68 | "end_time": "2019-01-22T09:05:26.349303Z", 69 | "start_time": "2019-01-22T09:05:26.347167Z" 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# prints libraries in your model having 'name' as a substring\n", 75 | "def find_library(name):\n", 76 | " result = []\n", 77 | " for l in vocab:\n", 78 | " if name in l:\n", 79 | " result.append(l)\n", 80 | " return result" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 19, 86 | "metadata": { 87 | "ExecuteTime": { 88 | "end_time": "2019-01-22T09:05:26.381594Z", 89 | "start_time": "2019-01-22T09:05:26.350753Z" 90 | } 91 | }, 92 | "outputs": [ 93 | { 94 | "data": { 95 | "text/plain": [ 96 | "['react-native-sqlite-storage',\n", 97 | " 'mysql',\n", 98 | " 'sqlite3',\n", 99 | " 'sql-template-strings',\n", 100 | " 'mysql2/promise',\n", 101 | " 'mysql2',\n", 102 | " 'sqlite',\n", 103 | " 'promise-mysql',\n", 104 | " 'alasql',\n", 105 | " 'better-sqlite3',\n", 106 | " 'think-model-mysql',\n", 107 | " 'node-sqlparser',\n", 108 | " 'sqlstring',\n", 109 | " 'mssql',\n", 110 | " 'sqlops']" 111 | ] 112 | }, 113 | "execution_count": 19, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "find_library('sql')" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "### Nearest Neighbor Search" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 15, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "\u001b[0;31mSignature:\u001b[0m\n", 138 | " \u001b[0mw2v\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmost_similar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", 139 | "\u001b[0;34m\u001b[0m \u001b[0mpositive\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", 140 | "\u001b[0;34m\u001b[0m \u001b[0mnegative\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", 141 | "\u001b[0;34m\u001b[0m \u001b[0mtopn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", 142 | "\u001b[0;34m\u001b[0m \u001b[0mrestrict_vocab\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", 143 | "\u001b[0;34m\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", 144 | "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 145 | "\u001b[0;31mSource:\u001b[0m \n", 146 | " \u001b[0;32mdef\u001b[0m \u001b[0mmost_similar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpositive\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnegative\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrestrict_vocab\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 147 | "\u001b[0;34m\u001b[0m \u001b[0;34m\"\"\"Find the top-N most similar words.\u001b[0m\n", 148 | "\u001b[0;34m Positive words contribute positively towards the similarity, negative words negatively.\u001b[0m\n", 149 | "\u001b[0;34m\u001b[0m\n", 150 | "\u001b[0;34m This method computes cosine similarity between a simple mean of the projection\u001b[0m\n", 151 | "\u001b[0;34m weight vectors of the given words and the vectors for each word in the model.\u001b[0m\n", 152 | "\u001b[0;34m The method corresponds to the `word-analogy` and `distance` scripts in the original\u001b[0m\n", 153 | "\u001b[0;34m word2vec implementation.\u001b[0m\n", 154 | "\u001b[0;34m\u001b[0m\n", 155 | "\u001b[0;34m Parameters\u001b[0m\n", 156 | "\u001b[0;34m ----------\u001b[0m\n", 157 | "\u001b[0;34m positive : list of str, optional\u001b[0m\n", 158 | "\u001b[0;34m List of words that contribute positively.\u001b[0m\n", 159 | "\u001b[0;34m negative : list of str, optional\u001b[0m\n", 160 | "\u001b[0;34m List of words that contribute negatively.\u001b[0m\n", 161 | "\u001b[0;34m topn : int or None, optional\u001b[0m\n", 162 | "\u001b[0;34m Number of top-N similar words to return, when `topn` is int. When `topn` is None,\u001b[0m\n", 163 | "\u001b[0;34m then similarities for all words are returned.\u001b[0m\n", 164 | "\u001b[0;34m restrict_vocab : int, optional\u001b[0m\n", 165 | "\u001b[0;34m Optional integer which limits the range of vectors which\u001b[0m\n", 166 | "\u001b[0;34m are searched for most-similar values. For example, restrict_vocab=10000 would\u001b[0m\n", 167 | "\u001b[0;34m only check the first 10000 word vectors in the vocabulary order. (This may be\u001b[0m\n", 168 | "\u001b[0;34m meaningful if you've sorted the vocabulary by descending frequency.)\u001b[0m\n", 169 | "\u001b[0;34m\u001b[0m\n", 170 | "\u001b[0;34m Returns\u001b[0m\n", 171 | "\u001b[0;34m -------\u001b[0m\n", 172 | "\u001b[0;34m list of (str, float) or numpy.array\u001b[0m\n", 173 | "\u001b[0;34m When `topn` is int, a sequence of (word, similarity) is returned.\u001b[0m\n", 174 | "\u001b[0;34m When `topn` is None, then similarities for all words are returned as a\u001b[0m\n", 175 | "\u001b[0;34m one-dimensional numpy array with the size of the vocabulary.\u001b[0m\n", 176 | "\u001b[0;34m\u001b[0m\n", 177 | "\u001b[0;34m \"\"\"\u001b[0m\u001b[0;34m\u001b[0m\n", 178 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtopn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mtopn\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 179 | "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 180 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 181 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpositive\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 182 | "\u001b[0;34m\u001b[0m \u001b[0mpositive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 183 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnegative\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 184 | "\u001b[0;34m\u001b[0m \u001b[0mnegative\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 185 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 186 | "\u001b[0;34m\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minit_sims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 187 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 188 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpositive\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnegative\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 189 | "\u001b[0;34m\u001b[0m \u001b[0;31m# allow calls like most_similar('dog'), as a shorthand for most_similar(['dog'])\u001b[0m\u001b[0;34m\u001b[0m\n", 190 | "\u001b[0;34m\u001b[0m \u001b[0mpositive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpositive\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 191 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 192 | "\u001b[0;34m\u001b[0m \u001b[0;31m# add weights for each word, if not already present; default to 1.0 for positive and -1.0 for negative words\u001b[0m\u001b[0;34m\u001b[0m\n", 193 | "\u001b[0;34m\u001b[0m \u001b[0mpositive\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\u001b[0m\n", 194 | "\u001b[0;34m\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1.0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstring_types\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mword\u001b[0m\u001b[0;34m\u001b[0m\n", 195 | "\u001b[0;34m\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpositive\u001b[0m\u001b[0;34m\u001b[0m\n", 196 | "\u001b[0;34m\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 197 | "\u001b[0;34m\u001b[0m \u001b[0mnegative\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\u001b[0m\n", 198 | "\u001b[0;34m\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1.0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstring_types\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mword\u001b[0m\u001b[0;34m\u001b[0m\n", 199 | "\u001b[0;34m\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mnegative\u001b[0m\u001b[0;34m\u001b[0m\n", 200 | "\u001b[0;34m\u001b[0m \u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 201 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 202 | "\u001b[0;34m\u001b[0m \u001b[0;31m# compute the weighted average of all words\u001b[0m\u001b[0;34m\u001b[0m\n", 203 | "\u001b[0;34m\u001b[0m \u001b[0mall_words\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmean\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 204 | "\u001b[0;34m\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpositive\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mnegative\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 205 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 206 | "\u001b[0;34m\u001b[0m \u001b[0mmean\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweight\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 207 | "\u001b[0;34m\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 208 | "\u001b[0;34m\u001b[0m \u001b[0mmean\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweight\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mword_vec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muse_norm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 209 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 210 | "\u001b[0;34m\u001b[0m \u001b[0mall_words\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvocab\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 211 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mmean\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 212 | "\u001b[0;34m\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cannot compute similarity with no input\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 213 | "\u001b[0;34m\u001b[0m \u001b[0mmean\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmatutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munitvec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mREAL\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 214 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 215 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtopn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 216 | "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmost_similar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 217 | "\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\n", 218 | "\u001b[0;34m\u001b[0m \u001b[0mlimited\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvectors_norm\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrestrict_vocab\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvectors_norm\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mrestrict_vocab\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 219 | "\u001b[0;34m\u001b[0m \u001b[0mdists\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlimited\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmean\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 220 | "\u001b[0;34m\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtopn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\n", 221 | "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdists\u001b[0m\u001b[0;34m\u001b[0m\n", 222 | "\u001b[0;34m\u001b[0m \u001b[0mbest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmatutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margsort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtopn\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_words\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreverse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\n", 223 | "\u001b[0;34m\u001b[0m \u001b[0;31m# ignore (don't return) words from the input\u001b[0m\u001b[0;34m\u001b[0m\n", 224 | "\u001b[0;34m\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex2word\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msim\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msim\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msim\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mbest\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msim\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mall_words\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\n", 225 | "\u001b[0;34m\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mtopn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 226 | "\u001b[0;31mFile:\u001b[0m ~/anaconda/envs/test/lib/python3.7/site-packages/gensim/models/keyedvectors.py\n", 227 | "\u001b[0;31mType:\u001b[0m method\n" 228 | ] 229 | }, 230 | "metadata": {}, 231 | "output_type": "display_data" 232 | } 233 | ], 234 | "source": [ 235 | "?? w2v.most_similar" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 18, 241 | "metadata": { 242 | "ExecuteTime": { 243 | "end_time": "2019-01-22T09:05:26.416224Z", 244 | "start_time": "2019-01-22T09:05:26.410757Z" 245 | }, 246 | "scrolled": true 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "[('pg', 0.6635434031486511),\n", 253 | " ('ioredis', 0.6613692045211792),\n", 254 | " ('body-parser', 0.6572729349136353),\n", 255 | " ('dotenv', 0.6570829749107361),\n", 256 | " ('express', 0.6560029983520508),\n", 257 | " ('jsonwebtoken', 0.6221519112586975),\n", 258 | " ('helmet', 0.6159390211105347),\n", 259 | " ('cors', 0.6155110597610474),\n", 260 | " ('sequelize', 0.6083593368530273),\n", 261 | " ('mongoose', 0.6004490852355957),\n", 262 | " ('log4js', 0.5931798815727234),\n", 263 | " ('aws-sdk', 0.5810474157333374),\n", 264 | " ('passport-github', 0.5806360244750977),\n", 265 | " ('cookie-parser', 0.5773148536682129),\n", 266 | " ('express-jwt', 0.5626721382141113),\n", 267 | " ('express-session', 0.5596650838851929),\n", 268 | " ('socket.io', 0.5591639280319214),\n", 269 | " ('redis', 0.5585757493972778),\n", 270 | " ('mongodb', 0.5527012944221497),\n", 271 | " ('discord.js', 0.5520840883255005)]" 272 | ] 273 | }, 274 | "execution_count": 18, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "w2v.most_similar(['mysql'], topn=20)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "### Analogical Reasoning" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 17, 293 | "metadata": { 294 | "ExecuteTime": { 295 | "end_time": "2019-01-22T09:05:30.414605Z", 296 | "start_time": "2019-01-22T09:05:30.403377Z" 297 | } 298 | }, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "[('koa-static', 0.7851258516311646),\n", 304 | " ('koa-router', 0.776435136795044),\n", 305 | " ('koa-bodyparser', 0.7712484002113342),\n", 306 | " ('koa-json', 0.6993788480758667),\n", 307 | " ('koa-body', 0.6988320350646973),\n", 308 | " ('koa-views', 0.6964592933654785),\n", 309 | " ('koa-logger', 0.683550238609314),\n", 310 | " ('koa-onerror', 0.652797281742096),\n", 311 | " ('@koa/cors', 0.6326575875282288),\n", 312 | " ('koa-session', 0.6273043751716614)]" 313 | ] 314 | }, 315 | "execution_count": 17, 316 | "metadata": {}, 317 | "output_type": "execute_result" 318 | } 319 | ], 320 | "source": [ 321 | "# express : body-parser :: koa : ?\n", 322 | "w2v.most_similar(['express-session', 'koa'],['express'])" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [] 331 | } 332 | ], 333 | "metadata": { 334 | "kernelspec": { 335 | "display_name": "Python 3", 336 | "language": "python", 337 | "name": "python3" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | "nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.7.3" 350 | } 351 | }, 352 | "nbformat": 4, 353 | "nbformat_minor": 2 354 | } 355 | -------------------------------------------------------------------------------- /plugins/vscode/src/extension.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * VSCode plugin for Code-Compass (https://www.bell-labs.com/code-compass). 3 | * Powered by import2vec (https://arxiv.org/abs/1904.03990). 4 | * 5 | * Copyright (C) 2019, Nokia 6 | * Licensed under the BSD 3-Clause License 7 | * 8 | * Author: Bart Theeten (bart.theeten@nokia-bell-labs.com) 9 | * Date: May 2019 10 | **/ 11 | 'use strict'; 12 | 13 | import * as vscode from 'vscode'; 14 | import { URL } from 'url'; 15 | const fs = require('fs'); 16 | const uuid = require('uuid/v4'); 17 | const xml2js = require('xml2js'); 18 | 19 | const HttpsProxyAgent = require('https-proxy-agent'); 20 | 21 | let SUPPORTED_FILE_TYPES : Array = ['.java', '.js', '.py', '.xml', '.json', '.txt']; 22 | 23 | var proxyConfig; 24 | var config; 25 | var USER_KEY:string; // anonymous identification of the user 26 | var PROXY:string; 27 | 28 | var PROTOCOL = ''; 29 | var HOSTNAME = ''; 30 | var PORT = 0; 31 | var serverURL; 32 | var VERSION = vscode.extensions.getExtension('NokiaBellLabs.code-compass').packageJSON.version; 33 | 34 | var protocol; 35 | var agent; 36 | 37 | // Track currently webview panel 38 | let currentPanel: vscode.WebviewPanel | undefined = undefined; 39 | 40 | var extensionPath : string; 41 | var editor : any = vscode.window.activeTextEditor; // the editor from which codeCompass was launched 42 | var language : string = 'java'; 43 | var ctx : string[] = []; 44 | 45 | var suggestCache = {}; // map from suggested module to list of raw modules (used for highlighting) 46 | 47 | var snippets: {}; 48 | 49 | let error = null; 50 | 51 | 52 | // Initialize all parameters. Must be re-loaded when the configuration has changed. 53 | function init() { 54 | proxyConfig = vscode.workspace.getConfiguration('http'); 55 | config = vscode.workspace.getConfiguration('code-compass'); 56 | 57 | console.log("Proxy: " + JSON.stringify(proxyConfig)); 58 | console.log("Configuration: " + JSON.stringify(config)); 59 | 60 | PROXY = proxyConfig.proxy; 61 | 62 | if (config.url !== "") { // specifying a URL overrides DEPLOYMENT settings 63 | var u = new URL(config.url); 64 | PROTOCOL = u.protocol; 65 | HOSTNAME = u.hostname; 66 | console.log("PROTOCOL:", PROTOCOL); 67 | console.log("PORT", u.port); 68 | PORT = parseInt(u.port) || (PROTOCOL === 'http:' ? 80 : 443); 69 | } 70 | else { 71 | PROTOCOL = 'https:'; 72 | HOSTNAME = 'www.code-compass.com'; 73 | PORT = 443; 74 | } 75 | 76 | serverURL = `${PROTOCOL}//${HOSTNAME}:${PORT}`; 77 | 78 | console.log("SETTINGS:"); 79 | console.log(" - backend URL: " + serverURL); 80 | console.log(" - proxy: " + PROXY ? PROXY : "no proxy"); 81 | console.log(" - version: " + VERSION); 82 | 83 | protocol = (PROTOCOL === 'https:') ? require('https') : require('http'); 84 | agent = (PROXY && PROXY!=="") ? new HttpsProxyAgent(PROXY) : undefined; 85 | } 86 | init(); 87 | 88 | // register for configuration changes 89 | vscode.workspace.onDidChangeConfiguration(() => { 90 | console.log("\n\n*** CONFIGURATION CHANGED => RELOADING ***\n\n"); 91 | init(); 92 | // since serverURL is passed in the webview content HTML, we must reload it 93 | if (currentPanel) { 94 | currentPanel.webview.html = getWebViewContent(); 95 | } 96 | }); 97 | 98 | // this method is called when your extension is activated 99 | // your extension is activated the very first time the command is executed 100 | export function activate(context: vscode.ExtensionContext) { 101 | console.log("\n\n========="); 102 | console.log(" ACTIVATE "); 103 | console.log("=========\n\n"); 104 | 105 | if (context.globalState.get("userKey")) { 106 | console.log("USER = " + context.globalState.get("userKey")); 107 | } else { 108 | context.globalState.update("userKey", uuid()); 109 | console.log("GENERATING USER KEY:", context.globalState.get("userKey")); 110 | } 111 | USER_KEY = context.globalState.get("userKey"); 112 | extensionPath = context.extensionPath; 113 | 114 | // Use the console to output diagnostic information (console.log) and errors (console.error) 115 | // This line of code will only be executed once when your extension is activated 116 | console.log('Extension "codeCompass-plugin" is now active!'); 117 | 118 | vscode.window.onDidChangeActiveTextEditor(edit => { 119 | console.log("DID CHANGE ACTIVE TEXT EDITOR: " + edit.document.fileName); 120 | editor = edit; 121 | onGetContext(); 122 | }); 123 | 124 | // get snippets from server 125 | getRequest('/getSnippets/java', (err, data) => { 126 | if (err) { 127 | console.error("Could not load snippets from server"); 128 | let errMsg = `Failed to communicate with server. Please verify that you can reach ${serverURL}. You may need to configure a proxy in Code > Preferences > Settings, key = http.proxy`; 129 | error = errMsg; 130 | } else { 131 | console.log("Got " + Object.keys(data).length + " snippets from server"); 132 | snippets = data; 133 | console.log("SNIPPETS = " + JSON.stringify(snippets, null, 2)); 134 | } 135 | }); 136 | 137 | let disposable = vscode.commands.registerCommand('extension.showCodeCompass', () => { 138 | 139 | const columnToShowIn = vscode.window.activeTextEditor ? vscode.window.activeTextEditor.viewColumn : undefined; 140 | if (currentPanel) { 141 | // If we already have a panel, show it in the target column 142 | currentPanel.reveal(columnToShowIn); 143 | } else { 144 | // Create and show a new webview 145 | currentPanel = vscode.window.createWebviewPanel( 146 | 'codeCompass', // Identifies the type of the webview. Used internally 147 | "Code Compass", // Title 148 | -2, // View Column Beside 149 | { 150 | enableScripts: true, 151 | retainContextWhenHidden: true 152 | } 153 | ); 154 | 155 | // Reset when the current panel is closed 156 | currentPanel.onDidDispose(() => { 157 | currentPanel = undefined; 158 | }, null, context.subscriptions); 159 | 160 | if (error) { 161 | console.log("showing error page"); 162 | currentPanel.webview.html = getErrorView(error); 163 | } else { 164 | console.log("loading regular page"); 165 | currentPanel.webview.html = getWebViewContent(); 166 | } 167 | 168 | // Handle messages from the webview 169 | currentPanel.webview.onDidReceiveMessage(message => { 170 | console.log("Received message from webView: " + JSON.stringify(message)); 171 | switch (message.command) { 172 | case 'alert': 173 | vscode.window.showErrorMessage(message.text); 174 | return; 175 | case 'feedback': 176 | postRequest(`/feedback?userKey=${USER_KEY}`, message.data); 177 | return; 178 | case 'getContext': 179 | // Send a message to our webview. 180 | onGetContext(); 181 | return; 182 | case 'setLanguage': 183 | language = message.language; 184 | return; 185 | case 'getSuggestions': 186 | getModulesForIntent(message.intent); 187 | return; 188 | case 'getModulesForFilter': 189 | getModulesForFilter(message.context, message.intent, message.filter); 190 | return; 191 | case 'getNearestCategories': 192 | getNearestCategories(message.context, message.num); 193 | return; 194 | case 'getFilteredNearestCategories': 195 | getFilteredNearestCategories(message.context, message.filter, message.num); 196 | return; 197 | case 'getIntents': 198 | getRequest(`/intents/${message.language}`, (err, intents) => { 199 | if (err) { 200 | console.error("Couldn't download intent list: " + JSON.stringify(err)); 201 | } else { 202 | console.log("Downloaded intents: " + JSON.stringify(intents)); 203 | currentPanel.webview.postMessage({ command: 'setIntentsForLanguage', intents: intents, language: message.language }); 204 | } 205 | }); 206 | return; 207 | case 'getLibs': 208 | getRequest(`/libs/${message.language}`, (err, libs) => { 209 | if (err) { 210 | console.error(`Couldn't download library list for ${message.language}: ${JSON.stringify(err)}`); 211 | } else { 212 | console.log(`${libs.total} libs downloaded`); 213 | currentPanel.webview.postMessage({ command: 'setLibsForLanguage', libs: libs.libs, language: message.language }); 214 | } 215 | }); 216 | return; 217 | case 'openURL': 218 | console.log("Opening URL in default browser: " + message.url); 219 | vscode.commands.executeCommand('vscode.open', vscode.Uri.parse(message.url)); 220 | return; 221 | case 'showMainView': 222 | console.log("Showing main view"); 223 | currentPanel.webview.html = getWebViewContent(); 224 | return; 225 | case 'tryAgain': 226 | console.log("Trying to connect to server..."); 227 | getRequest('/getSnippets/java', (err, data) => { 228 | if (err) { 229 | console.log("Still failed to connect..."); 230 | currentPanel.webview.postMessage({ msg: 'disconnected' }); 231 | } else { 232 | error = null; 233 | snippets = data; 234 | console.log("Connection ok"); 235 | currentPanel.webview.postMessage({ msg: 'connected' }); 236 | } 237 | }); 238 | return; 239 | default: 240 | console.error("Unsupported command:", message.command); 241 | } 242 | }, undefined, context.subscriptions); 243 | } 244 | }); 245 | 246 | context.subscriptions.push(disposable); 247 | } 248 | 249 | function onGetContext() { 250 | getActiveContext((c, l) => { 251 | ctx = c; 252 | language = l; 253 | console.log("context, language =", ctx, language); 254 | if (!currentPanel) { 255 | console.error("NO CURRENT PANEL"); 256 | return; 257 | } 258 | if (l === 'unsupported') { 259 | console.log("No context: unsupported file extension."); 260 | currentPanel.webview.postMessage({ command: 'setStatus', error: true, msg: "Please select a supported file type (.java, .js, .py or pom.xml, package.json or requirements.txt) in another editor pane.

You can also browse the supported ecosytems by clicking one of the supported language icons."}); 261 | return; 262 | } else if (l === 'no_editor') { 263 | currentPanel.webview.postMessage({ command: 'setStatus', error: true, msg: "Code-Compass works best when it sits alongside an active editor pane containing a supported source file (.java, .js, .py or pom.xml, package.json or requirements.txt). So please use split-screen and select a supported file type in the other pane.

You can also browse the supported ecosytems by clicking one of the supported language icons."}); 264 | return; 265 | } 266 | let fileName = editor.document.fileName; 267 | if (fileName.includes('/')) { 268 | fileName = fileName.substr(fileName.lastIndexOf('/')+1); 269 | } else if (fileName.includes('\\')) { 270 | fileName = fileName.substr(fileName.lastIndexOf('\\')+1); 271 | } 272 | let msg = `Scanned ${ctx.length} ${language.toUpperCase()} project dependencies in ${fileName}. `; 273 | currentPanel.webview.postMessage({ command: 'setStatus', msg: msg }); 274 | console.log("Setting context:", ctx); 275 | currentPanel.webview.postMessage({ command: 'setContext', context: ctx, language: l }); 276 | }); 277 | } 278 | 279 | function getNearestCategories(ctx, num) { 280 | postRequest(`/nearestCategories/${language}/${num}`, ctx, (err, result) => { 281 | console.log("CONTEXT:", ctx); 282 | if (err) { 283 | console.log("ERROR - getNearestCategories: " + JSON.stringify(err)); 284 | currentPanel.webview.postMessage({ 285 | command: 'setNearestCategories', 286 | categories: [], 287 | nearestSuggestions: [] 288 | }).then(() => console.log("getNearestCategories ERROR sent"), (err) => console.error(JSON.stringify(err))); 289 | } else { 290 | console.log(`nearest categories are: ${JSON.stringify(result.cats)}`); 291 | console.log(`nearest libraries are: ${JSON.stringify(result.nearestLibs)}`); 292 | currentPanel.webview.postMessage({ 293 | command: 'setNearestCategories', 294 | categories: result.cats, 295 | nearestSuggestions: result.nearestLibs, 296 | }).then(() => console.log("getNearestCategories sent"), (err) => console.error(JSON.stringify(err))); 297 | } 298 | }); 299 | } 300 | 301 | function getFilteredNearestCategories(ctx, filter, num) { 302 | postRequest(`/filteredNearestCategories/${language}/${num}`, {context: ctx, filter: filter}, (err, result) => { 303 | console.log("CONTEXT:", ctx); 304 | if (err) { 305 | console.log("ERROR - getNearestCategories: " + JSON.stringify(err)); 306 | currentPanel.webview.postMessage({ 307 | command: 'setNearestCategories', 308 | categories: [], 309 | nearestSuggestions: [] 310 | }).then(() => console.log("getNearestCategories ERROR sent"), (err) => console.error(JSON.stringify(err))); 311 | } else { 312 | console.log(`nearest categories are: ${JSON.stringify(result.cats)}`); 313 | console.log(`nearest libraries are: ${JSON.stringify(result.nearestLibs)}`); 314 | currentPanel.webview.postMessage({ 315 | command: 'setNearestCategories', 316 | categories: result.cats, 317 | nearestSuggestions: result.nearestLibs, 318 | }).then(() => console.log("getNearestCategories sent"), (err) => console.error(JSON.stringify(err))); 319 | } 320 | }); 321 | } 322 | 323 | function getModulesForIntent(intent) { 324 | intent = intent.toLowerCase().replace(/ /g, '_'); 325 | console.log('intent = ' + intent); 326 | postRequest(`/searchByIntent/${language}`, {context: ctx, intent: intent}, (err, mods) => { 327 | if (err) { 328 | console.log("ERROR - getModulesForIntent:", err); 329 | currentPanel.webview.postMessage({ 330 | command: 'setSuggestions', 331 | suggestions: mods.filtered, 332 | rawSuggestions: mods.raw 333 | }).then(() => console.log("setSuggestions ERROR sent"), (err) => console.error(JSON.stringify(err))); 334 | } else { 335 | console.log(`closest modules for intent ${intent}`); 336 | suggestCache = {}; 337 | mods.filtered.forEach(mod => suggestCache[mod.module] = mods.raw.filter(m => (m.startsWith(mod.module + ':') || m === mod.module))); 338 | mods.filtered = mods.filtered.map(mod => { 339 | mod.snippet = snippets[mod.module]; 340 | return mod; 341 | }); 342 | console.log("sending setSuggestions:", mods.filtered); 343 | currentPanel.webview.postMessage({ 344 | command: 'setSuggestions', 345 | suggestions: mods.filtered, 346 | rawSuggestions: mods.raw 347 | }).then(() => console.log("setSuggestions sent"), (err) => console.error(JSON.stringify(err))); 348 | } 349 | }); 350 | } 351 | 352 | function getModulesForFilter(ctx, intent, filter) { 353 | intent = intent.toLowerCase().replace(/ /g, '_'); 354 | console.log('intent = ' + intent); 355 | postRequest(`/searchByIntent/${language}`, {context: ctx, intent: intent, filter:filter}, (err, mods) => { 356 | if (err) { 357 | console.log("ERROR - getModulesForIntent:", err); 358 | currentPanel.webview.postMessage({ 359 | command: 'setSuggestions', 360 | suggestions: mods.filtered, 361 | rawSuggestions: mods.raw 362 | }).then(() => console.log("setSuggestions ERROR sent"), (err) => console.error(JSON.stringify(err))); 363 | } else { 364 | console.log(`closest modules for intent ${intent}`); 365 | suggestCache = {}; 366 | mods.filtered.forEach(mod => suggestCache[mod.module] = mods.raw.filter(m => (m.startsWith(mod.module + ':') || m === mod.module))); 367 | mods.filtered = mods.filtered.map(mod => { 368 | mod.snippet = snippets[mod.module]; 369 | return mod; 370 | }); 371 | console.log("sending setSuggestions:", mods.filtered); 372 | currentPanel.webview.postMessage({ 373 | command: 'setSuggestions', 374 | suggestions: mods.filtered, 375 | rawSuggestions: mods.raw 376 | }).then(() => console.log("setSuggestions sent"), (err) => console.error(JSON.stringify(err))); 377 | } 378 | }); 379 | } 380 | 381 | function getModulesForPackages(packages, callback) { 382 | postRequest(`/mapping/${language}/modulesForPackages`, packages, (err, mods) => { 383 | console.log(`modules for packages ${JSON.stringify(packages)} are: ${JSON.stringify(mods)}`); 384 | if (err) { 385 | console.log("ERROR - getModulesForPackages: " + JSON.stringify(err)); 386 | callback(err, null); 387 | } else { 388 | callback(null, mods); 389 | } 390 | }); 391 | } 392 | 393 | function setLanguage(lang) { 394 | language = lang; 395 | console.log("Language set to " + language); 396 | } 397 | 398 | function getFileExtension(filename) { 399 | return filename.toLowerCase().substring(filename.lastIndexOf('.')); 400 | } 401 | 402 | function isSupportedFileExtension(ext) { 403 | console.log("File extension: " + ext); 404 | return SUPPORTED_FILE_TYPES.indexOf(ext) !== -1; 405 | } 406 | 407 | // calculates the package-level context libraries in the active editor 408 | function getActiveContext(callback) { 409 | console.log("Getting context..."); 410 | if (vscode.window.visibleTextEditors.length === 0) { 411 | console.log("No editor"); 412 | callback([], 'no_editor'); // No open text editor 413 | return; 414 | } 415 | if (!editor) { 416 | editor = vscode.window.activeTextEditor || vscode.window.visibleTextEditors[0]; 417 | } 418 | let ext = getFileExtension(editor.document.fileName); 419 | console.log("Active editor extension: " + ext); 420 | if (!isSupportedFileExtension(ext)) { 421 | console.log("Extension " + ext + " not supported"); 422 | // fallback to first visible editor 423 | editor = vscode.window.visibleTextEditors[0]; 424 | } 425 | let doc = editor.document; 426 | console.log("Getting active context from " + doc.fileName); 427 | ext = getFileExtension(doc.fileName); 428 | if (!isSupportedFileExtension(ext)) { 429 | console.log("Extension " + ext + " not supported"); 430 | callback([], 'unsupported'); 431 | return; 432 | } 433 | let str = doc.getText(); 434 | let imps : string[] = []; 435 | 436 | switch(ext) { 437 | case ".java": 438 | imps = parseJava(str); 439 | getModulesForPackages(imps, (err, mods) => { 440 | if (err) { 441 | console.error("ERROR mapping packages to modules:", err); 442 | callback([], language); 443 | } else { 444 | console.log("Got modules for packages:", mods); 445 | callback(mods, language); 446 | } 447 | }); 448 | return; 449 | case ".js" : imps = parseJavaScript(str); break; 450 | case ".py" : imps = parsePython(str); break; 451 | case ".xml" : imps = parsePomXml(str); break; 452 | case ".json": imps = parsePackageJson(str); break; 453 | case ".txt": imps = parseRequirementsTxt(str); break; 454 | default: 455 | console.error("No parser for filetype " + ext); 456 | } 457 | callback(imps, language); 458 | } 459 | 460 | function parseJava(str) { 461 | setLanguage('java'); 462 | var regex = /^\s*import ([^;\s]+)\s*;/gm; 463 | let lines = str.split(/[\n\r]/); 464 | let m: any; 465 | var imps : string[] = []; 466 | let pos = -1; 467 | while ((m = regex.exec(str)) !== null) { 468 | // This is necessary to avoid infinite loops with zero-width matches 469 | if (m.index === regex.lastIndex) { 470 | regex.lastIndex++; 471 | } 472 | m.forEach((match: string, groupIndex: number) => { 473 | if (groupIndex > 0) { 474 | let packageImp = match.substring(0, match.lastIndexOf('.')); 475 | if (imps.indexOf(packageImp) === -1) { 476 | imps.push(packageImp); 477 | } 478 | } else { 479 | pos = m.index; 480 | } 481 | }); 482 | } 483 | if (imps.length === 0) { 484 | vscode.window.showWarningMessage("No import statements found in active editor"); 485 | return []; 486 | } else { 487 | return imps; 488 | } 489 | } 490 | 491 | function parseJavaScript(str) { 492 | setLanguage('js'); 493 | var regexs = [/require\(["']([^"']+)["']\)/gm, /import ["']([^"']+)["']/gm, /import\s+{?(?!\s+from).+}?\s+from\s+["']([^"']+)["']/gm]; 494 | const acceptedFirstCharacter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@"; 495 | let m: any; 496 | var imps : string[] = []; 497 | for (var i in regexs) { 498 | let regex = regexs[i]; 499 | while ((m = regex.exec(str)) !== null) { 500 | if (m.index === regex.lastIndex) { 501 | regex.lastIndex++; 502 | } 503 | m.forEach((match, groupIndex) => { 504 | if (groupIndex > 0) { 505 | if (acceptedFirstCharacter.includes(match[0]) && imps.indexOf(match) === -1) { 506 | imps.push(match); 507 | } 508 | } 509 | }); 510 | } 511 | } 512 | return imps; 513 | } 514 | 515 | function parsePython(str) { 516 | setLanguage('python'); 517 | var regex = /^\s*import\s+(.+)$/gm; 518 | let m: any; 519 | var imps : string[] = []; 520 | while ((m = regex.exec(str)) !== null) { 521 | if (m.index === regex.lastIndex) { 522 | regex.lastIndex++; 523 | } 524 | m.forEach((match, groupIndex) => { 525 | if (groupIndex > 0) { 526 | let parts = match.split(' as '); 527 | let impps = parts[0].split(','); 528 | impps.forEach(imp => { 529 | imp = imp.trim(); 530 | imp = imp.split('.')[0]; 531 | if (imps.indexOf(imp) === -1) { 532 | imps.push(imp); 533 | } 534 | }); 535 | } 536 | }); 537 | } 538 | regex = /^\s*from\s+([\w\.]+)\simport\s(.+)$/gm; 539 | let from : string; 540 | while ((m = regex.exec(str)) !== null) { 541 | if (m.index === regex.lastIndex) { 542 | regex.lastIndex++; 543 | } 544 | m.forEach((match, groupIndex) => { 545 | if (groupIndex === 1) { 546 | from = match; 547 | } else if (groupIndex === 2) { 548 | let parts = match.split(' as '); 549 | let impps = parts[0].split(','); 550 | impps.forEach(imp => { 551 | imp = from; 552 | imp = imp.split('.')[0]; 553 | if (imps.indexOf(imp) === -1) { 554 | imps.push(imp); 555 | } 556 | }); 557 | } 558 | }); 559 | } 560 | return imps; 561 | } 562 | 563 | function parsePomXml(str) { 564 | console.log("here2"); 565 | setLanguage('java'); 566 | let imps : string[] = []; 567 | let json : string = xml2js.parseString(str, (err, result) => { 568 | if (err) { 569 | console.error(err); 570 | } else { 571 | let deps = result.project.dependencies[0].dependency; 572 | deps.forEach(dep => { 573 | let imp = dep.groupId[0] + ":" + dep.artifactId[0]; 574 | console.log("POM dependency: " + imp); 575 | imps.push(imp); 576 | }); 577 | } 578 | }); 579 | return imps; 580 | } 581 | 582 | function parsePackageJson(str) { // parses a package.json file 583 | let imps : string[] = []; 584 | try { 585 | let obj = JSON.parse(str); 586 | let deps = obj.dependencies; 587 | if (deps) { 588 | imps = Object.keys(deps); 589 | } 590 | console.log("parseJson: dependencies = " + JSON.stringify(imps)); 591 | setLanguage('js'); 592 | } catch (e) { 593 | console.log("ERROR parsing JSON file: " + JSON.stringify(e)); 594 | } 595 | return imps; 596 | } 597 | 598 | function parseRequirementsTxt(str) { // parses a package.json file 599 | var imps : string[] = []; 600 | try { 601 | const regex = /^(\w+)[=<>]?/gm; 602 | let m : any; 603 | while ((m = regex.exec(str)) !== null) { 604 | if (m.index === regex.lastIndex) { 605 | regex.lastIndex++; 606 | } 607 | m.forEach((match, groupIndex) => { 608 | if (groupIndex > 0) { 609 | console.log(`Found match, group ${groupIndex}: ${match}`); 610 | imps.push(match); 611 | } 612 | }); 613 | } 614 | console.log("parseRequirementsTxt: dependencies = " + JSON.stringify(imps)); 615 | setLanguage('python'); 616 | } catch (e) { 617 | console.log("ERROR parsing TXT file: " + JSON.stringify(e)); 618 | } 619 | return imps; 620 | } 621 | 622 | 623 | function getWebViewContent() { 624 | let htmlContent = '' + fs.readFileSync(extensionPath + '/codeCompass.html'); 625 | htmlContent = htmlContent.replace(/\$\$\{serverURL\}/g, serverURL); 626 | let cssContent = '' + fs.readFileSync(extensionPath + '/codeCompass.css'); 627 | let pos = htmlContent.indexOf(''); 628 | htmlContent = htmlContent.substring(0,pos-1) + '' + htmlContent.substring(pos); 629 | return htmlContent; 630 | } 631 | 632 | function getErrorView(msg) { 633 | let htmlContent = '' + fs.readFileSync(extensionPath + '/errorPage.html'); 634 | htmlContent = htmlContent.replace(/\$\$\{serverURL\}/g, serverURL); 635 | return htmlContent; 636 | } 637 | 638 | // this method is called when your extension is deactivated 639 | export function deactivate() { 640 | } 641 | 642 | function getHttpOptions(method:string, event:string) { 643 | let options:any = { 644 | host: HOSTNAME, 645 | port: PORT, 646 | path: `${event}`, 647 | method: method, 648 | headers: { 649 | vscode_plugin_key: `VSCode-version-${VERSION}`, 650 | user_key: USER_KEY 651 | }, 652 | timeout: 10000, 653 | followRedirect: true, 654 | maxRedirects: 10 655 | }; 656 | if (agent) { 657 | options.agent = agent; 658 | } 659 | if (method === 'POST') { 660 | options.headers['Content-Type'] = 'application/json'; 661 | } 662 | return options; 663 | } 664 | 665 | function getRequest(path: string, callback?) { 666 | let options = getHttpOptions('GET', path); 667 | console.log("OPTIONS: " + JSON.stringify(options)); 668 | const req = protocol.request(options, (res: any) => { 669 | let r = ""; 670 | console.log(`GET status: ${res.statusCode}`); 671 | res.setEncoding('utf8'); 672 | res.on('data', (chunk: string) => { 673 | error = null; 674 | r += chunk; 675 | }); 676 | res.on('end', () => { 677 | error = null; 678 | console.log('GET response: -- end --'); 679 | if (callback) { 680 | try { 681 | callback(null, JSON.parse(r)); 682 | } catch(e) { 683 | // fallback to non-JSON response 684 | callback(null, r); 685 | } 686 | } 687 | }); 688 | }); 689 | req.on('error', (err: any) => { 690 | error = err; 691 | console.log(`ERROR in GET request: ${err.message}`); 692 | let errMsg = `Failed to communicate with server. ERROR: ${err.message}`; 693 | vscode.window.showErrorMessage(errMsg); 694 | currentPanel.webview.html = getErrorView(error); 695 | if (callback) { 696 | callback(err, null); 697 | } 698 | }); 699 | req.end(); 700 | } 701 | 702 | // sends a simple message to the visualization server using an HTTP GET request with query string 703 | function postRequest(path: string, data: any, callback?) { 704 | let options = getHttpOptions('POST', path); 705 | console.log("OPTIONS: " + JSON.stringify(options)); 706 | const req = protocol.request(options, (res: any) => { 707 | let result = ""; 708 | console.log(`GET status: ${res.statusCode}`); 709 | res.setEncoding('utf8'); 710 | res.on('data', (chunk: string) => { 711 | error = null; 712 | console.log(`GET response: ${chunk}`); 713 | result += chunk; 714 | }); 715 | res.on('end', () => { 716 | error = null; 717 | console.log('GET response: -- end --'); 718 | if (callback && result) { 719 | callback(null, JSON.parse(result)); 720 | } 721 | }); 722 | }); 723 | req.on('error', (err: any) => { 724 | console.log(`ERROR in POST request: ${err.message}`); 725 | let errMsg = `Failed to communicate with server. ERROR: ${err.message}`; 726 | error = err; 727 | vscode.window.showErrorMessage(errMsg); 728 | console.log("showing error page"); 729 | currentPanel.webview.html = getErrorView(error); 730 | if (callback) { 731 | callback(err, null); 732 | } 733 | }); 734 | if (data) { 735 | req.write(JSON.stringify(data)); 736 | } 737 | req.end(); 738 | } 739 | 740 | -------------------------------------------------------------------------------- /plugins/vscode/codeCompass.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Code Compass 7 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 98 | 99 | 100 | 101 |

102 |
103 |
104 |
Loading...
105 |
106 |
107 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 |

Code Compass

119 | 120 | 121 | 122 | 123 | 124 | 125 |

126 |
127 | 128 | 129 | 130 | 131 | 132 | 133 |
134 |
135 | 136 | 137 | 138 | 139 | 140 | 141 |
x
142 |
143 |
144 |

Error!

145 |

146 |

ok

147 |
148 |
149 |
150 |
151 |

Recommended libraries

152 | 155 |
156 |
157 |
158 | 159 | 160 | 189 | 199 | 200 |
161 |
162 | 188 |
190 | 193 |
194 |
195 |
    196 |
    197 |
    198 |
    201 |
    202 |
    203 | 204 |
    205 | 213 | 214 | 1024 | 1025 | 1026 | --------------------------------------------------------------------------------