├── .gitattributes ├── .gitconfig ├── .gitignore ├── .gitlab-ci.yml ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── docs ├── Makefile ├── conf.py ├── index.rst └── make.bat ├── gitconfig.sh ├── notebooks ├── demo.ipynb ├── lsp-ccls-ccls.ipynb └── lsp-ccls.ipynb ├── persper ├── analytics │ ├── analyzer.py │ ├── analyzer2.py │ ├── another_patch_parser.py │ ├── c.py │ ├── call_commit_graph.py │ ├── call_graph │ │ ├── c.py │ │ ├── cpp.py │ │ └── utils.py │ ├── commit_classifier.py │ ├── complexity.py │ ├── cpp.py │ ├── detect_change.py │ ├── devrank.py │ ├── error.py │ ├── git_tools.py │ ├── graph_server.py │ ├── inverse_diff.py │ ├── iterator.py │ ├── lsp_graph_server │ │ ├── README.md │ │ ├── __init__.py │ │ ├── callgraph │ │ │ ├── __init__.py │ │ │ ├── adapters.py │ │ │ ├── builder.py │ │ │ └── manager.py │ │ ├── ccls.py │ │ ├── cquery.py │ │ ├── fileparsers │ │ │ └── CPP14Lexer.py │ │ ├── jsonrpcutils.py │ │ ├── languageclient │ │ │ ├── __init__.py │ │ │ ├── lspclient.py │ │ │ ├── lspcontract.py │ │ │ └── lspserver.py │ │ ├── main.py │ │ ├── setup.cfg │ │ └── wildcards.py │ ├── multi_analyzer.py │ ├── pagerank.py │ ├── patch_parser.py │ ├── score.py │ ├── srcml.py │ └── write_graph_to_dot.py ├── analytics2 │ ├── abstractions │ │ ├── __init__.py │ │ ├── analyzers.py │ │ ├── callcommitgraph.py │ │ └── repository.py │ ├── devrank.py │ ├── memorycallcommitgraph.py │ ├── metaanalyzer.py │ ├── repository.py │ ├── setup.cfg │ └── utilities.py └── util │ ├── bidict.py │ ├── linguist.rb │ ├── normalize_score.py │ └── path.py ├── requirements.txt ├── setup-linux-ubuntu.sh ├── setup.py ├── test ├── README.md ├── __init__.py ├── cpp_test_files_repo │ ├── A │ │ └── main.cpp │ ├── B │ │ └── main_renamed.cpp │ ├── C │ │ ├── another_main.cpp │ │ └── main_renamed.cpp │ └── cg.dot ├── cpp_test_repo │ ├── A │ │ ├── Exceptions.h │ │ ├── TextFileParsers.cpp │ │ ├── TextFileParsers.h │ │ ├── TypeTraits.h │ │ ├── Utility.cpp │ │ ├── Utility.h │ │ ├── main.cpp │ │ ├── 
stdafx.cpp │ │ └── stdafx.h │ ├── B │ │ ├── Exceptions.h │ │ ├── TextFileParsers.cpp │ │ ├── TextFileParsers.h │ │ ├── TypeTraits.h │ │ ├── Utility-1.cpp │ │ ├── Utility.h │ │ ├── main.cpp │ │ ├── stdafx.cpp │ │ └── stdafx.h │ ├── C │ │ ├── CppProject1.vcxproj │ │ ├── CppProject1.vcxproj.filters │ │ ├── Exceptions.h │ │ ├── TextFileParsers.cpp │ │ ├── TextFileParsers.h │ │ ├── TypeTraits.h │ │ ├── Utility.cpp │ │ ├── Utility.h │ │ ├── main.cpp │ │ ├── stdafx.cpp │ │ └── stdafx.h │ ├── D │ │ ├── Exceptions.h │ │ ├── TextFileParsers.cpp │ │ ├── TextFileParsers.h │ │ ├── TypeTraits.h │ │ ├── Utility.cpp │ │ ├── Utility.h │ │ ├── main.cpp │ │ ├── stdafx.cpp │ │ └── stdafx.h │ └── cg.dot ├── pytest.ini ├── test_analytics │ ├── __init__.py │ ├── baseline │ │ ├── analyzer_pickling │ │ │ ├── A.g.json │ │ │ ├── B.g.json │ │ │ ├── C.g.json │ │ │ ├── D.g.json │ │ │ ├── E.g.json │ │ │ ├── F.g.json │ │ │ ├── G.g.json │ │ │ ├── H.g.json │ │ │ ├── I.g.json │ │ │ ├── J.g.json │ │ │ └── K.g.json │ │ ├── cpp_test_repo │ │ │ ├── A.g.json │ │ │ ├── B.g.json │ │ │ ├── C.g.json │ │ │ └── D.g.json │ │ ├── feature_branch │ │ │ ├── A.g.json │ │ │ ├── B.g.json │ │ │ ├── C.g.json │ │ │ ├── D.g.json │ │ │ ├── E.g.json │ │ │ ├── F.g.json │ │ │ ├── G.g.json │ │ │ ├── H.g.json │ │ │ ├── I.g.json │ │ │ ├── J.g.json │ │ │ └── K.g.json │ │ └── feature_branch_first_parent │ │ │ ├── A.g.json │ │ │ ├── B.g.json │ │ │ ├── C.g.json │ │ │ ├── D.g.json │ │ │ ├── E.g.json │ │ │ ├── F.g.json │ │ │ └── K.g.json │ ├── conftest.py │ ├── patch_test_files │ │ ├── example.cc │ │ ├── example.patch │ │ ├── example2.patch │ │ ├── example3.patch │ │ ├── example4.patch │ │ ├── example5.patch │ │ ├── example6.patch │ │ ├── example7.patch │ │ ├── example7_new.c │ │ └── example7_old.c │ ├── test_analyzer.py │ ├── test_analyzer_c.py │ ├── test_analyzer_cpp.py │ ├── test_analyzer_lsp_ccls.py │ ├── test_call_commit_graph.py │ ├── test_detect_change.py │ ├── test_devrank.py │ ├── test_diff.py │ ├── test_filter_commit.py │ 
├── test_inverse_diff.py │ ├── test_modularity.py │ ├── test_multi_analyzer.py │ ├── test_score.py │ ├── test_srcml.py │ ├── util.py │ └── utility │ │ ├── __init__.py │ │ └── graph_baseline.py ├── test_analytics2 │ ├── __init__.py │ ├── helpers │ │ ├── __init__.py │ │ ├── callcommitgraph.py │ │ └── repository.py │ ├── setup.cfg │ ├── test_callcommitgraph.py │ ├── test_metaanalyzer.py │ ├── test_repository.py │ └── utilities.py └── test_feature_branch │ ├── A │ └── main.c │ ├── B │ └── main.c │ ├── C │ └── main.c │ ├── D │ ├── feature-G.c │ └── main.c │ ├── E │ ├── feature-G.c │ ├── feature-H.c │ └── main.c │ ├── F │ ├── feature-G.c │ ├── feature-H.c │ ├── feature-J.c │ └── main.c │ ├── G │ ├── feature-G.c │ └── main.c │ ├── H │ ├── feature-G.c │ ├── feature-H.c │ └── main.c │ ├── I │ ├── feature-G.c │ ├── feature-H.c │ └── main.c │ ├── J │ ├── feature-J.c │ └── main.c │ ├── K │ ├── feature-G.c │ ├── feature-H.c │ ├── feature-K.c │ └── main.c │ ├── cg.dot │ └── cg.png └── tools ├── build_history.py ├── excel_charts ├── distance.py ├── draw_charts.py ├── excel.py ├── gini │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── gini.png │ └── gini.py └── tests │ ├── __init__.py │ ├── draw_charts_test.xlsx │ └── test_distance.py ├── jira_stats ├── collect_git_urls.py ├── git_urls.csv ├── global_stats.ipynb └── process_stats.py ├── pickle_stats.ipynb ├── repo_crawler ├── .gitignore ├── README.md ├── check.ipynb ├── crawl_issues.py ├── github_comments.py ├── icse_repos.config ├── jira_issue.py ├── repo.config └── setup.sh ├── repo_creater ├── README.md └── create_repo.py └── repo_stats ├── setup_ubuntu.sh ├── stats_author.py ├── stats_commit.py ├── stats_pr.py ├── stats_pr.sh └── test ├── cassandra.pr.csv ├── couchdb.pr.csv ├── discourse.pr.csv ├── hbase.pr.csv ├── hive.pr.csv ├── jekyll.pr.csv ├── maven.pr.csv ├── opencv.pr.csv ├── rails.pr.csv ├── spark.pr.csv ├── stats_pr.sh ├── systemml.pr.csv ├── tensorflow.pr.csv ├── vagrant.pr.csv └── zookeeper.pr.csv 
/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb filter=nbstrip_full 2 | -------------------------------------------------------------------------------- /.gitconfig: -------------------------------------------------------------------------------- 1 | [filter "nbstrip_full"] 2 | clean = "jq --indent 1 \ 3 | '(.cells[] | select(has(\"outputs\")) | .outputs) = [] \ 4 | | (.cells[] | select(has(\"execution_count\")) | .execution_count) = null \ 5 | | .cells[].metadata = {} \ 6 | '" 7 | smudge = cat 8 | required = true 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .dropbox 2 | *.pickle 3 | *.xlsx 4 | repos 5 | .vscode 6 | .DS_Store 7 | .idea/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | .ccls-cache 97 | /bin 98 | /bin-* 99 | .pytest_cache 100 | /test/test_analytics/actualdump 101 | /test_scripts 102 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - build 3 | - test 4 | 5 | 6 | test_ci: 7 | stage: test 8 | image: hub.meri.dev/test-docker/test:latest 9 | # only: 10 | # - setup-ci 11 | before_script: 12 | - apt update && apt install -y libarchive-dev #libcurl4-openssl-dev 13 | - apt install -y zlib1g-dev libicu-dev libcurl3 libcurl-openssl1.0-dev 14 | - apt install -y build-essential cmake libssl-dev pkg-config cmake 15 | - wget http://131.123.42.38/lmcrs/beta/srcML-Ubuntu18.04.deb 16 | - dpkg -i srcML-Ubuntu18.04.deb 17 | - mkdir -p ~/.ssh 18 | - echo "${DEPLOY_KEY}" | tr -d '\r' > ~/.ssh/id_rsa 19 | - chmod 600 ~/.ssh/id_rsa 20 | - eval "$(ssh-agent -s)" 21 | - ssh-keyscan -H "gitlab.com" >> ~/.ssh/known_hosts 22 | - chmod 644 ~/.ssh/known_hosts 23 | - set 
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib 24 | - export LC_ALL=C.UTF-8 25 | - export LANG=C.UTF-8 26 | script: 27 | - git config --global user.email "merico@meri.co" 28 | - git config --global user.name "merico" 29 | - pip3 install pipenv 30 | - echo -e "machine gitlab.com\nlogin ${GITLAB_USER}\npassword ${GITLAB_PASSWD}" > ~/.netrc 31 | - git clone https://gitlab.com/persper/code-analytics.git && cd code-analytics 32 | #&& git checkout ${CI_COMMIT_REF_NAME} 33 | - export PYTHONPATH=$PYTHONPATH:/root/code-analytics 34 | - pipenv install --python 3.7 35 | - pipenv run pytest -s test/test_analytics 36 | - pipenv run pytest -s test/test_analytics2 37 | - echo "Done" 38 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | networkx = "*" 8 | numpy = "*" 9 | scipy = "*" 10 | requests = "*" 11 | lxml = "*" 12 | sklearn = "*" 13 | nltk = "*" 14 | openpyxl = "*" 15 | pytest = "*" 16 | pydot = "*" 17 | ipykernel = "*" 18 | matplotlib = "*" 19 | redis = "*" 20 | naked = "*" 21 | antlr4-python3-runtime = "*" 22 | python-jsonrpc-server = "==0.0.2" 23 | pytest-asyncio = "*" 24 | aenum = "*" 25 | pytest-cov = "*" 26 | gitpython = "*" 27 | sphinx = "*" 28 | python-louvain = "*" 29 | 30 | [dev-packages] 31 | 32 | [requires] 33 | python_version = "3.6" 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Persper Code Analytics Tool 2 | 3 | This project implements the DevRank algorithm for quantiying the structural value of code contributions as described in 4 | 5 | > J. Ren\*, H. Yin\*, Q. Hu, A. Fox, W. Koszek. Towards Quantifying the Development Value of Code Contributions. In *FSE (NIER)*, 2018. 
6 | 7 | This repo contains a central code analyzer written in python, which given a target git repository, invokes language-specific call graph server to construct the call-commit graph (union of all commits' call graphs) while it iterates through the commits of the repository being analzyed. The resulted call-commit graph is stored in the [CallCommitGraph](/persper/analytics/call_commit_graph.py) class, which knows how to compute DevRanks for functions, commits, and developers. 8 | 9 | ## Get Started 10 | 11 | The following procedure is tested on Ubuntu 16.04 LTS. 12 | 13 | 1. Install Python (>=3.6) 14 | 15 | Download and install Python 3.6+: . 16 | 17 | Also, create a symbolic link from `python3` to `python` since some scripts reply on it. 18 | ```sh 19 | sudo ln -s /usr/bin/python3 /usr/bin/python 20 | ``` 21 | 22 | 2. Install python dependencies (we recommend to use pipenv) 23 | 24 | ```sh 25 | pipenv install 26 | ``` 27 | 28 | 3. Update git 29 | 30 | In order to uset the `--indent-heuristic` option of `git diff`, we require git version >= 2.11. Use the following commands to upgrade: 31 | 32 | ```sh 33 | sudo add-apt-repository ppa:git-core/ppa -y 34 | sudo apt-get update 35 | sudo apt-get install git -y 36 | git --version 37 | ``` 38 | 39 | 4. Add project directory to path 40 | 41 | Add the following line to your `~/.bashrc` file. 42 | 43 | ```sh 44 | export PYTHONPATH=$PYTHONPATH:/path/to/dir 45 | ``` 46 | 47 | To update your path for the remainder of the session. 48 | ```sh 49 | source ~/.bashrc 50 | ``` 51 | 52 | 5. Install srcML for parsing C/C++ and Java 53 | 54 | Please download from [here](https://www.srcml.org/#download) and follow the [instructions](http://131.123.42.38/lmcrs/beta/README). 55 | 56 | srcML also needs `libarchive-dev` and `libcurl4-openssl-dev`. Install them with the following commands: 57 | 58 | ```sh 59 | sudo apt install libarchive-dev 60 | sudo apt install libcurl4-openssl-dev 61 | ``` 62 | 63 | 6. 
Check setup correctness 64 | 65 | As the test process will create Git repositories, set up your global Git user name and email before testing: 66 | ```sh 67 | git config --global user.email "you@example.com" 68 | git config --global user.name "Your Name" 69 | ``` 70 | 71 | Run the test process: 72 | ```sh 73 | pipenv run pytest test/test_analytics 74 | ``` 75 | 76 | You should see all tests passed. 77 | 78 | ## Report Test Coverage 79 | 80 | We use [coverage.py](https://coverage.readthedocs.io/) and [pytest-cov](https://pytest-cov.readthedocs.io/en/latest/) to compute test coverage: 81 | 82 | ``` 83 | # Execution 84 | pytest --cov=persper/ test/test_analytics 85 | 86 | # Reporting 87 | coverage html 88 | 89 | # then visit htmlcov/index.html in your browser 90 | ``` 91 | 92 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Persper Code Analytics's documentation! 2 | ================================================== 3 | 4 | .. automodule:: persper.analytics.call_commit_graph 5 | :members: 6 | 7 | .. 
toctree:: 8 | :maxdepth: 2 9 | :caption: Contents: 10 | 11 | 12 | 13 | Indices and tables 14 | ================== 15 | 16 | * :ref:`genindex` 17 | * :ref:`modindex` 18 | * :ref:`search` 19 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /gitconfig.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | git config --local include.path ../.gitconfig 4 | -------------------------------------------------------------------------------- /notebooks/demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# import deps\n", 10 | "import os\n", 11 | "from persper.analytics.c import CGraphServer\n", 12 | "from persper.analytics.analyzer2 import 
Analyzer\n", 13 | "from persper.analytics.graph_server import C_FILENAME_REGEXES\n", 14 | "from persper.util.path import root_path" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# configure your project\n", 24 | "repo_path = os.path.join(root_path, 'repos/')\n", 25 | "\n", 26 | "# configure alpha for devrank\n", 27 | "alpha = 0.5" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# start analysis and show commit devrank values\n", 37 | "az = Analyzer(repo_path, CGraphServer(C_FILENAME_REGEXES))\n", 38 | "await az.analyze()\n", 39 | "ccgraph = az.graph\n", 40 | "ccgraph.commit_devranks(alpha)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "code-analytics-8iDyuztf", 54 | "language": "python", 55 | "name": "code-analytics-8idyuztf" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.6.6" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 2 72 | } 73 | -------------------------------------------------------------------------------- /persper/analytics/another_patch_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | _hunkHeader = re.compile( 4 | r"^@@\s*\-(?P\d+),\s*\d+\s+\+(?P\d+),\s*\d+\s*@@") 5 | 6 | 7 | def parseUnifiedDiff(diffContent: str, lineNumberOffset: int = 0): 8 | """ 9 | Parse unified diff content, and return the ranges of added and removed lines. 
10 | Returns 11 | (addedRanges, removedRanges), where 12 | addedRanges: a list of line ranges [[startLine1, endLine1], ...] added into the new file, 13 | using the 1-based line numbers in the new file. 14 | removedRanges: a list of line ranges [[startLine1, endLine1], ...] removed from the old file, 15 | using the 1-based line numbers in the old file. 16 | """ 17 | leftLine: int = None 18 | rightLine: int = None 19 | addedRanges = [] 20 | removedRanges = [] 21 | lastAddedRange: list = None 22 | lastRemovedRange: list = None 23 | isInPrologue = True 24 | for diffLine in diffContent.rstrip("\r\n\v").split("\n"): 25 | if diffLine.startswith("@@"): 26 | match = _hunkHeader.search(diffLine) 27 | if not match: 28 | if isInPrologue: 29 | continue 30 | raise ValueError(str.format( 31 | "Invalid diff line: {0}.", diffLine)) 32 | leftLine = int(match.group("LN")) + lineNumberOffset 33 | rightLine = int(match.group("RN")) + lineNumberOffset 34 | lastAddedRange = lastRemovedRange = None 35 | isInPrologue = False 36 | elif diffLine.startswith(" "): 37 | assert leftLine != None and rightLine != None 38 | leftLine += 1 39 | rightLine += 1 40 | lastAddedRange = lastRemovedRange = None 41 | elif diffLine.startswith("-"): 42 | assert leftLine != None and rightLine != None 43 | if lastRemovedRange: 44 | lastRemovedRange[1] = leftLine 45 | else: 46 | lastRemovedRange = [leftLine, leftLine] 47 | removedRanges.append(lastRemovedRange) 48 | leftLine += 1 49 | elif diffLine.startswith("+"): 50 | assert leftLine != None and rightLine != None 51 | if lastAddedRange: 52 | lastAddedRange[1] = rightLine 53 | else: 54 | lastAddedRange = [rightLine, rightLine] 55 | addedRanges.append(lastAddedRange) 56 | rightLine += 1 57 | elif diffLine.startswith("\\"): 58 | # \ No newline at end of file 59 | # Do nothing. We ignore blank lines. 
60 | pass 61 | else: 62 | if isInPrologue: 63 | continue 64 | raise ValueError(str.format("Invalid diff line: {0}.", diffLine)) 65 | return addedRanges, removedRanges 66 | -------------------------------------------------------------------------------- /persper/analytics/call_graph/cpp.py: -------------------------------------------------------------------------------- 1 | 2 | ns = {'srcml': 'http://www.srcML.org/srcML/src', 'pos': 'http://www.srcML.org/srcML/position'} 3 | 4 | def get_func_ranges_cpp(root): 5 | func_ranges, func_names = [], [] 6 | for func_node in root.xpath('./srcml:constructor | ./srcml:function', namespaces=ns): 7 | 8 | func_name, start_line, end_line = handle_function(func_node) 9 | if not (func_name and start_line and end_line): 10 | continue 11 | 12 | func_ranges.append([start_line, end_line]) 13 | func_names.append(func_name) 14 | return func_names, func_ranges 15 | 16 | def handle_name(name_node): 17 | func_id, line = None, None 18 | if name_node != None: 19 | if name_node.text: 20 | func_id = name_node.text 21 | line = int(name_node.attrib['{http://www.srcML.org/srcML/position}line']) 22 | else: 23 | try: 24 | # alternative solution is to use 25 | # graphs.call_graph.utils.transform_node_to_src 26 | class_name = name_node[0].text 27 | line = int(name_node[0].attrib['{http://www.srcML.org/srcML/position}line']) 28 | assert(name_node[1].text == "::") 29 | func_name = name_node[2].text 30 | func_id = "{}::{}".format(class_name, func_name) 31 | except: 32 | import pdb 33 | pdb.set_trace() 34 | return func_id, line 35 | 36 | def handle_function(func_node): 37 | 38 | name_node = func_node.find('srcml:name', ns) 39 | func_id, start_line = handle_name(name_node) 40 | if not func_id or not start_line: 41 | print('Function name/start not found!') 42 | return None, None, None 43 | 44 | block_node = func_node.find('srcml:block', ns) 45 | try: 46 | pos_node = block_node.find('pos:position', ns) 47 | end_line = 
int(pos_node.attrib['{http://www.srcML.org/srcML/position}line']) 48 | except: 49 | return func_id, None, None 50 | 51 | return func_id, start_line, end_line 52 | -------------------------------------------------------------------------------- /persper/analytics/call_graph/utils.py: -------------------------------------------------------------------------------- 1 | ns = {'srcml': 'http://www.srcML.org/srcML/src', 2 | 'pos': 'http://www.srcML.org/srcML/position'} 3 | 4 | line_attr = '{http://www.srcML.org/srcML/position}line' 5 | 6 | 7 | def transform_node_to_src(node, s=None): 8 | """Print out the source code of a xml node""" 9 | if s is None: 10 | s = "" 11 | if node.text: 12 | s += node.text 13 | for child in node: 14 | s = transform_node_to_src(child, s) 15 | if node.tail: 16 | s += node.tail 17 | return s 18 | 19 | 20 | def remove_edges_of_node(G, n, in_edges=True, out_edges=True): 21 | """Remove edges of n, but keep the node itself in the graph 22 | 23 | >>> G3 = nx.DiGraph() 24 | >>> G3.add_path([0, 1, 2, 3, 4]) 25 | >>> remove_edges_of_node(G3, 2) 26 | >>> G3.nodes() 27 | [0, 1, 2, 3, 4] 28 | >>> G3.edges() 29 | [(0, 1), (3, 4)] 30 | 31 | """ 32 | try: 33 | nbrs = G._succ[n] 34 | except KeyError: # NetworkXError if not in self 35 | # raise NetworkXError("The node %s is not in the digraph."%(n, )) 36 | print("The node %s is not in the digraph." 
% n) 37 | return 38 | if out_edges: 39 | for u in nbrs: 40 | del G._pred[u][n] 41 | G._succ[n] = {} 42 | if in_edges: 43 | for u in G._pred[n]: 44 | del G._succ[u][n] 45 | G._pred[n] = {} 46 | -------------------------------------------------------------------------------- /persper/analytics/commit_classifier.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from git import Commit, DiffIndex, Repo 3 | 4 | 5 | class CommitClassifier(ABC): 6 | """ 7 | Defines the interface of any commit classifier 8 | """ 9 | 10 | @abstractmethod 11 | def predict(self, commit: Commit, diff_index: DiffIndex, repo: Repo): 12 | """ 13 | Args: 14 | commit: A gitpython's Commit object. 15 | diff_index: A gitpython's DiffIndex object. 16 | It is a list of Diff object, each containing the 17 | diff information between a pair of old/new source files. 18 | repo: A gitpython's Repo object. 19 | 20 | 21 | Returns: 22 | A list, representing the probability distribution of each label 23 | """ 24 | pass 25 | 26 | @property 27 | @abstractmethod 28 | def labels(self): 29 | """ 30 | Returns: 31 | A list of label (str), 32 | in the same order as `predict` method's output. 33 | """ 34 | pass 35 | -------------------------------------------------------------------------------- /persper/analytics/complexity.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Dict, List 3 | 4 | import numpy as np 5 | from networkx import DiGraph 6 | 7 | _logger = logging.getLogger(__file__) 8 | 9 | 10 | def eval_project_complexity(G: DiGraph, r_n: float, r_e: float): 11 | """ 12 | Evaluates project complexity from the specified bare call commit graph. 
13 | remarks 14 | The formula is 15 | complexity = sum_by_node(added_units + removed_units) + r_n*len(nodes) + r_e*len(edges) 16 | """ 17 | logical_units = 0 18 | useFallback = None 19 | for _, data in G.nodes(data=True): 20 | added = 0 21 | removed = 0 22 | for _, v in data["history"].items(): 23 | if useFallback == None: 24 | useFallback = not "added_units" in v 25 | if useFallback: 26 | _logger.warning( 27 | "Will use LOC instead of logic units to measure complexity.") 28 | if useFallback: 29 | added += v["adds"] 30 | removed += v["dels"] 31 | else: 32 | added += v["added_units"] 33 | removed += v["removed_units"] 34 | logical_units += added + removed 35 | complexity = logical_units + r_n*len(G.nodes) + r_e*len(G.edges) 36 | return complexity 37 | -------------------------------------------------------------------------------- /persper/analytics/cpp.py: -------------------------------------------------------------------------------- 1 | from persper.analytics.c import CGraphServer 2 | 3 | CPPGraphServer = CGraphServer 4 | -------------------------------------------------------------------------------- /persper/analytics/devrank.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import linalg as LA 3 | from scipy.sparse import coo_matrix 4 | 5 | 6 | def devrank(G, weight_label, alpha=0.85, epsilon=1e-5, max_iters=300): 7 | """Memory efficient DevRank using scipy.sparse 8 | 9 | Args: 10 | G - A nx.Digraph object. 11 | weight_label - A string, each node in graph should have this attribute. 12 | - It will be used as the weight of each node. 13 | alpha - A float between 0 and 1, DevRank's damping factor. 14 | epsilon - A float. 15 | max_iters - An integer, specify max number of iterations to run. 16 | 17 | Returns: 18 | A dict with node names being keys and DevRanks being values. 
19 | """ 20 | ni = {} 21 | for i, u in enumerate(G): 22 | ni[u] = i 23 | 24 | def sizeof(u): 25 | return G.node[u][weight_label] 26 | 27 | num_nodes = len(G.nodes()) 28 | row, col, data = [], [], [] 29 | for u in G: 30 | size_sum = 0 31 | for v in G[u]: 32 | size_sum += sizeof(v) 33 | for v in G[u]: 34 | row.append(ni[v]) 35 | col.append(ni[u]) 36 | data.append(sizeof(v) / size_sum) 37 | 38 | P = coo_matrix((data, (row, col)), shape=(num_nodes, num_nodes)).tocsr() 39 | 40 | universe_size = 0 41 | for u in G: 42 | universe_size += sizeof(u) 43 | 44 | p = np.empty(num_nodes) 45 | for u in G: 46 | p[ni[u]] = sizeof(u) / universe_size 47 | 48 | v = np.ones(num_nodes) / num_nodes 49 | 50 | for i in range(max_iters): 51 | new_v = alpha * P.dot(v) 52 | gamma = LA.norm(v, 1) - LA.norm(new_v, 1) 53 | new_v += gamma * p 54 | delta = LA.norm(new_v - v, 1) 55 | if delta < epsilon: 56 | break 57 | v = new_v 58 | 59 | dr = {} 60 | for u in G: 61 | dr[u] = v[ni[u]] 62 | 63 | return dr 64 | -------------------------------------------------------------------------------- /persper/analytics/error.py: -------------------------------------------------------------------------------- 1 | 2 | class Error(Exception): 3 | """Base class for other errors""" 4 | pass 5 | 6 | 7 | class GraphServerError(Error): 8 | """Base class for all `GraphServer` errors""" 9 | pass 10 | 11 | 12 | class UnexpectedASTError(GraphServerError): 13 | """Raise for unexpected ast structure""" 14 | pass 15 | -------------------------------------------------------------------------------- /persper/analytics/git_tools.py: -------------------------------------------------------------------------------- 1 | from git.exc import InvalidGitRepositoryError, NoSuchPathError 2 | from git import Repo, Commit 3 | from typing import Union 4 | import sys 5 | import git 6 | import codecs 7 | 8 | EMPTY_TREE_SHA = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' 9 | 10 | 11 | def diff_with_first_parent(repo: Repo, commit: Commit): 12 | if 
len(commit.parents) == 0: 13 | return diff_with_commit(repo, commit, None) 14 | else: 15 | return diff_with_commit(repo, commit, commit.parents[0]) 16 | 17 | 18 | def diff_with_commit(repo: Repo, current_commit: Commit, base_commit_sha: str): 19 | # about git.NULL_TREE: https://github.com/gitpython-developers/GitPython/blob/master/git/diff.py#L87 20 | if current_commit is None: 21 | current_commit = git.NULL_TREE 22 | if base_commit_sha is None: 23 | base_commit = repo.tree(EMPTY_TREE_SHA) 24 | else: 25 | base_commit = repo.commit(base_commit_sha) 26 | return base_commit.diff(current_commit, create_patch=True, indent_heuristic=True, 27 | ignore_blank_lines=True, ignore_space_change=True) 28 | 29 | 30 | def initialize_repo(repo_path): 31 | try: 32 | repo = Repo(repo_path) 33 | except InvalidGitRepositoryError as e: 34 | print("Invalid Git Repository!") 35 | sys.exit(-1) 36 | except NoSuchPathError as e: 37 | print("No such path error!") 38 | sys.exit(-1) 39 | return repo 40 | 41 | 42 | def get_contents(repo, commit, path): 43 | """Get contents of a path within a specific commit""" 44 | if type(commit) == Commit: 45 | commit = commit.hexsha 46 | byte_str = repo.git.show('{}:{}'.format(commit, path), stdout_as_string=False) 47 | # default utf-8 48 | encoding = 'utf-8' 49 | # the following code is from: https://github.com/chardet/chardet/blob/master/chardet/universaldetector.py#L137 50 | # encoding names are from here: https://docs.python.org/3/library/codecs.html 51 | if byte_str.startswith(codecs.BOM_UTF8): 52 | # EF BB BF UTF-8 with BOM 53 | encoding = 'utf-8-sig' 54 | elif byte_str.startswith(codecs.BOM_UTF32_LE): 55 | # FF FE 00 00 UTF-32, little-endian BOM 56 | encoding = 'utf-32-le' 57 | elif byte_str.startswith(codecs.BOM_UTF32_BE): 58 | # 00 00 FE FF UTF-32, big-endian BOM 59 | encoding = 'utf-32-be' 60 | elif byte_str.startswith(codecs.BOM_LE): 61 | # FF FE UTF-16, little endian BOM 62 | encoding = 'utf-16-le' 63 | elif byte_str.startswith(codecs.BOM_BE): 64 
def inverse_diff(adds, dels):
    """Compute the diff that would undo a given diff.

    Given the additions and deletions transforming file A into file B,
    return the additions and deletions that transform B back into A.

    params
        adds: list of [start, count] pairs — `count` lines were inserted
            after line `start` (line numbers refer to file A).
        dels: list of [start, end] pairs — lines `start`..`end` (inclusive,
            in file A) were removed.
        Both lists must be sorted by line number and non-overlapping.
    returns
        (inv_adds, inv_dels) in the same respective formats, expressed in
        file B's line numbers.

    >>> adds = [[11, 1], [32, 1]]
    >>> dels = [[11, 11], [31, 32]]
    >>> inverse_diff(adds, dels)
    ([[10, 1], [30, 2]], [[11, 11], [31, 31]])
    """
    # Running offset between A's and B's line numbers at the current hunk.
    diff = 0
    add_ptr, del_ptr = 0, 0
    num_adds, num_dels = len(adds), len(dels)
    inv_adds, inv_dels = [], []

    def _handle_a(a):
        # An addition in A->B becomes a deletion in B->A.
        nonlocal diff
        inv_dels.append([diff + a[0] + 1, diff + a[0] + a[1]])
        diff += a[1]

    def _handle_d(d):
        # A deletion in A->B becomes an addition in B->A.
        nonlocal diff
        inv_adds.append([diff + d[0] - 1, d[1] - d[0] + 1])
        diff -= (d[1] - d[0] + 1)

    # Merge the two sorted hunk lists in line-number order, mirroring each.
    while add_ptr < num_adds or del_ptr < num_dels:
        if add_ptr < num_adds and del_ptr < num_dels:
            if adds[add_ptr][0] < dels[del_ptr][0]:
                _handle_a(adds[add_ptr])
                add_ptr += 1
            else:
                _handle_d(dels[del_ptr])
                del_ptr += 1
        elif add_ptr < num_adds:
            # we have finished dels
            _handle_a(adds[add_ptr])
            add_ptr += 1
        else:
            # we have finished adds
            _handle_d(dels[del_ptr])
            del_ptr += 1

    return inv_adds, inv_dels
* It relies on an ANTLR-generated lexer to recognize identifier tokens, on which to perform go to definition operations. This lexer is not so reliable when there is a macro present in the file. If a certain part of the file is not covered by an ANTLR token, we won't perform go to definition there.
* You may see `jsonrpc.exceptions.JsonRpcInvalidRequest: not indexed`; this is due to ccls's job count reporting fluctuation. We have already waited for the job count to become zero after sending the 'didOpen' request.
class CallGraphNode(NamedTuple):
    """A token (e.g. an identifier) in a source file, with its position and symbol kind."""
    name: str
    kind: SymbolKind
    file: PurePath
    pos: Position
    length: int

    def __eq__(self, other):
        # `kind` is deliberately excluded from equality: the language server
        # may report the same token with different symbol kinds.
        if not isinstance(other, CallGraphNode):
            return False
        return self.name == other.name and self.file == other.file and self.pos == other.pos and self.length == other.length

    def __hash__(self):
        # Fix: must stay consistent with __eq__ — `kind` was hashed while
        # being ignored by __eq__, so equal nodes could land in different
        # hash buckets and duplicate inside sets/dicts.
        return hash((self.name, self.file, self.pos, self.length))


class CallGraphScope(NamedTuple):
    """A named scope (e.g. a function body) spanning a position range in a source file."""
    name: str
    kind: SymbolKind
    file: PurePath
    startPos: Position
    endPos: Position

    def __eq__(self, other):
        # `kind` is deliberately excluded, mirroring CallGraphNode.__eq__.
        if not isinstance(other, CallGraphScope):
            return False
        return self.name == other.name and self.file == other.file and self.startPos == other.startPos \
            and self.endPos == other.endPos

    def __hash__(self):
        # Fix: consistent with __eq__ — `kind` excluded (see CallGraphNode).
        return hash((self.name, self.file, self.startPos, self.endPos))
class CallGraph():
    """A collection of call graph branches (caller/callee edges)."""

    def __init__(self):
        # Fix: this was a set(), but `add` calls .append (AttributeError on
        # the first insertion), and CallGraphBranch defines __eq__ without
        # __hash__ and is therefore unhashable anyway. A list also matches
        # removeBySourceFiles, which already rebuilds `_items` as a list.
        self._items = []

    @property
    def items(self):
        return self._items

    def add(self, branch: CallGraphBranch):
        """Add a branch; both of its scopes must be resolved (non-None)."""
        if not branch.sourceScope:
            raise ValueError("branch.sourceScope should not be None.")
        if not branch.definitionScope:
            raise ValueError("branch.definitionScope should not be None.")
        self._items.append(branch)

    def clear(self):
        self._items.clear()

    def removeBySourceFiles(self, fileNames: Iterable[PurePath]):
        """Drop every branch whose source scope lives in one of `fileNames`."""
        if not isinstance(fileNames, set):
            fileNames = set(fileNames)
        newItems = [i for i in self._items if i.sourceScope.file not in fileNames]
        _logger.info("Removed %d branches by %d files.", len(self._items) - len(newItems), len(fileNames))
        self._items = newItems

    def dump(self, file: IOBase):
        """Write one branch per line to an open text file object."""
        for i in self._items:
            file.write(str(i))
            file.write("\n")

    def dumpTo(self, fileName: str):
        with open(fileName, "wt") as f:
            self.dump(f)
class JsonRpcLogger():
    """Logs JSON RPC traffic (sent/received messages) with timestamps to a text file.

    Usable as a context manager; the log file is closed on exit.
    """

    def __init__(self, fileName):
        self._fileName = fileName
        self._file = open(fileName, "wt")

    def logTX(self, message: dict):
        """Log an outgoing (transmitted) message."""
        self._file.write("{0} < {1}\n".format(datetime.now(), json.dumps(message)))
        # Flush on shutdown so the session tail is not lost if the process
        # terminates right after the server shuts down.
        if message.get("method", None) == "shutdown":
            self._file.flush()

    def logRX(self, message: dict):
        """Log an incoming (received) message."""
        self._file.write("{0} > {1}\n".format(datetime.now(), json.dumps(message)))

    def __enter__(self):
        # Fix: __exit__ was defined without __enter__, so the class could not
        # actually be used in a `with` statement and the handle leaked.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._file.close()
class LoggedJsonRpcStreamWriter(JsonRpcStreamWriter):
    """A JsonRpcStreamWriter that mirrors every outgoing message to a JsonRpcLogger."""

    def __init__(self, wfile, logger: JsonRpcLogger, **json_dumps_args):
        # json_dumps_args is forwarded verbatim to the base writer.
        super().__init__(wfile, **json_dumps_args)
        self._logger = logger

    def write(self, message):
        # Log first, then delegate serialization/transmission to the base class.
        self._logger.logTX(message)
        super().write(message)
3 | """ 4 | import logging 5 | import threading 6 | 7 | from jsonrpc.dispatchers import MethodDispatcher 8 | from jsonrpc.endpoint import Endpoint 9 | from jsonrpc.streams import JsonRpcStreamReader, JsonRpcStreamWriter 10 | 11 | from persper.analytics.lsp_graph_server.languageclient.lspcontract import MessageType, Registration 12 | from persper.analytics.lsp_graph_server.languageclient.lspserver import LspServerStub 13 | from persper.analytics.lsp_graph_server.jsonrpcutils import LoggedJsonRpcStreamReader, LoggedJsonRpcStreamWriter, JsonRpcLogger 14 | 15 | _logger = logging.getLogger(__name__) 16 | 17 | 18 | class LspClient(MethodDispatcher): 19 | def __init__(self, rx, tx, logFile: str = None): 20 | super().__init__() 21 | self._rpclogger = JsonRpcLogger(logFile) if logFile else None 22 | self._streamReader = LoggedJsonRpcStreamReader(rx, self._rpclogger) if logFile else JsonRpcStreamReader(rx) 23 | self._streamWriter = LoggedJsonRpcStreamWriter(tx, self._rpclogger) if logFile else JsonRpcStreamWriter(tx) 24 | self._nextJsonRpcMessageId = 0 25 | # Some language server, e.g. cquery, only supports numerical request Ids. 
26 | self._endpoint = Endpoint(self, self._streamWriter.write, self.nextJsonRpcMessageId) 27 | self._listenerThread = None 28 | self._shutdownEvent = threading.Event() 29 | self._serverStub = LspServerStub(self._endpoint) 30 | 31 | def nextJsonRpcMessageId(self): 32 | self._nextJsonRpcMessageId += 1 33 | if self._nextJsonRpcMessageId >= 0x7FFFFFFF: 34 | self._nextJsonRpcMessageId = 0 35 | return str(self._nextJsonRpcMessageId) 36 | 37 | def start(self): 38 | self._listenerThread = threading.Thread(target=self._startListener, daemon=True) 39 | self._listenerThread.start() 40 | 41 | def stop(self): 42 | self._endpoint.shutdown() 43 | self._streamReader.close() 44 | self._streamWriter.close() 45 | self._shutdownEvent.set() 46 | self._listenerThread.join(timeout=30) 47 | 48 | def initializeServer(self): 49 | raise NotImplementedError() 50 | 51 | @property 52 | def server(self): 53 | return self._serverStub 54 | 55 | def _startListener(self): 56 | self._streamReader.listen(self._endpoint.consume) 57 | 58 | def m_window__show_message(self, type: MessageType, message: str): 59 | type = MessageType(type) 60 | _logger.info(type, message) 61 | 62 | def m_window__show_message_request(self, type: MessageType, message: str, actions): 63 | type = MessageType(type) 64 | print(type, message, actions) 65 | return actions[0]["title"] 66 | 67 | def m_window__log_message(self, type: MessageType, message: str): 68 | type = MessageType(type) 69 | _logger.info(type, message) 70 | 71 | def m_text_document__publish_diagnostics(self, uri: str, diagnostics): 72 | # ignore all diagnostic information for now. 
73 | pass 74 | 75 | def m_client__register_capability(self, registrations: list): 76 | regs = [Registration.fromDict(r) for r in registrations] 77 | -------------------------------------------------------------------------------- /persper/analytics/lsp_graph_server/setup.cfg: -------------------------------------------------------------------------------- 1 | [pep8] 2 | max-line-length = 120 3 | -------------------------------------------------------------------------------- /persper/analytics/lsp_graph_server/wildcards.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | 5 | def translate(pat): 6 | """ 7 | Translate a shell PATTERN to a regular expression. 8 | There is no way to quote meta-characters. 9 | This version can handle **/ pattern properly, compared with fnmatch. 10 | """ 11 | 12 | i, n = 0, len(pat) 13 | res = '' 14 | while i < n: 15 | c = pat[i] 16 | i = i + 1 17 | if c == '*': 18 | if i < n and pat[i] == '*': 19 | res = res + '.*?' 20 | i = i + 1 21 | if i < n and pat[i] == os.sep: 22 | i = i + 1 23 | else: 24 | res = res + r'[^\\/]+' 25 | elif c == '?': 26 | res = res + '.' 
def pagerank(G, alpha=0.85, epsilon=1e-5, max_iters=300):
    """Memory efficient PageRank using scipy.sparse.

    Implements Algo 1. in "A Survey on PageRank Computing".

    params
        G: directed graph (networkx-style: iterating yields nodes, `G[u]`
            maps to u's successors, `G.nodes()` lists all nodes).
        alpha: damping factor.
        epsilon: L1-norm convergence threshold.
        max_iters: hard cap on power iterations.
    returns
        dict mapping each node of G to its PageRank score.
    """
    # Assign every node a dense matrix index.
    index_of = {node: idx for idx, node in enumerate(G)}
    n = len(G.nodes())

    # Build the column-stochastic transition matrix in COO form:
    # entry (v, u) = 1/outdeg(u) for each edge u -> v.
    rows, cols, vals = [], [], []
    for u in G:
        successors = G[u]
        if len(successors) > 0:
            weight = 1 / len(successors)
            for v in successors:
                rows.append(index_of[v])
                cols.append(index_of[u])
                vals.append(weight)

    P = coo_matrix((vals, (rows, cols)), shape=(n, n)).tocsr()
    teleport = np.ones(n) / n   # uniform personalization vector
    v = np.ones(n) / n          # current rank estimate

    for _ in range(max_iters):
        # One damped power-iteration step; probability mass lost to damping
        # and dangling nodes is redistributed uniformly via `teleport`.
        v_next = alpha * P.dot(v)
        lost_mass = LA.norm(v, 1) - LA.norm(v_next, 1)
        v_next += lost_mass * teleport
        if LA.norm(v_next - v, 1) < epsilon:
            break
        v = v_next

    return {u: v[index_of[u]] for u in G}
import re 2 | 3 | 4 | class PatchParser(): 5 | 6 | def __init__(self): 7 | self.re_chunk_header = re.compile(r"""\@\@\s* 8 | \-(?P\d+)(,(?P\d+))?\s* 9 | \+(?P\d+)(,(?P\d+))?\s* 10 | \@\@ 11 | """, re.VERBOSE) 12 | 13 | def clean(self): 14 | self.additions = [] 15 | self.deletions = [] 16 | self.in_add, self.in_del = False, False 17 | self.in_chunk = False 18 | 19 | self.add_start, self.del_start = None, None 20 | self.add_num_lines = None 21 | self.cur = None 22 | 23 | def start_add(self): 24 | self.in_add = True 25 | self.add_start = self.cur - 1 26 | self.add_num_lines = 1 27 | 28 | def start_del(self): 29 | self.in_del = True 30 | self.del_start = self.cur 31 | 32 | def finish_add(self): 33 | self.in_add = False 34 | self.additions.append([self.add_start, self.add_num_lines]) 35 | 36 | def finish_del(self): 37 | self.in_del = False 38 | self.deletions.append([self.del_start, self.cur - 1]) 39 | 40 | def parse(self, text): 41 | self.clean() 42 | for line in text.split('\n'): 43 | line = line.strip() 44 | if not self.in_chunk: 45 | if line.startswith('@@'): 46 | self.in_chunk = True 47 | else: 48 | continue 49 | 50 | if line.startswith('@@'): 51 | m = self.re_chunk_header.search(line) 52 | self.cur = max(int(m.groups()[0]), 1) 53 | elif line.startswith('-'): 54 | # print("in minus") 55 | if self.in_add: 56 | self.finish_add() 57 | self.start_del() 58 | elif self.in_del: 59 | pass 60 | else: 61 | self.start_del() 62 | self.cur += 1 # always increment in minus 63 | elif line.startswith('+'): 64 | # print("in plus") 65 | if self.in_add: 66 | self.add_num_lines += 1 67 | elif self.in_del: 68 | self.finish_del() 69 | self.start_add() 70 | else: 71 | self.start_add() 72 | elif line.startswith('\\'): 73 | # Ignore \No newline at the end of file 74 | pass 75 | else: 76 | # print("in blank") 77 | if self.in_add: 78 | self.finish_add() 79 | elif self.in_del: 80 | self.finish_del() 81 | else: 82 | pass 83 | self.cur += 1 # always increment in blank 84 | 85 | if self.in_add: 
def normalize(scores: Dict[str, float]) -> Dict[str, float]:
    """Scale `scores` so that its values sum to 1, preserving proportions."""
    score_sum = sum(scores.values())
    return {idx: score / score_sum for idx, score in scores.items()}


def commit_overall_scores(commit_devranks: Dict[str, float],
                          clf_results: Dict[str, List[float]],
                          label_weights: List[float],
                          top_one=False,
                          additive=False) -> Dict[str, float]:
    """Combine per-commit devranks with classifier category scores.

    params
        commit_devranks: sha -> devrank value.
        clf_results: sha -> per-category classifier scores; must contain
            every sha present in commit_devranks.
        label_weights: weight assigned to each category.
        top_one: when True, only the highest-scored category counts
            (treated as a one-hot vector).
        additive: when True, combine category weight and devrank additively
            instead of multiplicatively.
    returns
        Normalized (summing to 1) sha -> overall score mapping.
    """
    overall_scores = {}
    for sha, dr in commit_devranks.items():
        # Fix: the assert message never interpolated the sha, so failures
        # printed the literal "%s" instead of the offending commit.
        assert sha in clf_results, "Commit %s does not have label." % sha
        if top_one:
            top_idx = np.argmax(clf_results[sha])
            category_vec = np.zeros(len(label_weights))
            category_vec[top_idx] = 1
        else:
            category_vec = clf_results[sha]

        if additive:
            overall_scores[sha] = np.dot(category_vec, label_weights) + len(commit_devranks) * dr
        else:
            overall_scores[sha] = np.dot(category_vec, label_weights) * dr

    return normalize(overall_scores)
3 | """ -------------------------------------------------------------------------------- /persper/analytics2/abstractions/analyzers.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod, abstractproperty 2 | from typing import IO, Iterable, NoReturn 3 | 4 | from aenum import Enum 5 | 6 | from persper.analytics2.abstractions.repository import ICommitInfo 7 | 8 | 9 | class CommitAnalysisStopReason(Enum): 10 | _init_ = "value __doc__" 11 | ReachedTerminalCommit = 0, "Terminal commit has reached." 12 | ReachedMaximumCommits = 1, "Maximum commit analysis count has reached." 13 | FatalError = 2, "An exception occurred during analyzing a commit." 14 | Abort = 3, "User or other external signal aborts the analysis before its completion." 15 | 16 | 17 | class AnalysisStatus(): 18 | """ 19 | An immutable status snapshot of meta analysis. Usually used to provide status information for `IPostAnalyzer`. 20 | """ 21 | 22 | def __init__(self, stop_reason: CommitAnalysisStopReason, exception: Exception, 23 | origin_commit_ref: str, terminal_commit_ref: str, 24 | analyzed_commits_ref: Iterable[str], last_commit_ref: str): 25 | self._stop_reason = stop_reason 26 | self._exception = exception 27 | self._origin_commit_ref = origin_commit_ref 28 | self._terminal_commit_ref = terminal_commit_ref 29 | self._analyzed_commits_ref = analyzed_commits_ref 30 | self._last_commit_ref = last_commit_ref 31 | 32 | @property 33 | def stop_reason(self): 34 | return self._stop_reason 35 | 36 | @property 37 | def exception(self): 38 | """ 39 | Gets the Exception that caused failure of analysis. 40 | """ 41 | return self._exception 42 | 43 | @property 44 | def origin_commit_ref(self): 45 | """ 46 | Gets the commit ref of intended analysis origin. 47 | """ 48 | return self._origin_commit_ref 49 | 50 | @property 51 | def terminal_commit_ref(self): 52 | """ 53 | Gets the commit ref of intended analysis terminal (inclusive). 
54 | """ 55 | return self._terminal_commit_ref 56 | 57 | @property 58 | def analyzed_commits_ref(self): 59 | """ 60 | Gets a list of commits that are actually analyzed completely. 61 | remarks 62 | The list will exclude all the commits that are skipped or failed to analyze. 63 | """ 64 | return self._analyzed_commits_ref 65 | 66 | @property 67 | def last_commit_ref(self): 68 | """ 69 | Gets the the last commit ref being analyzed before the analysis stops. 70 | remarks 71 | If there are fatal error analyzing the commit, this member should be the commit that causes the error. 72 | """ 73 | return self._last_commit_ref 74 | 75 | 76 | class ICommitAnalyzer(ABC): 77 | """ 78 | Provides functionality for analyzing a single commit. 79 | remarks 80 | The implementation will decide where to put the analysis result. 81 | """ 82 | @abstractmethod 83 | def analyze(self, commit: ICommitInfo) -> None: 84 | pass 85 | 86 | 87 | class IPostAnalyzer(ABC): 88 | """ 89 | Provides functionality for doing post-analysis after the commit analysis ends due to 90 | completion or exception. 
91 | """ 92 | @abstractmethod 93 | def analyze(self, status: AnalysisStatus) -> None: 94 | pass 95 | -------------------------------------------------------------------------------- /persper/analytics2/devrank.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from time import monotonic 3 | from typing import Iterable 4 | 5 | from persper.analytics2.abstractions.analyzers import (AnalysisStatus, 6 | ICommitAnalyzer, 7 | IPostAnalyzer) 8 | from persper.analytics2.abstractions.callcommitgraph import ( 9 | IGraphServer, IReadOnlyCallCommitGraph, IWriteOnlyCallCommitGraph) 10 | from persper.analytics2.abstractions.repository import ICommitInfo 11 | 12 | _logger = logging.getLogger(__file__) 13 | 14 | 15 | class CallCommitGraphAnalyzer(ICommitAnalyzer): 16 | def __init__(self, graph_servers: Iterable[IGraphServer], call_commit_graph: IWriteOnlyCallCommitGraph): 17 | assert graph_servers 18 | assert call_commit_graph 19 | self._graph_servers = list(graph_servers) 20 | # We only need this for flushing. 21 | # We actually can flush the graph at a later stage. 22 | self._call_commit_graph = call_commit_graph 23 | 24 | def analyze(self, commit: ICommitInfo): 25 | assert commit 26 | for gs in self._graph_servers: 27 | t0 = monotonic() 28 | _logger.info("Analyzing %s with %s...", commit, gs) 29 | assert isinstance(gs, IGraphServer) 30 | gs.update_graph(commit) 31 | _logger.info("%s finished in %.2fs.", gs, monotonic() - t0) 32 | t0 = monotonic() 33 | self._call_commit_graph.flush() 34 | _logger.info("Call commit graph flush used %.2fs.", monotonic() - t0) 35 | 36 | 37 | class DevRankAnalyzer(IPostAnalyzer): 38 | def __init__(self, call_commit_graph: IReadOnlyCallCommitGraph): 39 | assert call_commit_graph 40 | self._call_commit_graph = call_commit_graph 41 | 42 | def analyze(self, status: AnalysisStatus): 43 | # TODO put analysis code here. 
class NodeHistoryAccumulator():
    """
    Provides convenient methods for accumulating node history.
    (i.e. the added/removed lines to the same node in a single commit)
    """

    def __init__(self):
        # [NodeId]: [added_lines, removed_lines]
        self._nodes = {}

    def clear(self):
        """
        Clears all the accumulated history information contained in this instance.
        """
        self._nodes.clear()

    def add(self, node_id: NodeId, added_lines: int = 0, removed_lines: int = 0):
        """
        Accumulates the added/removed lines of code to the specific node_id.
        """
        info = self._nodes.get(node_id, None)
        if info is None:
            # Arguments are validated lazily: only the first time a node is seen.
            if not isinstance(node_id, NodeId):
                raise ValueError("node_id should be NodeId.")
            if not isinstance(added_lines, int):
                raise ValueError("added_lines should be int.")
            if not isinstance(removed_lines, int):
                raise ValueError("removed_lines should be int.")
            # All-zero entries are not stored, keeping `get` consistent.
            if added_lines != 0 or removed_lines != 0:
                self._nodes[node_id] = [added_lines, removed_lines]
        else:
            info[0] += added_lines
            info[1] += removed_lines

    def get(self, node_id: NodeId):
        """
        Gets the accumulated added/removed lines of code for the specified node ID.
        returns
            (added_lines: int, removed_lines: int)
        """
        info = self._nodes.get(node_id, None)
        if info is None:
            if not isinstance(node_id, NodeId):
                raise ValueError("node_id should be NodeId.")
            return 0, 0
        return info[0], info[1]

    def apply(self, graph: IWriteOnlyCallCommitGraph, commit_hexsha: str):
        """
        Applies the node history contained in this instance to the specified call commit graph.
        params
            graph: the call commit graph to be updated.
            commit_hexsha: When updating the call commit graph, specify the current commit hexsha.
        remarks
            You may want to call `clear` to reset the change history after calling this method.
        """
        # Fix: iterating the dict directly yields only keys; .items() is
        # required to unpack (node_id, [added, removed]) pairs — the original
        # raised at runtime on the first non-empty apply().
        for node_id, (added, removed) in self._nodes.items():
            graph.update_node_history(node_id, commit_hexsha, added, removed)
def normalize_with_coef(scores: Dict[str, float], coef=1.0) -> Dict[str, float]:
    """Scale `scores` so that its values sum to `coef`, preserving proportions."""
    total = sum(scores.values())
    return {key: value / total * coef for key, value in scores.items()}
/requirements.txt: -------------------------------------------------------------------------------- 1 | joblib==0.11 2 | nltk==3.2.2 3 | openpyxl==2.3.2 4 | pytest==3.0.7 5 | numpy==1.12.1 6 | networkx==1.11 7 | matplotlib==1.5.3 8 | requests==2.13.0 9 | spacy==1.8.2 10 | scipy==0.18.1 11 | GitPython==2.1.10 12 | lxml==4.2.1 13 | sh==1.12.14 14 | scikit_learn==0.19.1 15 | pydotplus==2.0.2 16 | -------------------------------------------------------------------------------- /setup-linux-ubuntu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo update-locale LANGUAGE="en_US.UTF-8" 4 | sudo update-locale LC_ALL="en_US.UTF-8" 5 | export LC_ALL="en_US.UTF-8" 6 | export LC_CTYPE="en_US.UTF-8" 7 | 8 | sudo apt-get update 9 | sudo apt install -y python3 python3-pip 10 | sudo -H pip3 install --upgrade pip 11 | sudo -H pip3 install lxml networkx numpy scipy gitpython openpyxl 12 | 13 | if [ ! -f misc/.done ]; then 14 | cd misc/ 15 | sudo ./apply_patch.py 16 | touch .done 17 | cd .. 18 | fi 19 | 20 | if [ ! -d ./repos/linux ]; then 21 | git clone https://github.com/torvalds/linux.git ./repos/linux 22 | git -C ./repos/linux checkout v4.10 23 | fi 24 | 25 | if [ ! -f srcML-Ubuntu14.04-64.deb ]; then 26 | wget http://131.123.42.38/lmcrs/beta/srcML-Ubuntu14.04-64.deb 27 | sudo dpkg -i srcML-Ubuntu14.04-64.deb 28 | sudo apt install -y libarchive-dev libcurl3 29 | fi 30 | 31 | if [ ! 
from setuptools import setup

# Package metadata and install-time dependencies for the code-analytics project.
setup(
    name="code-analytics",
    version="0.1b1",
    install_requires=[
        "networkx",
        "gitpython",
        "numpy",
        "scipy",
        "requests",
        "lxml",
        # "sklearn" was a deprecated PyPI alias and is now blocked by PyPI;
        # the correct distribution name is "scikit-learn".
        "scikit-learn",
        "nltk",
        "openpyxl",
        "pytest",
        "pydot",
        "ipykernel",
        "matplotlib",
        "antlr4-python3-runtime",
        "python-jsonrpc-server",
        "pytest-asyncio",
        "aenum",
        "pytest-cov",
        "sphinx",
        "python-louvain",
    ],
)
`test/test_analytics/test_analyzer_lsp_ccls.py` 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/__init__.py -------------------------------------------------------------------------------- /test/cpp_test_files_repo/A/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | 4 | int addition (int a, int b) 5 | { 6 | int r; 7 | r=a+b; 8 | return r; 9 | } 10 | 11 | int main () 12 | { 13 | int z; 14 | z = addition (5,3); 15 | cout << "The result is " << z; 16 | } 17 | -------------------------------------------------------------------------------- /test/cpp_test_files_repo/B/main_renamed.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | 4 | int addition (int a, int b) 5 | { 6 | int r; 7 | r=a+b; 8 | return r; 9 | } 10 | 11 | int main () 12 | { 13 | int z; 14 | z = addition (5,3); 15 | cout << "The result is " << z; 16 | } 17 | -------------------------------------------------------------------------------- /test/cpp_test_files_repo/C/another_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | 4 | void printmessage () 5 | { 6 | cout << "I'm a function!"; 7 | } 8 | 9 | int main () 10 | { 11 | printmessage(); 12 | } 13 | -------------------------------------------------------------------------------- /test/cpp_test_files_repo/C/main_renamed.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace std; 3 | 4 | int addition (int a, int b) 5 | { 6 | int r; 7 | r=a+b; 8 | return r; 9 | } 10 | 11 | int main () 12 | { 13 | int z; 14 | z 
= addition (5,3); 15 | cout << "The result is " << z; 16 | } 17 | -------------------------------------------------------------------------------- /test/cpp_test_files_repo/cg.dot: -------------------------------------------------------------------------------- 1 | digraph c_test_files_repo { 2 | A -> B -> C ; 3 | } -------------------------------------------------------------------------------- /test/cpp_test_repo/A/Exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // ????????????û???????????? 8 | class Exception : public std::exception 9 | { 10 | std::string msg; 11 | public: 12 | // ??????????????????????? 13 | Exception() : msg("?????????????") {} 14 | // ??????????????????????????? 15 | explicit Exception(const std::string& message) : msg(message) 16 | { 17 | #if _DEBUG 18 | std::cerr << "Exception constructed: " << message << std::endl; 19 | #endif 20 | } 21 | ~Exception() noexcept override { } 22 | const char* what() const noexcept override { return msg.c_str(); } 23 | }; 24 | 25 | // ????????????????????????????????????????? 26 | class InvalidCastException : public Exception 27 | { 28 | public: 29 | InvalidCastException(const std::string& message) : Exception(message) { } 30 | }; 31 | 32 | // ????????????????????????? 33 | class ArgumentException : public Exception 34 | { 35 | static std::string BuildMessage(const std::string& message, const std::string& argumentName) 36 | { 37 | if (argumentName.empty()) return message; 38 | return message + " ????????" 
+ argumentName + "??"; 39 | } 40 | std::string _ArgumentName; 41 | public: 42 | const std::string& ArgumentName() const { return _ArgumentName; } 43 | public: 44 | ArgumentException(const std::string& message) 45 | : Exception(message) { } 46 | ArgumentException(const std::string& message, const std::string& argumentName) 47 | : Exception(BuildMessage(message, argumentName)), _ArgumentName(argumentName) { } 48 | }; 49 | 50 | // ????????????????????????????????????????? 51 | class OperationFailureException : public Exception 52 | { 53 | private: 54 | int _ErrorCode; 55 | public: 56 | explicit OperationFailureException(int errorCode) 57 | : Exception("???????????????" + std::to_string(errorCode) + "??"), _ErrorCode(errorCode) 58 | { 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /test/cpp_test_repo/A/TextFileParsers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/A/TextFileParsers.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/A/TextFileParsers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/A/TextFileParsers.h -------------------------------------------------------------------------------- /test/cpp_test_repo/A/TypeTraits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | template 5 | struct is_flags : std::false_type 6 | { 7 | }; 8 | 9 | template constexpr bool is_flags_v = is_flags::value; 10 | -------------------------------------------------------------------------------- /test/cpp_test_repo/A/Utility.cpp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/A/Utility.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/A/main.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "TextFileParsers.h" 3 | #include "Utility.h" 4 | 5 | using namespace std; 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | auto ifs = OpenAndValidate("config.txt"); 10 | auto parser = ConfigurationParser(ifs); 11 | cout << parser.GetBool("testBool", false) << endl; 12 | return 0; 13 | } 14 | -------------------------------------------------------------------------------- /test/cpp_test_repo/A/stdafx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/A/stdafx.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/A/stdafx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/A/stdafx.h -------------------------------------------------------------------------------- /test/cpp_test_repo/B/Exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // ????????????û???????????? 8 | class Exception : public std::exception 9 | { 10 | std::string msg; 11 | public: 12 | // ??????????????????????? 13 | Exception() : msg("?????????????") {} 14 | // ??????????????????????????? 
15 | explicit Exception(const std::string& message) : msg(message) 16 | { 17 | #if _DEBUG 18 | std::cerr << "Exception constructed: " << message << std::endl; 19 | #endif 20 | } 21 | ~Exception() noexcept override { } 22 | const char* what() const noexcept override { return msg.c_str(); } 23 | }; 24 | 25 | // ????????????????????????????????????????? 26 | class InvalidCastException : public Exception 27 | { 28 | public: 29 | InvalidCastException(const std::string& message) : Exception(message) { } 30 | }; 31 | 32 | // ????????????????????????? 33 | class ArgumentException : public Exception 34 | { 35 | static std::string BuildMessage(const std::string& message, const std::string& argumentName) 36 | { 37 | if (argumentName.empty()) return message; 38 | return message + " ????????" + argumentName + "??"; 39 | } 40 | std::string _ArgumentName; 41 | public: 42 | const std::string& ArgumentName() const { return _ArgumentName; } 43 | public: 44 | ArgumentException(const std::string& message) 45 | : Exception(message) { } 46 | ArgumentException(const std::string& message, const std::string& argumentName) 47 | : Exception(BuildMessage(message, argumentName)), _ArgumentName(argumentName) { } 48 | }; 49 | 50 | // ????????????????????????????????????????? 51 | class OperationFailureException : public Exception 52 | { 53 | private: 54 | int _ErrorCode; 55 | public: 56 | explicit OperationFailureException(int errorCode) 57 | : Exception("???????????????" 
+ std::to_string(errorCode) + "??"), _ErrorCode(errorCode) 58 | { 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /test/cpp_test_repo/B/TextFileParsers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/B/TextFileParsers.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/B/TextFileParsers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/B/TextFileParsers.h -------------------------------------------------------------------------------- /test/cpp_test_repo/B/TypeTraits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | template 5 | struct is_flags : std::false_type 6 | { 7 | }; 8 | 9 | template constexpr bool is_flags_v = is_flags::value; 10 | -------------------------------------------------------------------------------- /test/cpp_test_repo/B/Utility-1.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/B/Utility-1.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/B/main.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "TextFileParsers.h" 3 | #include "Utility.h" 4 | 5 | using namespace std; 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | auto ifs = OpenAndValidate("config.txt"); 10 | auto parser = ConfigurationParser(ifs); 11 | cout << parser.GetBool("testBool", false) << endl; 12 | cout << 
parser.GetDouble("textDouble", 1.23) << endl; 13 | cout << parser.GetString("rawValue", "test") << endl; 14 | exception testException("message"); 15 | ReportException(testException); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /test/cpp_test_repo/B/stdafx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/B/stdafx.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/B/stdafx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/B/stdafx.h -------------------------------------------------------------------------------- /test/cpp_test_repo/C/CppProject1.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 6 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /test/cpp_test_repo/C/Exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // ????????????û???????????? 8 | class Exception : public std::exception 9 | { 10 | std::string msg; 11 | public: 12 | // ??????????????????????? 13 | Exception() : msg("?????????????") {} 14 | // ??????????????????????????? 
15 | explicit Exception(const std::string& message) : msg(message) 16 | { 17 | #if _DEBUG 18 | std::cerr << "Exception constructed: " << message << std::endl; 19 | #endif 20 | } 21 | ~Exception() noexcept override { } 22 | const char* what() const noexcept override { return msg.c_str(); } 23 | }; 24 | 25 | // ????????????????????????????????????????? 26 | class InvalidCastException : public Exception 27 | { 28 | public: 29 | InvalidCastException(const std::string& message) : Exception(message) { } 30 | }; 31 | 32 | // ????????????????????????? 33 | class ArgumentException : public Exception 34 | { 35 | static std::string BuildMessage(const std::string& message, const std::string& argumentName) 36 | { 37 | if (argumentName.empty()) return message; 38 | return message + " ????????" + argumentName + "??"; 39 | } 40 | std::string _ArgumentName; 41 | public: 42 | const std::string& ArgumentName() const { return _ArgumentName; } 43 | public: 44 | ArgumentException(const std::string& message) 45 | : Exception(message) { } 46 | ArgumentException(const std::string& message, const std::string& argumentName) 47 | : Exception(BuildMessage(message, argumentName)), _ArgumentName(argumentName) { } 48 | }; 49 | 50 | // ????????????????????????????????????????? 51 | class OperationFailureException : public Exception 52 | { 53 | private: 54 | int _ErrorCode; 55 | public: 56 | explicit OperationFailureException(int errorCode) 57 | : Exception("???????????????" 
+ std::to_string(errorCode) + "??"), _ErrorCode(errorCode) 58 | { 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /test/cpp_test_repo/C/TextFileParsers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/C/TextFileParsers.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/C/TextFileParsers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/C/TextFileParsers.h -------------------------------------------------------------------------------- /test/cpp_test_repo/C/TypeTraits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | template 5 | struct is_flags : std::false_type 6 | { 7 | }; 8 | 9 | template constexpr bool is_flags_v = is_flags::value; 10 | -------------------------------------------------------------------------------- /test/cpp_test_repo/C/Utility.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/C/Utility.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/C/main.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "TextFileParsers.h" 3 | #include "Utility.h" 4 | 5 | using namespace std; 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | auto ifs = OpenAndValidate("config.txt"); 10 | auto parser = ConfigurationParser(ifs); 11 | cout << parser.GetBool("testBool", false) << endl; 12 | return 0; 13 | } 14 | 
-------------------------------------------------------------------------------- /test/cpp_test_repo/C/stdafx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/C/stdafx.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/C/stdafx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/C/stdafx.h -------------------------------------------------------------------------------- /test/cpp_test_repo/D/Exceptions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // ????????????û???????????? 8 | class Exception : public std::exception 9 | { 10 | std::string msg; 11 | public: 12 | // ??????????????????????? 13 | Exception() : msg("?????????????") {} 14 | // ??????????????????????????? 15 | explicit Exception(const std::string& message) : msg(message) 16 | { 17 | #if _DEBUG 18 | std::cerr << "Exception constructed: " << message << std::endl; 19 | #endif 20 | } 21 | ~Exception() noexcept override { } 22 | const char* what() const noexcept override { return msg.c_str(); } 23 | }; 24 | 25 | // ????????????????????????????????????????? 26 | class InvalidCastException : public Exception 27 | { 28 | public: 29 | InvalidCastException(const std::string& message) : Exception(message) { } 30 | }; 31 | 32 | // ????????????????????????? 33 | class ArgumentException : public Exception 34 | { 35 | static std::string BuildMessage(const std::string& message, const std::string& argumentName) 36 | { 37 | if (argumentName.empty()) return message; 38 | return message + " ????????" 
+ argumentName + "??"; 39 | } 40 | std::string _ArgumentName; 41 | public: 42 | const std::string& ArgumentName() const { return _ArgumentName; } 43 | public: 44 | ArgumentException(const std::string& message) 45 | : Exception(message) { } 46 | ArgumentException(const std::string& message, const std::string& argumentName) 47 | : Exception(BuildMessage(message, argumentName)), _ArgumentName(argumentName) { } 48 | }; 49 | 50 | // ????????????????????????????????????????? 51 | class OperationFailureException : public Exception 52 | { 53 | private: 54 | int _ErrorCode; 55 | public: 56 | explicit OperationFailureException(int errorCode) 57 | : Exception("???????????????" + std::to_string(errorCode) + "??"), _ErrorCode(errorCode) 58 | { 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /test/cpp_test_repo/D/TextFileParsers.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/D/TextFileParsers.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/D/TextFileParsers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/D/TextFileParsers.h -------------------------------------------------------------------------------- /test/cpp_test_repo/D/TypeTraits.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | template 5 | struct is_flags : std::false_type 6 | { 7 | }; 8 | 9 | template constexpr bool is_flags_v = is_flags::value; 10 | -------------------------------------------------------------------------------- /test/cpp_test_repo/D/Utility.cpp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/D/Utility.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/D/main.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "TextFileParsers.h" 3 | #include "Utility.h" 4 | 5 | using namespace std; 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | auto ifs = ifstream("config.txt"); 10 | string line{}; 11 | getline(ifs, line); 12 | cout << line << endl; 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/cpp_test_repo/D/stdafx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/D/stdafx.cpp -------------------------------------------------------------------------------- /test/cpp_test_repo/D/stdafx.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/cpp_test_repo/D/stdafx.h -------------------------------------------------------------------------------- /test/cpp_test_repo/cg.dot: -------------------------------------------------------------------------------- 1 | digraph cpp_test_branch { 2 | A -> B -> C -> D; 3 | } 4 | -------------------------------------------------------------------------------- /test/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:inspect\.getargspec\(\) is deprecated:DeprecationWarning 4 | -------------------------------------------------------------------------------- 
/test/test_analytics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/test_analytics/__init__.py -------------------------------------------------------------------------------- /test/test_analytics/baseline/analyzer_pickling/A.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": {}, 3 | "nodes": { 4 | "char *str_append(char *string, char *append)": { 5 | "files": [ 6 | "main.c" 7 | ], 8 | "history": { 9 | "A": { 10 | "adds": 7, 11 | "dels": 0 12 | } 13 | }, 14 | "size": null 15 | }, 16 | "int str_len(char *string)": { 17 | "files": [ 18 | "main.c" 19 | ], 20 | "history": { 21 | "A": { 22 | "adds": 6, 23 | "dels": 0 24 | } 25 | }, 26 | "size": null 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/analyzer_pickling/B.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": {}, 3 | "nodes": { 4 | "char *str_append(char *string, char *append)": { 5 | "files": [ 6 | "main.c" 7 | ], 8 | "history": { 9 | "A": { 10 | "adds": 7, 11 | "dels": 0 12 | }, 13 | "B": { 14 | "adds": 0, 15 | "dels": 3 16 | } 17 | }, 18 | "size": null 19 | }, 20 | "char *str_append_chr(char *string, char append)": { 21 | "files": [ 22 | "main.c" 23 | ], 24 | "history": { 25 | "B": { 26 | "adds": 3, 27 | "dels": 0 28 | } 29 | }, 30 | "size": null 31 | }, 32 | "int str_equals(char *equal1, char *eqaul2)": { 33 | "files": [ 34 | "main.c" 35 | ], 36 | "history": { 37 | "B": { 38 | "adds": 11, 39 | "dels": 0 40 | } 41 | }, 42 | "size": null 43 | }, 44 | "int str_len(char *string)": { 45 | "files": [ 46 | "main.c" 47 | ], 48 | "history": { 49 | "A": { 50 | "adds": 6, 51 | "dels": 0 52 | } 53 | }, 54 | "size": null 55 | } 56 | } 57 | } 
-------------------------------------------------------------------------------- /test/test_analytics/baseline/analyzer_pickling/C.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "C", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "C", 9 | "weight": null 10 | } 11 | }, 12 | "nodes": { 13 | "char *str_append(char *string, char *append)": { 14 | "files": [ 15 | "main.c" 16 | ], 17 | "history": { 18 | "A": { 19 | "adds": 7, 20 | "dels": 0 21 | }, 22 | "B": { 23 | "adds": 0, 24 | "dels": 3 25 | } 26 | }, 27 | "size": null 28 | }, 29 | "char *str_append_chr(char *string, char append)": { 30 | "files": [ 31 | "main.c" 32 | ], 33 | "history": { 34 | "B": { 35 | "adds": 3, 36 | "dels": 0 37 | }, 38 | "C": { 39 | "adds": 30, 40 | "dels": 4 41 | } 42 | }, 43 | "size": null 44 | }, 45 | "int str_equals(char *equal1, char *eqaul2)": { 46 | "files": [ 47 | "main.c" 48 | ], 49 | "history": { 50 | "B": { 51 | "adds": 11, 52 | "dels": 0 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "int str_len(char *string)": { 58 | "files": [ 59 | "main.c" 60 | ], 61 | "history": { 62 | "A": { 63 | "adds": 6, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/analyzer_pickling/D.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "D", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "D", 9 | "weight": null 10 | }, 11 | "char *str_replace(char *search, char 
*replace, char *subject)|->|char *str_append_chr(char *string, char append)": { 12 | "addedBy": "G", 13 | "weight": null 14 | }, 15 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_equals(char *equal1, char *eqaul2)": { 16 | "addedBy": "G", 17 | "weight": null 18 | }, 19 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_len(char *string)": { 20 | "addedBy": "G", 21 | "weight": null 22 | } 23 | }, 24 | "nodes": { 25 | "char *str_append(char *string, char *append)": { 26 | "files": [ 27 | "main.c" 28 | ], 29 | "history": { 30 | "A": { 31 | "adds": 7, 32 | "dels": 0 33 | }, 34 | "B": { 35 | "adds": 0, 36 | "dels": 3 37 | } 38 | }, 39 | "size": null 40 | }, 41 | "char *str_append_chr(char *string, char append)": { 42 | "files": [ 43 | "main.c" 44 | ], 45 | "history": { 46 | "B": { 47 | "adds": 3, 48 | "dels": 0 49 | }, 50 | "C": { 51 | "adds": 30, 52 | "dels": 4 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "char *str_replace(char *search, char *replace, char *subject)": { 58 | "files": [ 59 | "feature-G.c" 60 | ], 61 | "history": { 62 | "G": { 63 | "adds": 26, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | }, 69 | "int str_equals(char *equal1, char *eqaul2)": { 70 | "files": [ 71 | "main.c" 72 | ], 73 | "history": { 74 | "B": { 75 | "adds": 11, 76 | "dels": 0 77 | } 78 | }, 79 | "size": null 80 | }, 81 | "int str_len(char *string)": { 82 | "files": [ 83 | "main.c" 84 | ], 85 | "history": { 86 | "A": { 87 | "adds": 6, 88 | "dels": 0 89 | } 90 | }, 91 | "size": null 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/analyzer_pickling/G.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "C", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, 
char append)|->|int str_len(char *string)": { 8 | "addedBy": "C", 9 | "weight": null 10 | }, 11 | "char *str_replace(char *search, char *replace, char *subject)|->|char *str_append_chr(char *string, char append)": { 12 | "addedBy": "G", 13 | "weight": null 14 | }, 15 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_equals(char *equal1, char *eqaul2)": { 16 | "addedBy": "G", 17 | "weight": null 18 | }, 19 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_len(char *string)": { 20 | "addedBy": "G", 21 | "weight": null 22 | } 23 | }, 24 | "nodes": { 25 | "char *str_append(char *string, char *append)": { 26 | "files": [ 27 | "main.c" 28 | ], 29 | "history": { 30 | "A": { 31 | "adds": 7, 32 | "dels": 0 33 | }, 34 | "B": { 35 | "adds": 0, 36 | "dels": 3 37 | } 38 | }, 39 | "size": null 40 | }, 41 | "char *str_append_chr(char *string, char append)": { 42 | "files": [ 43 | "main.c" 44 | ], 45 | "history": { 46 | "B": { 47 | "adds": 3, 48 | "dels": 0 49 | }, 50 | "C": { 51 | "adds": 30, 52 | "dels": 4 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "char *str_replace(char *search, char *replace, char *subject)": { 58 | "files": [ 59 | "feature-G.c" 60 | ], 61 | "history": { 62 | "G": { 63 | "adds": 26, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | }, 69 | "int str_equals(char *equal1, char *eqaul2)": { 70 | "files": [ 71 | "main.c" 72 | ], 73 | "history": { 74 | "B": { 75 | "adds": 11, 76 | "dels": 0 77 | } 78 | }, 79 | "size": null 80 | }, 81 | "int str_len(char *string)": { 82 | "files": [ 83 | "main.c" 84 | ], 85 | "history": { 86 | "A": { 87 | "adds": 6, 88 | "dels": 0 89 | } 90 | }, 91 | "size": null 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch/A.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": {}, 3 | "nodes": { 4 | "char *str_append(char *string, char 
*append)": { 5 | "files": [ 6 | "main.c" 7 | ], 8 | "history": { 9 | "A": { 10 | "adds": 7, 11 | "dels": 0 12 | } 13 | }, 14 | "size": null 15 | }, 16 | "int str_len(char *string)": { 17 | "files": [ 18 | "main.c" 19 | ], 20 | "history": { 21 | "A": { 22 | "adds": 6, 23 | "dels": 0 24 | } 25 | }, 26 | "size": null 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch/B.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": {}, 3 | "nodes": { 4 | "char *str_append(char *string, char *append)": { 5 | "files": [ 6 | "main.c" 7 | ], 8 | "history": { 9 | "A": { 10 | "adds": 7, 11 | "dels": 0 12 | }, 13 | "B": { 14 | "adds": 0, 15 | "dels": 3 16 | } 17 | }, 18 | "size": null 19 | }, 20 | "char *str_append_chr(char *string, char append)": { 21 | "files": [ 22 | "main.c" 23 | ], 24 | "history": { 25 | "B": { 26 | "adds": 3, 27 | "dels": 0 28 | } 29 | }, 30 | "size": null 31 | }, 32 | "int str_equals(char *equal1, char *eqaul2)": { 33 | "files": [ 34 | "main.c" 35 | ], 36 | "history": { 37 | "B": { 38 | "adds": 11, 39 | "dels": 0 40 | } 41 | }, 42 | "size": null 43 | }, 44 | "int str_len(char *string)": { 45 | "files": [ 46 | "main.c" 47 | ], 48 | "history": { 49 | "A": { 50 | "adds": 6, 51 | "dels": 0 52 | } 53 | }, 54 | "size": null 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch/C.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "C", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "C", 9 | "weight": null 10 | } 11 | }, 12 | "nodes": { 13 | "char *str_append(char *string, char 
*append)": { 14 | "files": [ 15 | "main.c" 16 | ], 17 | "history": { 18 | "A": { 19 | "adds": 7, 20 | "dels": 0 21 | }, 22 | "B": { 23 | "adds": 0, 24 | "dels": 3 25 | } 26 | }, 27 | "size": null 28 | }, 29 | "char *str_append_chr(char *string, char append)": { 30 | "files": [ 31 | "main.c" 32 | ], 33 | "history": { 34 | "B": { 35 | "adds": 3, 36 | "dels": 0 37 | }, 38 | "C": { 39 | "adds": 30, 40 | "dels": 4 41 | } 42 | }, 43 | "size": null 44 | }, 45 | "int str_equals(char *equal1, char *eqaul2)": { 46 | "files": [ 47 | "main.c" 48 | ], 49 | "history": { 50 | "B": { 51 | "adds": 11, 52 | "dels": 0 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "int str_len(char *string)": { 58 | "files": [ 59 | "main.c" 60 | ], 61 | "history": { 62 | "A": { 63 | "adds": 6, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch/D.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "D", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "D", 9 | "weight": null 10 | }, 11 | "char *str_replace(char *search, char *replace, char *subject)|->|char *str_append_chr(char *string, char append)": { 12 | "addedBy": "G", 13 | "weight": null 14 | }, 15 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_equals(char *equal1, char *eqaul2)": { 16 | "addedBy": "G", 17 | "weight": null 18 | }, 19 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_len(char *string)": { 20 | "addedBy": "G", 21 | "weight": null 22 | } 23 | }, 24 | "nodes": { 25 | "char *str_append(char *string, char *append)": { 26 | "files": [ 27 | "main.c" 28 | ], 29 | "history": { 30 | "A": { 31 | 
"adds": 7, 32 | "dels": 0 33 | }, 34 | "B": { 35 | "adds": 0, 36 | "dels": 3 37 | } 38 | }, 39 | "size": null 40 | }, 41 | "char *str_append_chr(char *string, char append)": { 42 | "files": [ 43 | "main.c" 44 | ], 45 | "history": { 46 | "B": { 47 | "adds": 3, 48 | "dels": 0 49 | }, 50 | "C": { 51 | "adds": 30, 52 | "dels": 4 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "char *str_replace(char *search, char *replace, char *subject)": { 58 | "files": [ 59 | "feature-G.c" 60 | ], 61 | "history": { 62 | "G": { 63 | "adds": 26, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | }, 69 | "int str_equals(char *equal1, char *eqaul2)": { 70 | "files": [ 71 | "main.c" 72 | ], 73 | "history": { 74 | "B": { 75 | "adds": 11, 76 | "dels": 0 77 | } 78 | }, 79 | "size": null 80 | }, 81 | "int str_len(char *string)": { 82 | "files": [ 83 | "main.c" 84 | ], 85 | "history": { 86 | "A": { 87 | "adds": 6, 88 | "dels": 0 89 | } 90 | }, 91 | "size": null 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch/G.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "C", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "C", 9 | "weight": null 10 | }, 11 | "char *str_replace(char *search, char *replace, char *subject)|->|char *str_append_chr(char *string, char append)": { 12 | "addedBy": "G", 13 | "weight": null 14 | }, 15 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_equals(char *equal1, char *eqaul2)": { 16 | "addedBy": "G", 17 | "weight": null 18 | }, 19 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_len(char *string)": { 20 | "addedBy": "G", 21 | "weight": null 22 | } 23 | }, 24 | "nodes": 
{ 25 | "char *str_append(char *string, char *append)": { 26 | "files": [ 27 | "main.c" 28 | ], 29 | "history": { 30 | "A": { 31 | "adds": 7, 32 | "dels": 0 33 | }, 34 | "B": { 35 | "adds": 0, 36 | "dels": 3 37 | } 38 | }, 39 | "size": null 40 | }, 41 | "char *str_append_chr(char *string, char append)": { 42 | "files": [ 43 | "main.c" 44 | ], 45 | "history": { 46 | "B": { 47 | "adds": 3, 48 | "dels": 0 49 | }, 50 | "C": { 51 | "adds": 30, 52 | "dels": 4 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "char *str_replace(char *search, char *replace, char *subject)": { 58 | "files": [ 59 | "feature-G.c" 60 | ], 61 | "history": { 62 | "G": { 63 | "adds": 26, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | }, 69 | "int str_equals(char *equal1, char *eqaul2)": { 70 | "files": [ 71 | "main.c" 72 | ], 73 | "history": { 74 | "B": { 75 | "adds": 11, 76 | "dels": 0 77 | } 78 | }, 79 | "size": null 80 | }, 81 | "int str_len(char *string)": { 82 | "files": [ 83 | "main.c" 84 | ], 85 | "history": { 86 | "A": { 87 | "adds": 6, 88 | "dels": 0 89 | } 90 | }, 91 | "size": null 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch_first_parent/A.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": {}, 3 | "nodes": { 4 | "char *str_append(char *string, char *append)": { 5 | "files": [ 6 | "main.c" 7 | ], 8 | "history": { 9 | "A": { 10 | "adds": 7, 11 | "dels": 0 12 | } 13 | }, 14 | "size": null 15 | }, 16 | "int str_len(char *string)": { 17 | "files": [ 18 | "main.c" 19 | ], 20 | "history": { 21 | "A": { 22 | "adds": 6, 23 | "dels": 0 24 | } 25 | }, 26 | "size": null 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch_first_parent/B.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": {}, 3 
| "nodes": { 4 | "char *str_append(char *string, char *append)": { 5 | "files": [ 6 | "main.c" 7 | ], 8 | "history": { 9 | "A": { 10 | "adds": 7, 11 | "dels": 0 12 | }, 13 | "B": { 14 | "adds": 0, 15 | "dels": 3 16 | } 17 | }, 18 | "size": null 19 | }, 20 | "char *str_append_chr(char *string, char append)": { 21 | "files": [ 22 | "main.c" 23 | ], 24 | "history": { 25 | "B": { 26 | "adds": 3, 27 | "dels": 0 28 | } 29 | }, 30 | "size": null 31 | }, 32 | "int str_equals(char *equal1, char *eqaul2)": { 33 | "files": [ 34 | "main.c" 35 | ], 36 | "history": { 37 | "B": { 38 | "adds": 11, 39 | "dels": 0 40 | } 41 | }, 42 | "size": null 43 | }, 44 | "int str_len(char *string)": { 45 | "files": [ 46 | "main.c" 47 | ], 48 | "history": { 49 | "A": { 50 | "adds": 6, 51 | "dels": 0 52 | } 53 | }, 54 | "size": null 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch_first_parent/C.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "C", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "C", 9 | "weight": null 10 | } 11 | }, 12 | "nodes": { 13 | "char *str_append(char *string, char *append)": { 14 | "files": [ 15 | "main.c" 16 | ], 17 | "history": { 18 | "A": { 19 | "adds": 7, 20 | "dels": 0 21 | }, 22 | "B": { 23 | "adds": 0, 24 | "dels": 3 25 | } 26 | }, 27 | "size": null 28 | }, 29 | "char *str_append_chr(char *string, char append)": { 30 | "files": [ 31 | "main.c" 32 | ], 33 | "history": { 34 | "B": { 35 | "adds": 3, 36 | "dels": 0 37 | }, 38 | "C": { 39 | "adds": 30, 40 | "dels": 4 41 | } 42 | }, 43 | "size": null 44 | }, 45 | "int str_equals(char *equal1, char *eqaul2)": { 46 | "files": [ 47 | "main.c" 48 | ], 49 | "history": { 50 | 
"B": { 51 | "adds": 11, 52 | "dels": 0 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "int str_len(char *string)": { 58 | "files": [ 59 | "main.c" 60 | ], 61 | "history": { 62 | "A": { 63 | "adds": 6, 64 | "dels": 0 65 | } 66 | }, 67 | "size": null 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /test/test_analytics/baseline/feature_branch_first_parent/D.g.json: -------------------------------------------------------------------------------- 1 | { 2 | "edges": { 3 | "char *str_append_chr(char *string, char append)|->|int str_equals(char *equal1, char *eqaul2)": { 4 | "addedBy": "D", 5 | "weight": null 6 | }, 7 | "char *str_append_chr(char *string, char append)|->|int str_len(char *string)": { 8 | "addedBy": "D", 9 | "weight": null 10 | }, 11 | "char *str_replace(char *search, char *replace, char *subject)|->|char *str_append_chr(char *string, char append)": { 12 | "addedBy": "D", 13 | "weight": null 14 | }, 15 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_equals(char *equal1, char *eqaul2)": { 16 | "addedBy": "D", 17 | "weight": null 18 | }, 19 | "char *str_replace(char *search, char *replace, char *subject)|->|int str_len(char *string)": { 20 | "addedBy": "D", 21 | "weight": null 22 | } 23 | }, 24 | "nodes": { 25 | "char *str_append(char *string, char *append)": { 26 | "files": [ 27 | "main.c" 28 | ], 29 | "history": { 30 | "A": { 31 | "adds": 7, 32 | "dels": 0 33 | }, 34 | "B": { 35 | "adds": 0, 36 | "dels": 3 37 | } 38 | }, 39 | "size": null 40 | }, 41 | "char *str_append_chr(char *string, char append)": { 42 | "files": [ 43 | "main.c" 44 | ], 45 | "history": { 46 | "B": { 47 | "adds": 3, 48 | "dels": 0 49 | }, 50 | "C": { 51 | "adds": 30, 52 | "dels": 4 53 | } 54 | }, 55 | "size": null 56 | }, 57 | "char *str_replace(char *search, char *replace, char *subject)": { 58 | "files": [ 59 | "feature-G.c" 60 | ], 61 | "history": { 62 | "D": { 63 | "adds": 26, 64 | "dels": 0 65 | } 66 | 
}, 67 | "size": null 68 | }, 69 | "int str_equals(char *equal1, char *eqaul2)": { 70 | "files": [ 71 | "main.c" 72 | ], 73 | "history": { 74 | "B": { 75 | "adds": 11, 76 | "dels": 0 77 | } 78 | }, 79 | "size": null 80 | }, 81 | "int str_len(char *string)": { 82 | "files": [ 83 | "main.c" 84 | ], 85 | "history": { 86 | "A": { 87 | "adds": 6, 88 | "dels": 0 89 | } 90 | }, 91 | "size": null 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /test/test_analytics/conftest.py: -------------------------------------------------------------------------------- 1 | 2 | collect_ignore = ["test_analyzer_cpp.py", "test_analyzer_lsp_ccls.py"] 3 | -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example2.patch: -------------------------------------------------------------------------------- 1 | @@ -1 +1,7 @@ 2 | +/* 3 | + * linux/lib/errno.c 4 | + * 5 | + * (C) 1991 Linus Torvalds 6 | + */ 7 | + 8 | int errno; 9 | -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example3.patch: -------------------------------------------------------------------------------- 1 | @@ -6,12 +6,27 @@ int str_len(char *string) 2 | return count - string; 3 | } 4 | 5 | -/* added in A*/ 6 | -char* str_append(char* string, char* append) { 7 | +/* str_append is deleted in B */ 8 | + 9 | +/* added in B */ 10 | +char* str_append_chr(char* string, char append) { 11 | char* newstring = NULL; 12 | - size_t needed = snprintf(NULL, 0, "%s%s", string, append); 13 | + size_t needed = snprintf(NULL, 0, "%s%c", string, append); 14 | newstring = malloc(needed); 15 | - sprintf(newstring, "%s%s", string, append); 16 | + sprintf(newstring, "%s%c", string, append); 17 | return newstring; 18 | } 19 | 20 | +/* added in B */ 21 | +int str_equals(char *equal1, char *eqaul2) 22 | +{ 23 | + while(*equal1==*eqaul2) 24 | + { 25 | + if ( *equal1 
== '\0' || *eqaul2 == '\0' ){break;} 26 | + equal1++; 27 | + eqaul2++; 28 | + } 29 | + if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 30 | + else {return -1}; 31 | +} 32 | + -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example4.patch: -------------------------------------------------------------------------------- 1 | @@ -0,0 +1,27 @@ 2 | +/* added in G */ 3 | +char* str_replace(char* search, char* replace, char* subject) { 4 | + char* newstring = ""; 5 | + int i = 0; 6 | + for(i = 0; i < str_len(subject); i++) { 7 | + if (subject[i] == search[0]) { 8 | + int e = 0; 9 | + char* calc = ""; 10 | + for(e = 0; e < str_len(search); e++) { 11 | + if(subject[i+e] == search[e]) { 12 | + calc = str_append_chr(calc, search[e]); 13 | + } 14 | + } 15 | + if (str_equals(search, calc) == 0) { 16 | + newstring = str_append(newstring, replace); 17 | + i = i + str_len (search)-1; 18 | + } 19 | + else { 20 | + newstring = str_append_chr(newstring, subject[i]); 21 | + } 22 | + } 23 | + else { 24 | + newstring = str_append_chr(newstring, subject[i]); 25 | + } 26 | + } 27 | + return newstring; 28 | +} -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example5.patch: -------------------------------------------------------------------------------- 1 | --- patch_error.c 2019-02-27 16:20:31.000000000 -0800 2 | +++ new_patch_error.c 2019-02-27 16:15:41.000000000 -0800 3 | @@ -9,4 +9,4 @@ 4 | } 5 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 6 | else {return -1}; 7 | -} 8 | +} 9 | \ No newline at end of file 10 | -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example6.patch: -------------------------------------------------------------------------------- 1 | diff --git a/main.go b/main.go 2 | index 5398e6d..2a81399 100644 3 | --- a/main.go 4 | +++ b/main.go 5 | @@ -9,22 
+9,20 @@ type animal interface { 6 | 7 | type cat int 8 | type dog int 9 | - 10 | func (c cat) printInfo(){ 11 | fmt.Println("a cat") 12 | } 13 | 14 | -func (d dog) printInfo(){ 15 | +func (c dog) printInfo(){ 16 | fmt.Println("a dog") 17 | } 18 | - 19 | +func invoke(a animal){ 20 | + a.printInfo() 21 | +} 22 | func main() { 23 | - var a animal 24 | var c cat 25 | - a=c 26 | - a.printInfo() 27 | - //other type 28 | - var d dog 29 | - a=d 30 | - a.printInfo() 31 | -} 32 | \ No newline at end of file 33 | + var d dog 34 | + //as value convert 35 | + invoke(c) 36 | + invoke(d) 37 | +} 38 | 39 | -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example7.patch: -------------------------------------------------------------------------------- 1 | --- old-feature-H.c 2019-03-13 15:49:07.000000000 -0700 2 | +++ new-feature-H.c 2019-03-13 15:49:25.000000000 -0700 3 | @@ -5,21 +5,38 @@ 4 | struct node *next; 5 | }*head; 6 | 7 | -/* added in H */ 8 | +/* added in H, edited in I */ 9 | void append(int num) 10 | { 11 | - struct node *temp,*right; 12 | - temp= (struct node *)malloc(sizeof(struct node)); 13 | - temp->data=num; 14 | - right=(struct node *)head; 15 | - while(right->next != NULL) 16 | - right=right->next; 17 | - right->next =temp; 18 | - right=temp; 19 | - right->next=NULL; 20 | + struct node *temp, *prev; 21 | + temp=head; 22 | + while(temp!=NULL) 23 | + { 24 | + if(temp->data==num) 25 | + { 26 | + if(temp==head) 27 | + { 28 | + head=temp->next; 29 | + free(temp); 30 | + return 1; 31 | + } 32 | + else 33 | + { 34 | + prev->next=temp->next; 35 | + free(temp); 36 | + return 1; 37 | + } 38 | + } 39 | + else 40 | + { 41 | + prev=temp; 42 | + temp= temp->next; 43 | + } 44 | + } 45 | + return 0; 46 | } 47 | 48 | -/* added in H */ 49 | +/* added in H, edited in G */ 50 | void add( int num ) 51 | { 52 | struct node *temp; 53 | @@ -30,36 +47,6 @@ 54 | head=temp; 55 | head->next=NULL; 56 | } 57 | - else 58 | 
- { 59 | - temp->next=head; 60 | - head=temp; 61 | - } 62 | } 63 | 64 | -/* added in H */ 65 | -void insert(int num) 66 | -{ 67 | - int c=0; 68 | - struct node *temp; 69 | - temp=head; 70 | - if(temp==NULL) 71 | - { 72 | - add(num); 73 | - } 74 | - else 75 | - { 76 | - while(temp!=NULL) 77 | - { 78 | - if(temp->datanext; 81 | - } 82 | - if(c==0) 83 | - add(num); 84 | - else if(cdata==num) 16 | { 17 | if(temp==head) 18 | { 19 | head=temp->next; 20 | free(temp); 21 | return 1; 22 | } 23 | else 24 | { 25 | prev->next=temp->next; 26 | free(temp); 27 | return 1; 28 | } 29 | } 30 | else 31 | { 32 | prev=temp; 33 | temp= temp->next; 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | /* added in H, edited in G */ 40 | void add( int num ) 41 | { 42 | struct node *temp; 43 | temp=(struct node *)malloc(sizeof(struct node)); 44 | temp->data=num; 45 | if (head== NULL) 46 | { 47 | head=temp; 48 | head->next=NULL; 49 | } 50 | } 51 | 52 | /* insert() is deleted in I */ 53 | -------------------------------------------------------------------------------- /test/test_analytics/patch_test_files/example7_old.c: -------------------------------------------------------------------------------- 1 | /* added in H */ 2 | struct node 3 | { 4 | int data; 5 | struct node *next; 6 | }*head; 7 | 8 | /* added in H */ 9 | void append(int num) 10 | { 11 | struct node *temp,*right; 12 | temp= (struct node *)malloc(sizeof(struct node)); 13 | temp->data=num; 14 | right=(struct node *)head; 15 | while(right->next != NULL) 16 | right=right->next; 17 | right->next =temp; 18 | right=temp; 19 | right->next=NULL; 20 | } 21 | 22 | /* added in H */ 23 | void add( int num ) 24 | { 25 | struct node *temp; 26 | temp=(struct node *)malloc(sizeof(struct node)); 27 | temp->data=num; 28 | if (head== NULL) 29 | { 30 | head=temp; 31 | head->next=NULL; 32 | } 33 | else 34 | { 35 | temp->next=head; 36 | head=temp; 37 | } 38 | } 39 | 40 | /* added in H */ 41 | void insert(int num) 42 | { 43 | int c=0; 44 | struct node *temp; 
45 | temp=head; 46 | if(temp==NULL) 47 | { 48 | add(num); 49 | } 50 | else 51 | { 52 | while(temp!=NULL) 53 | { 54 | if(temp->datanext; 57 | } 58 | if(c==0) 59 | add(num); 60 | else if(c None: 20 | assert commit 21 | index = self.analyzedCommits 22 | print("Current commit #{0}, hexsha {1}", index, commit.hexsha) 23 | if index == self._raiseExceptionAtIndex: 24 | raise Exception("Raised exception at commit #{0}.".format(index)) 25 | self.analyzedCommits.append(commit.hexsha) 26 | 27 | 28 | class DummyPostAnalyzer(IPostAnalyzer): 29 | def __init__(self): 30 | self.status = None 31 | 32 | def analyze(self, status: AnalysisStatus) -> None: 33 | self.status = status 34 | 35 | 36 | def test_meta_analyzer(): 37 | repoPath = prepare_repository("test_feature_branch") 38 | repo = GitRepository(repoPath) 39 | ca = DummyCommitAnalyzer() 40 | pa = DummyPostAnalyzer() 41 | ma = MetaAnalyzer(repo, [ca], [pa], origin_commit=None, terminal_commit="HEAD", analyzed_commits=()) 42 | status = ma.analyze(100) 43 | assert status == pa.status 44 | 45 | commits = [c.hexsha for c in islice(repo.enum_commits(None, "HEAD"), 101)] 46 | if len(commits) <= 100: 47 | assert pa.status.stop_reason == CommitAnalysisStopReason.ReachedTerminalCommit 48 | else: 49 | assert pa.status.stop_reason == CommitAnalysisStopReason.ReachedMaximumCommits 50 | commits = commits[:100] 51 | assert ca.analyzedCommits == commits 52 | assert status.analyzed_commits_ref == commits 53 | assert status.origin_commit_ref == None 54 | assert status.terminal_commit_ref == "HEAD" 55 | assert status.last_commit_ref == commits[-1] 56 | assert status.exception == None 57 | 58 | if len(commits) < 2: 59 | _logger.warning("Skipped exception test because it needs repository have at least 2 commits.") 60 | exceptionIndex = len(commits)//2 61 | ca = DummyCommitAnalyzer(raiseExceptionAtIndex=exceptionIndex) 62 | pa = DummyPostAnalyzer() 63 | ma = MetaAnalyzer(repo, [ca], [pa], origin_commit=None, terminal_commit="HEAD", 
analyzed_commits=()) 64 | status = ma.analyze(100) 65 | assert status == pa.status 66 | assert status.stop_reason == CommitAnalysisStopReason.FatalError 67 | assert isinstance(status.exception, Exception) 68 | assert status.analyzed_commits_ref == commits[:exceptionIndex] 69 | assert status.origin_commit_ref == None 70 | assert status.terminal_commit_ref == "HEAD" 71 | assert status.last_commit_ref == commits[exceptionIndex] 72 | -------------------------------------------------------------------------------- /test/test_analytics2/test_repository.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import subprocess 3 | import test.test_analytics2.helpers.repository as repositoryhelper 4 | 5 | from persper.analytics2.repository import GitRepository 6 | from persper.util.path import root_path 7 | 8 | 9 | def prepare_repository(repo_name: str): 10 | # build the repo first if not exists yet 11 | repo_path = os.path.join(root_path, 'repos/' + repo_name) 12 | script_path = os.path.join(root_path, 'tools/repo_creater/create_repo.py') 13 | test_src_path = os.path.join(root_path, 'test/' + repo_name) 14 | if not os.path.isdir(repo_path): 15 | cmd = '{} {}'.format(script_path, test_src_path) 16 | subprocess.call(cmd, shell=True) 17 | print("Repository path: ", repo_path) 18 | return repo_path 19 | 20 | 21 | def test_git_repository(): 22 | repoPath = prepare_repository("test_feature_branch") 23 | # TODO introduce some really complex repo, such as 24 | # repoPath = r"F:\WRS\testrepos\ccls" 25 | repo = GitRepository(repoPath) 26 | repositoryhelper.test_repository_history_provider(repo) 27 | -------------------------------------------------------------------------------- /test/test_analytics2/utilities.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import subprocess 3 | import test.test_analytics2.helpers.repository as repositoryhelper 4 | 5 | from 
persper.analytics2.repository import GitRepository 6 | from persper.util.path import root_path 7 | 8 | 9 | def prepare_repository(repo_name: str): 10 | # build the repo first if not exists yet 11 | repo_path = os.path.join(root_path, 'repos/' + repo_name) 12 | script_path = os.path.join(root_path, 'tools/repo_creater/create_repo.py') 13 | test_src_path = os.path.join(root_path, 'test/' + repo_name) 14 | if not os.path.isdir(repo_path): 15 | cmd = '{} {}'.format(script_path, test_src_path) 16 | subprocess.call(cmd, shell=True) 17 | print("Repository path: ", repo_path) 18 | return repo_path 19 | -------------------------------------------------------------------------------- /test/test_feature_branch/A/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* added in A*/ 10 | char* str_append(char* string, char* append) { 11 | char* newstring = NULL; 12 | size_t needed = snprintf(NULL, 0, "%s%s", string, append); 13 | newstring = malloc(needed); 14 | sprintf(newstring, "%s%s", string, append); 15 | return newstring; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /test/test_feature_branch/B/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | /* added in B */ 12 | char* str_append_chr(char* string, char append) { 13 | char* newstring = NULL; 14 | size_t needed = snprintf(NULL, 0, "%s%c", string, append); 15 | newstring = malloc(needed); 16 | sprintf(newstring, "%s%c", string, append); 17 | return newstring; 18 | } 19 | 20 | /* added in B */ 21 | int str_equals(char *equal1, char *eqaul2) 22 | { 23 | 
while(*equal1==*eqaul2) 24 | { 25 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 26 | equal1++; 27 | eqaul2++; 28 | } 29 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 30 | else {return -1}; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /test/test_feature_branch/C/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl. 12 | 13 | /* added in B, edited in C */ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = ""; 16 | int i = 0; 17 | for(i = 0; i < str_len(subject); i++) { 18 | if (subject[i] == search[0]) { 19 | int e = 0; 20 | char* calc = ""; 21 | for(e = 0; e < str_len(search); e++) { 22 | if(subject[i+e] == search[e]) { 23 | calc = str_append_chr(calc, search[e]); 24 | } 25 | } 26 | if (str_equals(search, calc) == 0) { 27 | if(count > 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else { 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else {return -1}; 59 | } -------------------------------------------------------------------------------- /test/test_feature_branch/D/feature-G.c: 
-------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- /test/test_feature_branch/D/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl 12 | 13 | /* added in B, edited in C */ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = ""; 16 | int i = 0; 17 | for(i = 0; i < str_len(subject); i++) { 18 | if (subject[i] == search[0]) { 19 | int e = 0; 20 | char* calc = ""; 21 | for(e = 0; e < str_len(search); e++) { 22 | if(subject[i+e] == search[e]) { 23 | calc = str_append_chr(calc, search[e]); 24 | } 25 | } 26 | if (str_equals(search, calc) == 0) { 27 | if(count > 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else { 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 
42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else {return -1}; 59 | } -------------------------------------------------------------------------------- /test/test_feature_branch/E/feature-G.c: -------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- /test/test_feature_branch/E/feature-H.c: -------------------------------------------------------------------------------- 1 | /* added in H */ 2 | struct node 3 | { 4 | int data; 5 | struct node *next; 6 | }*head; 7 | 8 | /* added in H, edited in I */ 9 | void append(int num) 10 | { 11 | struct node *temp, *prev; 12 | temp=head; 13 | while(temp!=NULL) 14 | { 15 | if(temp->data==num) 16 | { 17 | if(temp==head) 18 | { 19 | head=temp->next; 20 | free(temp); 21 | return 1; 22 | } 23 | else 24 | { 25 | prev->next=temp->next; 26 | free(temp); 27 | return 1; 28 | } 29 | } 30 | else 31 | { 32 | prev=temp; 33 | 
temp= temp->next; 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | /* added in H, edited in G */ 40 | void add( int num ) 41 | { 42 | struct node *temp; 43 | temp=(struct node *)malloc(sizeof(struct node)); 44 | temp->data=num; 45 | if (head== NULL) 46 | { 47 | head=temp; 48 | head->next=NULL; 49 | } 50 | } 51 | 52 | /* insert() is deleted in I */ 53 | -------------------------------------------------------------------------------- /test/test_feature_branch/E/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl 12 | 13 | /* added in B, edited in C */ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = ""; 16 | int i = 0; 17 | for(i = 0; i < str_len(subject); i++) { 18 | if (subject[i] == search[0]) { 19 | int e = 0; 20 | char* calc = ""; 21 | for(e = 0; e < str_len(search); e++) { 22 | if(subject[i+e] == search[e]) { 23 | calc = str_append_chr(calc, search[e]); 24 | } 25 | } 26 | if (str_equals(search, calc) == 0) { 27 | if(count > 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else { 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else {return -1}; 59 | } 
-------------------------------------------------------------------------------- /test/test_feature_branch/F/feature-G.c: -------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- /test/test_feature_branch/F/feature-H.c: -------------------------------------------------------------------------------- 1 | /* added in H */ 2 | struct node 3 | { 4 | int data; 5 | struct node *next; 6 | }*head; 7 | 8 | /* added in H, edited in I */ 9 | void append(int num) 10 | { 11 | struct node *temp, *prev; 12 | temp=head; 13 | while(temp!=NULL) 14 | { 15 | if(temp->data==num) 16 | { 17 | if(temp==head) 18 | { 19 | head=temp->next; 20 | free(temp); 21 | return 1; 22 | } 23 | else 24 | { 25 | prev->next=temp->next; 26 | free(temp); 27 | return 1; 28 | } 29 | } 30 | else 31 | { 32 | prev=temp; 33 | temp= temp->next; 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | /* added in H, edited in G */ 40 | void add( int num ) 41 | { 42 | struct node *temp; 43 | temp=(struct node *)malloc(sizeof(struct node)); 44 | temp->data=num; 45 | if (head== NULL) 46 | { 47 | head=temp; 48 | head->next=NULL; 49 | } 50 | } 51 | 52 | /* insert() is deleted in I */ 53 | 
-------------------------------------------------------------------------------- /test/test_feature_branch/F/feature-J.c: -------------------------------------------------------------------------------- 1 | /* added in J */ 2 | void display(struct node *r) 3 | { 4 | r=head; 5 | if(r==NULL) 6 | { 7 | return; 8 | } 9 | while(r!=NULL) 10 | { 11 | printf("%d ",r->data); 12 | r=r->next; 13 | } 14 | printf("\n"); 15 | } 16 | 17 | /* added in J */ 18 | int count() 19 | { 20 | struct node *n; 21 | int c=0; 22 | n=head; 23 | while(n!=NULL) 24 | { 25 | n=n->next; 26 | c++; 27 | } 28 | return c; 29 | } -------------------------------------------------------------------------------- /test/test_feature_branch/F/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl 12 | 13 | /* added in B, edited in C */ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = ""; 16 | int i = 0; 17 | for(i = 0; i < str_len(subject); i++) { 18 | if (subject[i] == search[0]) { 19 | int e = 0; 20 | char* calc = ""; 21 | for(e = 0; e < str_len(search); e++) { 22 | if(subject[i+e] == search[e]) { 23 | calc = str_append_chr(calc, search[e]); 24 | } 25 | } 26 | if (str_equals(search, calc) == 0) { 27 | if(count > 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else { 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | 
while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else {return -1}; 59 | } -------------------------------------------------------------------------------- /test/test_feature_branch/G/feature-G.c: -------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- /test/test_feature_branch/G/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl 12 | 13 | /* added in B*/ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = NULL; 16 | size_t needed = snprintf(NULL, 0, "%s%c", string, append); 17 | newstring = malloc(needed); 18 | sprintf(newstring, "%s%c", string, append); 19 | return newstring; 20 | } 21 | 22 | /* added in B */ 23 | int str_equals(char *equal1, char *eqaul2) 24 | { 25 | while(*equal1==*eqaul2) 26 | { 27 | if ( *equal1 == 
'\0' || *eqaul2 == '\0' ){break;} 28 | equal1++; 29 | eqaul2++; 30 | } 31 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 32 | else {return -1}; 33 | } -------------------------------------------------------------------------------- /test/test_feature_branch/H/feature-G.c: -------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- /test/test_feature_branch/H/feature-H.c: -------------------------------------------------------------------------------- 1 | /* added in H */ 2 | struct node 3 | { 4 | int data; 5 | struct node *next; 6 | }*head; 7 | 8 | /* added in H */ 9 | void append(int num) 10 | { 11 | struct node *temp,*right; 12 | temp= (struct node *)malloc(sizeof(struct node)); 13 | temp->data=num; 14 | right=(struct node *)head; 15 | while(right->next != NULL) 16 | right=right->next; 17 | right->next =temp; 18 | right=temp; 19 | right->next=NULL; 20 | } 21 | 22 | /* added in H */ 23 | void add( int num ) 24 | { 25 | struct node *temp; 26 | temp=(struct node *)malloc(sizeof(struct node)); 27 | temp->data=num; 28 | if (head== NULL) 29 | { 30 | head=temp; 31 | head->next=NULL; 32 | } 33 | else 34 | { 35 | temp->next=head; 36 | head=temp; 37 | } 38 | } 39 | 40 | 
/* added in H */ 41 | void insert(int num) 42 | { 43 | int c=0; 44 | struct node *temp; 45 | temp=head; 46 | if(temp==NULL) 47 | { 48 | add(num); 49 | } 50 | else 51 | { 52 | while(temp!=NULL) 53 | { 54 | if(temp->datanext; 57 | } 58 | if(c==0) 59 | add(num); 60 | else if(c 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else { 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else {return -1}; 59 | } -------------------------------------------------------------------------------- /test/test_feature_branch/I/feature-G.c: -------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- 
/test/test_feature_branch/I/feature-H.c: -------------------------------------------------------------------------------- 1 | /* added in H */ 2 | struct node 3 | { 4 | int data; 5 | struct node *next; 6 | }*head; 7 | 8 | /* added in H, edited in I */ 9 | void append(int num) 10 | { 11 | struct node *temp, *prev; 12 | temp=head; 13 | while(temp!=NULL) 14 | { 15 | if(temp->data==num) 16 | { 17 | if(temp==head) 18 | { 19 | head=temp->next; 20 | free(temp); 21 | return 1; 22 | } 23 | else 24 | { 25 | prev->next=temp->next; 26 | free(temp); 27 | return 1; 28 | } 29 | } 30 | else 31 | { 32 | prev=temp; 33 | temp= temp->next; 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | /* added in H, edited in G */ 40 | void add( int num ) 41 | { 42 | struct node *temp; 43 | temp=(struct node *)malloc(sizeof(struct node)); 44 | temp->data=num; 45 | if (head== NULL) 46 | { 47 | head=temp; 48 | head->next=NULL; 49 | } 50 | } 51 | 52 | /* insert() is deleted in I */ 53 | -------------------------------------------------------------------------------- /test/test_feature_branch/I/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl 12 | 13 | /* added in B, edited in C */ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = ""; 16 | int i = 0; 17 | for(i = 0; i < str_len(subject); i++) { 18 | if (subject[i] == search[0]) { 19 | int e = 0; 20 | char* calc = ""; 21 | for(e = 0; e < str_len(search); e++) { 22 | if(subject[i+e] == search[e]) { 23 | calc = str_append_chr(calc, search[e]); 24 | } 25 | } 26 | if (str_equals(search, calc) == 0) { 27 | if(count > 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else 
{ 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else {return -1}; 59 | } -------------------------------------------------------------------------------- /test/test_feature_branch/J/feature-J.c: -------------------------------------------------------------------------------- 1 | /* added in J */ 2 | void display(struct node *r) 3 | { 4 | r=head; 5 | if(r==NULL) 6 | { 7 | return; 8 | } 9 | while(r!=NULL) 10 | { 11 | printf("%d ",r->data); 12 | r=r->next; 13 | } 14 | printf("\n"); 15 | } 16 | 17 | /* added in J */ 18 | int count() 19 | { 20 | struct node *n; 21 | int c=0; 22 | n=head; 23 | while(n!=NULL) 24 | { 25 | n=n->next; 26 | c++; 27 | } 28 | return c; 29 | } -------------------------------------------------------------------------------- /test/test_feature_branch/J/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* added in A*/ 10 | char* str_append(char* string, char* append) { 11 | char* newstring = NULL; 12 | size_t needed = snprintf(NULL, 0, "%s%s", string, append); 13 | newstring = malloc(needed); 14 | sprintf(newstring, "%s%s", string, append); 15 | return newstring; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /test/test_feature_branch/K/feature-G.c: 
-------------------------------------------------------------------------------- 1 | /* added in G */ 2 | char* str_replace(char* search, char* replace, char* subject) { 3 | char* newstring = ""; 4 | int i = 0; 5 | for(i = 0; i < str_len(subject); i++) { 6 | if (subject[i] == search[0]) { 7 | int e = 0; 8 | char* calc = ""; 9 | for(e = 0; e < str_len(search); e++) { 10 | if(subject[i+e] == search[e]) { 11 | calc = str_append_chr(calc, search[e]); 12 | } 13 | } 14 | if (str_equals(search, calc) == 0) { 15 | newstring = str_append(newstring, replace); 16 | i = i + str_len (search)-1; 17 | } 18 | else { 19 | newstring = str_append_chr(newstring, subject[i]); 20 | } 21 | } 22 | else { 23 | newstring = str_append_chr(newstring, subject[i]); 24 | } 25 | } 26 | return newstring; 27 | } -------------------------------------------------------------------------------- /test/test_feature_branch/K/feature-H.c: -------------------------------------------------------------------------------- 1 | /* added in H */ 2 | struct node 3 | { 4 | int data; 5 | struct node *next; 6 | }*head; 7 | 8 | /* added in H, edited in I */ 9 | void append(int num) 10 | { 11 | struct node *temp, *prev; 12 | temp=head; 13 | while(temp!=NULL) 14 | { 15 | if(temp->data==num) 16 | { 17 | if(temp==head) 18 | { 19 | head=temp->next; 20 | free(temp); 21 | return 1; 22 | } 23 | else 24 | { 25 | prev->next=temp->next; 26 | free(temp); 27 | return 1; 28 | } 29 | } 30 | else 31 | { 32 | prev=temp; 33 | temp= temp->next; 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | /* added in H, edited in G */ 40 | void add( int num ) 41 | { 42 | struct node *temp; 43 | temp=(struct node *)malloc(sizeof(struct node)); 44 | temp->data=num; 45 | if (head== NULL) 46 | { 47 | head=temp; 48 | head->next=NULL; 49 | } 50 | } 51 | 52 | /* insert() is deleted in I */ 53 | -------------------------------------------------------------------------------- /test/test_feature_branch/K/feature-K.c: 
-------------------------------------------------------------------------------- 1 | /* added in J, edited in K */ 2 | void display(struct node *r) 3 | { 4 | r=head; 5 | if(r==NULL) 6 | { 7 | return; 8 | } 9 | printf("\n"); 10 | } 11 | 12 | /* added in J */ 13 | int count() 14 | { 15 | struct node *n; 16 | int c=0; 17 | n=head; 18 | while(n!=NULL) 19 | { 20 | n=n->next; 21 | c++; 22 | } 23 | return c; 24 | } -------------------------------------------------------------------------------- /test/test_feature_branch/K/main.c: -------------------------------------------------------------------------------- 1 | /* added in A */ 2 | int str_len(char *string) 3 | { 4 | char *count = string; 5 | while(*count) {count++;} 6 | return count - string; 7 | } 8 | 9 | /* str_append is deleted in B */ 10 | 11 | int str_equals(char *equal1, char *eqaul2); // Forward decl 12 | 13 | /* added in B, edited in C */ 14 | char* str_append_chr(char* string, char append) { 15 | char* newstring = ""; 16 | int i = 0; 17 | for(i = 0; i < str_len(subject); i++) { 18 | if (subject[i] == search[0]) { 19 | int e = 0; 20 | char* calc = ""; 21 | for(e = 0; e < str_len(search); e++) { 22 | if(subject[i+e] == search[e]) { 23 | calc = str_append_chr(calc, search[e]); 24 | } 25 | } 26 | if (str_equals(search, calc) == 0) { 27 | if(count > 0) { 28 | newstring = str_append(newstring, replace); 29 | i = i + str_len (search)-1; 30 | count = count - 1; 31 | } 32 | else { 33 | newstring = str_append_chr(newstring, subject[i]); 34 | } 35 | 36 | } 37 | else { 38 | newstring = str_append_chr(newstring, subject[i]); 39 | } 40 | } 41 | else { 42 | newstring = str_append_chr(newstring, subject[i]); 43 | } 44 | } 45 | return newstring; 46 | } 47 | 48 | /* added in B */ 49 | int str_equals(char *equal1, char *eqaul2) 50 | { 51 | while(*equal1==*eqaul2) 52 | { 53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;} 54 | equal1++; 55 | eqaul2++; 56 | } 57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;} 58 | else 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build call-commit-graph history for one pickled chunk of commits.

Usage: build_history.py <i>, where <i> selects
data/branch_commits_chunk<i>.pickle under the repo root.
"""

import os
import sys
import pickle

from git import Repo

# Fixed stale import paths: the repository has persper/analytics/, not the
# old persper/graphs/ package this script referenced (which no longer
# exists in the tree). Also dropped the unused `subprocess` import.
# NOTE(review): confirm Analyzer and CGraph are still exported from these
# modules under their original names.
from persper.analytics.analyzer import Analyzer
from persper.analytics.c import CGraph
from persper.util.path import root_path


def usage(cmd):
    """Print a one-line usage banner for *cmd* (the script name)."""
    print("Usage: {0} [i]".format(cmd))
    print("\tBuild history for data/branch_commits_chunk[i].pickle")


def run(i):
    """Analyze commit chunk *i* (a string index naming the pickle file).

    Loads the list of commit SHAs from the chunk pickle, resolves them
    against the local linux repo, and feeds them to Analyzer.build_history.
    """
    repo_path = os.path.join(root_path, 'repos/linux-complete')
    pickle_path = os.path.join(
        root_path, 'data/branch_commits_chunk' + i + '.pickle')
    with open(pickle_path, 'rb') as f:
        sha_lst = pickle.load(f)

    az = Analyzer(repo_path, CGraph())
    r = Repo(repo_path)
    chunk_commits = [r.commit(sha) for sha in sha_lst]
    az.build_history(chunk_commits, phase='history-chunk-' + i)


def main():
    """Dispatch on argv: exactly one argument (the chunk index) is required."""
    if len(sys.argv) == 2:
        run(sys.argv[1])
    else:
        usage(sys.argv[0])


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Distance metrics between two {key: score-vector} maps."""

import math


def deviation(map1, map2, index):
    """Root-mean-square difference between two score maps at *index*.

    Keys missing from map2 fall back to map1's own vector, so they
    contribute zero to the deviation. Both maps must have equal size.
    """
    size = len(map1)
    assert len(map2) == size
    total = sum(
        (vec[index] - map2.get(key, vec)[index]) ** 2
        for key, vec in map1.items()
    )
    return math.sqrt(total / size)


def pair_changes(map1, map2, index):
    """Count key pairs whose relative order differs between the two maps.

    A pair counts when its score difference changes sign between the maps,
    or is zero in exactly one of them; pairs tied in both maps are ignored.
    O(n^2) over the keys of map1.
    """
    size = len(map1)
    assert len(map2) == size
    names = list(map1)
    changed = 0
    for i in range(size - 1):
        for j in range(i + 1, size):
            delta1 = map1[names[i]][index] - map1[names[j]][index]
            delta2 = map2[names[i]][index] - map2[names[j]][index]
            if delta1 == 0 and delta2 == 0:
                continue  # tied in both maps: no order change
            if delta1 == 0 or delta2 == 0 or delta1 * delta2 < 0:
                changed += 1
    return changed
workbook[sheet_name] 34 | except KeyError: 35 | return None 36 | 37 | 38 | def main(): 39 | wb = Workbook() 40 | ws = wb.active 41 | data = [[x] for x in range(10)] 42 | fillout(ws, (1, 1), data) 43 | 44 | data = [[x, 2 * x] for x in range(10)] 45 | fillout(ws, (2, 2), data) 46 | 47 | data = [['Sheet1 rank distance', 'Sheet1 value distance'], 48 | [0.0, 0.5], [1.0, 0.5]] 49 | fillout(ws, (1, 2), data) 50 | 51 | print(sheet(wb, 'Sheet')) 52 | print(sheet(wb, 'InvalidSheetName')) 53 | 54 | print(fillin(ws, (1, 1), 10, 1)) 55 | print(fillin(ws, (2, 2), 10, 2)) 56 | 57 | i = 0 58 | while True: 59 | c = cell(ws, (2, 1), step_column=2, index=i) 60 | if c.value is None: 61 | break 62 | print(c.value) 63 | i += 1 64 | 65 | wb.save('check.xlsx') 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /tools/excel_charts/gini/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /tools/excel_charts/gini/README.md: -------------------------------------------------------------------------------- 1 | # gini 2 | A Gini coefficient calculator in Python. 3 | 4 | ## Overview 5 | This is a function that calculates the Gini coefficient of a numpy array. Gini coefficients are often used to quantify income inequality, read more [here](http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm). 
6 | 7 | The function in ```gini.py``` is based on the third equation from [here](http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm), which defines the Gini coefficient as: 8 | 9 | ![G = \dfrac{ \sum_{i=1}^{n} (2i - n - 1) x_i}{n \sum_{i=1}^{n} x_i}](https://github.com/oliviaguest/gini/raw/master/gini.png "Gini equation") 10 | 11 | 12 | ## Examples 13 | For a very unequal sample, 999 zeros and a single one, 14 | ``` 15 | >>> from gini import * 16 | >>> a = np.zeros((1000)) 17 | >>> a[0] = 1.0 18 | ``` 19 | the Gini coefficient is very close to 1.0: 20 | ``` 21 | >>> gini(a) 22 | 0.99890010998900103 23 | ``` 24 | 25 | For uniformly distributed random numbers, it will be low, around 0.33: 26 | ``` 27 | >>> s = np.random.uniform(-1,0,1000) 28 | >>> gini(s) 29 | 0.3295183767105907 30 | ``` 31 | 32 | For a homogeneous sample, the Gini coefficient is 0.0: 33 | ``` 34 | >>> b = np.ones((1000)) 35 | >>> gini(b) 36 | 0.0 37 | ``` 38 | 39 | ## Input Assumptions 40 | The Gini calculation by definition requires non-zero positive (ascending-order) sorted values within a 1d vector. This is dealt with within [```gini()```](https://github.com/oliviaguest/gini/blob/master/gini.py). 
So these four assumptions can be violated, as they are controlled for: 41 | ``` python 42 | def gini(array): 43 | """Calculate the Gini coefficient of a numpy array.""" 44 | # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif 45 | # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm 46 | array = array.flatten() #all values are treated equally, arrays must be 1d 47 | if np.amin(array) < 0: 48 | array -= np.amin(array) #values cannot be negative 49 | array += 0.0000001 #values cannot be 0 50 | array = np.sort(array) #values must be sorted 51 | index = np.arange(1,array.shape[0]+1) #index per array element 52 | n = array.shape[0]#number of array elements 53 | return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array))) #Gini coefficient 54 | ``` 55 | 56 | ## Notes 57 | * It is significantly faster than (the [current implementation of](https://github.com/pysal/pysal/issues/855)) PySAL's Gini coefficient function (see [pysal.inequality.gini](http://pysal.readthedocs.io/en/latest/_modules/pysal/inequality/gini.html)) and outputs are indistinguishable before approximately 6 decimal places. In other words, the two functions are arithmetically identical. 58 | 59 | * It is slightly faster than the [Gini coefficient function by David on Ellipsix](http://www.ellipsix.net/blog/2012/11/the-gini-coefficient-for-distribution-inequality.html). 60 | 61 | Many other Gini coefficient functions found online do not produce equivalent results, hence why I wrote this. 
import numpy as np


def gini(array):
    """Gini coefficient of a numpy array (flattened to 1-d).

    Implements G = sum((2i - n - 1) * x_i) / (n * sum(x_i)) over the
    ascending-sorted values. Input is first shifted non-negative and
    nudged away from zero, so any real-valued array is accepted.
    Based on: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
    """
    # flatten() copies, so the caller's array is never mutated.
    values = array.flatten()
    smallest = np.amin(values)
    if smallest < 0:
        values -= smallest       # the formula requires non-negative values
    values += 0.0000001          # ...and strictly positive ones
    values = np.sort(values)     # ...in ascending order
    n = values.shape[0]
    ranks = np.arange(1, n + 1)  # 1-based rank of each sorted value
    return np.sum((2 * ranks - n - 1) * values) / (n * np.sum(values))
#!/usr/bin/env python3
"""Unit tests for the map-distance helpers in distance.py."""

import math
import unittest
from random import random
from statistics import mean
from statistics import pstdev

import distance


class TestDistanceMethods(unittest.TestCase):

    def test_deviation(self):
        """deviation() against an all-mean map equals the population stdev."""
        count = 1000000
        samples = [random() for _ in range(count)]
        avg = mean(samples)
        observed = {}
        flattened = {}
        for idx, value in enumerate(samples):
            observed[idx] = [value]
            flattened[idx] = [avg]
        result = distance.deviation(observed, flattened, 0)
        self.assertTrue(math.isclose(result, pstdev(samples, avg)))

    def test_pair_changes(self):
        """pair_changes() counts the pairs whose relative order flips."""
        base = {'A': [1], 'B': [2], 'C': [3], 'D': [4], 'E': [5]}
        swap_bc = {'A': [1], 'B': [3], 'C': [2], 'D': [4], 'E': [5]}
        swap_ac = {'A': [3], 'B': [2], 'C': [1], 'D': [4], 'E': [5]}
        rotated = {'A': [5], 'B': [1], 'C': [2], 'D': [3], 'E': [4]}
        self.assertEqual(distance.pair_changes(base, base, 0), 0)
        self.assertEqual(distance.pair_changes(base, swap_bc, 0), 1)
        self.assertEqual(distance.pair_changes(base, swap_ac, 0), 3)
        self.assertEqual(distance.pair_changes(base, rotated, 0), 4)


if __name__ == '__main__':
    unittest.main()
def find_github(name, urls):
    """Return the url entries whose project name covers *name*.

    A url entry matches when every significant word of *name* (lower-cased,
    longer than one character) also occurs in the entry's project name.

    :param name: project name to look up.
    :param urls: iterable of dicts with at least 'name' and 'github_repo'.
    :returns: list of {'name', 'github_repo'} dicts for the matches.
    """
    def word_set(text):
        # Significant words only: lower-cased and longer than one character.
        return {w.lower() for w in text.split() if len(w) > 1}

    wanted = word_set(name)
    return [
        {'name': entry['name'], 'github_repo': entry['github_repo']}
        for entry in urls
        if wanted <= word_set(entry['name'])
    ]
def main():
    """Produce the config file for the JIRA issue crawler.

    Joins the per-project issue stats (from global_stats) with the
    Apache/GitHub url list (from collect_git_urls). Each matched project
    yields one tab-separated line in the output file; projects without a
    GitHub candidate are listed in '<output>.empty' instead.
    """
    parser = argparse.ArgumentParser(
        # Fixed: the two fragments previously concatenated to "forthe".
        description='Select projects to produce the config file for '
                    'the JIRA issue crawler')
    parser.add_argument('-s', '--stats-file', required=True,
                        help='the project issue stats file '
                             'produced by global_stats')
    parser.add_argument('-u', '--url-file', required=True,
                        help='the git url file produced by collect_git_urls')
    parser.add_argument('-d', '--parent-dir', required=True,
                        help='the dir to contain repos')
    parser.add_argument('-o', '--output-file', required=True,
                        help='output file')
    args = parser.parse_args()

    issue_stats = get_issue_stats(args.stats_file)

    # Load the name -> repo-url records written by collect_git_urls.
    project_urls = []
    with open(args.url_file, 'r') as urls:
        for line in urls:
            name, apache_repo, github_repo = line.split(',')
            project_urls.append({
                'name': name,
                'apache_repo': apache_repo,
                'github_repo': github_repo
            })

    re_name = re.compile(r'https://github\.com/apache/(\S+)')
    # Context managers guarantee both outputs are flushed and closed even
    # if a malformed input line raises midway.
    with open(args.output_file, 'w') as out_file, \
            open(args.output_file + '.empty', 'w') as empty_file:
        for project in issue_stats:
            candidates = find_github(project['name'], project_urls)
            if not candidates:
                print(args.parent_dir, project['key'], 'master',
                      sep='\t', file=empty_file)
                continue
            for candidate in candidates:
                github = candidate['github_repo'].strip()
                match = re_name.search(github)
                if match is None:
                    # Not an apache GitHub mirror URL: nothing to clone.
                    # (Previously this crashed with AttributeError.)
                    continue
                path = os.path.join(args.parent_dir, match.group(1))
                print(path, project['key'], 'master', github + '.git',
                      sep='\t', file=out_file)
/tools/repo_crawler/README.md: -------------------------------------------------------------------------------- 1 | ## Data set format 2 | 3 | Each [project]-issues directory contains JIRA issues and GitHub pull request 4 | (PR) comments of the project. Only issues resolved and PRs closed by commits 5 | are included. 6 | 7 | In a project directory, every file starts with the commit hash (first ten 8 | digits) that the issue/PR is associated with. You can browse the commit via 9 | https://github.com/[user]/[project]/commit/[hash]. E.g., 10 | https://github.com/apache/spark/commit/b8aec6cd23. 11 | 12 | There are two types of files. 13 | 14 | 1. [hash]-[PROJECT]-[#].xml is an XML representation of the JIRA issue. You can 15 | browse the original issue via 16 | https://issues.apache.org/jira/browse/[PROJECT]-[#]. E.g., 17 | https://issues.apache.org/jira/browse/SPARK-10474. 18 | 19 | 2. [hash]-GitHub-[#].xml is an XML representation of the PR conversation. You 20 | can browse the original PR via https://github.com/[user]/[project]/pull/[#]. 21 | E.g., https://github.com/apache/spark/pull/13796. 22 | 23 | Besides, there are shadow files starting with ``.invalid.''. They can be 24 | ignored by users of this data set. Those files denote wrong information in 25 | commit messages. 
class GitHubComments:
    """Downloads GitHub pull-request comment threads as XML files.

    Wraps a github3 session and throttles API calls to at most
    ``limit_per_min`` per minute; the limiter state is shared across
    threads via a lock.
    """

    def __init__(self, user = None, password = None, limit_per_min=81):
        self.gh = github3.login(user, password)
        self._limit_per_min = limit_per_min

        # Rate-limiter state, guarded by _lock: _rest is the number of
        # requests left in the current one-minute window; _last_time is
        # when the window started.
        self._lock = threading.Lock()
        self._last_time = time.time()
        self._rest = limit_per_min

    def login(self, user, password):
        """Re-authenticate the underlying github3 session."""
        self.gh = github3.login(user, password)

    def get_lease(self):
        """Try to take one request slot from the rate limiter.

        :returns: True when a request may be issued now; False when the
            current one-minute window is exhausted (caller should retry
            later).
        """
        with self._lock:
            if self._rest > 0:
                self._rest -= 1
                return True
            elif time.time() - self._last_time > 60:
                # A minute has elapsed: start a fresh window and consume
                # one slot from it immediately.
                self._rest = self._limit_per_min - 1
                self._last_time = time.time()
                return True
            else:
                return False

    def download(self, user, repo, num, file_path):
        """Fetch PR *num* of *user*/*repo*; write its comments to *file_path*.

        Blocks (polling every 5 s) until the rate limiter grants a slot,
        then serializes both the issue comments and the review comments of
        the pull request into a single <comments> XML document.
        """
        while not self.get_lease():
            time.sleep(5)
        pr = self.gh.pull_request(user, repo, num)
        comments = ET.Element('comments')
        for comment in pr.issue_comments():
            snippet = dicttoxml.dicttoxml(comment.as_dict(),
                                          attr_type=False,
                                          custom_root='comment')
            # Drop non-printable characters so ElementTree can parse it.
            # NOTE(review): dicttoxml returns bytes on Python 3, where this
            # join would raise; assumes Python 2 (see setup.sh) -- confirm.
            snippet = ''.join(x for x in snippet if x in string.printable)
            comments.append(ET.fromstring(snippet))
        for comment in pr.review_comments():
            snippet = dicttoxml.dicttoxml(comment.as_dict(),
                                          attr_type=False,
                                          custom_root='comment')
            snippet = ''.join(x for x in snippet if x in string.printable)
            comments.append(ET.fromstring(snippet))
        return ET.ElementTree(comments).write(file_path, encoding="utf-8")
'--github-password', 57 | help='password of a GitHub account', 58 | type=str, required=True) 59 | 60 | def main(): 61 | parser = argparse.ArgumentParser() 62 | add_args(parser) 63 | args = parser.parse_args() 64 | 65 | ghc = GitHubComments(args.github_user, args.github_password) 66 | ghc.download('apache', 'spark', 8060, '8060.xml') 67 | ghc.download('apache', 'spark', 8069, '8069.xml') 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /tools/repo_crawler/jira_issue.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import urllib 5 | 6 | _URL_PREFIX_XML = "https://issues.apache.org/jira/si/jira.issueviews:issue-xml/" 7 | _URL_SUFFIX_XML = ".xml" 8 | 9 | class JiraIssue: 10 | def __init__(self, 11 | url_prefix=_URL_PREFIX_XML, 12 | url_suffix=_URL_SUFFIX_XML): 13 | self.url_prefix = url_prefix 14 | self.url_suffix = url_suffix 15 | 16 | def download(self, issue_id, dir_path, file_name): 17 | url = self.url_prefix + issue_id + "/" + issue_id + self.url_suffix 18 | file_path = os.path.join(dir_path, file_name) 19 | invalid_path = os.path.join(dir_path, ".invalid." + file_name) 20 | if os.path.isfile(file_path) or os.path.isfile(invalid_path): 21 | return 22 | for i in range(3): 23 | try: 24 | print urllib.urlretrieve(url, file_path)[0] 25 | with open(file_path, 'r') as downloaded: 26 | if "

Oops, you've found a dead link.

" in \ 27 | downloaded.read(): 28 | os.rename(file_path, invalid_path) 29 | print "Invalid issue ID:", invalid_path 30 | break 31 | except Exception as e: 32 | if i == 2: 33 | print "[Error] JiraIssue.download: ", type(e), e 34 | else: 35 | time.sleep(10) 36 | 37 | if __name__ == "__main__": 38 | if len(sys.argv) != 3: 39 | print sys.argv[0] + " ISSUE_ID FILE_PATH" 40 | sys.exit(1) 41 | jira_issue = JiraIssue() 42 | jira_issue.download(sys.argv[1], sys.argv[2]); 43 | -------------------------------------------------------------------------------- /tools/repo_crawler/repo.config: -------------------------------------------------------------------------------- 1 | ../../repos/hbase HBASE rel/1.3.1 https://github.com/apache/hbase.git 2 | ../../repos/spark SPARK v2.1.1 https://github.com/apache/spark.git 3 | ../../repos/zookeeper ZOOKEEPER release-3.5.3 https://github.com/apache/zookeeper.git 4 | ../../repos/incubator-systemml SYSTEMML v0.14.0-incubating-rc4 https://github.com/apache/incubator-systemml.git 5 | ../../repos/maven MNG maven-3.5.0 https://github.com/apache/maven.git 6 | ../../repos/cassandra CASSANDRA cassandra-3.11.0 https://github.com/apache/cassandra.git 7 | ../../repos/couchdb COUCHDB 2.0.0 https://github.com/apache/couchdb.git 8 | ../../repos/hive HIVE release-2.3.0-rc0 https://github.com/apache/hive.git 9 | ../../repos/activemq AMQ activemq-5.15.0 https://github.com/apache/activemq.git 10 | ../../repos/beam BEAM v2.0.0 https://github.com/apache/beam.git 11 | ../../repos/cloudstack CLOUDSTACK 4.9.2.0 https://github.com/apache/cloudstack.git 12 | ../../repos/ambari AMBARI release-2.5.1 https://github.com/apache/ambari.git 13 | ../../repos/geode GEODE rel/v1.1.1 https://github.com/apache/geode.git 14 | ../../repos/jackrabbit JCR jackrabbit-2.15.4 https://github.com/apache/jackrabbit.git 15 | ../../repos/airavata AIRAVATA airavata-0.16 https://github.com/apache/airavata.git 16 | ../../repos/ant-ivy IVY 2.4.0 https://github.com/apache/ant-ivy.git 17 | 
#! /bin/bash
# Bootstrap for the repo_crawler tools: installs the Python 2 runtime, pip,
# and the libraries the crawler scripts import (sh, github3.py, dicttoxml).

sudo apt install -y python
sudo apt install -y python-pip
sudo pip install --upgrade pip
sudo pip install sh

# libssl-dev: native TLS headers, presumably needed to build github3.py's
# dependencies -- confirm. --pre allows installing a github3.py pre-release.
sudo apt install -y libssl-dev
sudo pip install --pre github3.py
sudo pip install dicttoxml
def stats_commits(repo_path, branch, author_stats=None):
    """Count commits per author on *branch* of the repo at *repo_path*.

    Checks out *branch* and parses ``git shortlog -sn`` output.

    :param repo_path: path of a local git repository.
    :param branch: branch (or any ref) to check out before counting.
    :param author_stats: optional dict to update in place; entries are
        ``{author_name: {'n_commits': int}}``. A new dict is created when
        omitted.
    :returns: the (possibly newly created) author_stats dict.
    :raises subprocess.CalledProcessError: if either git command fails.
    """
    if author_stats is None:
        author_stats = {}
    subprocess.check_output(['git', '-C', repo_path, 'checkout', branch])
    # check_output (unlike the previous bare Popen, which was never waited
    # on) reaps the child and raises on a non-zero exit status, so git
    # failures are no longer silently ignored.
    shortlog = subprocess.check_output(
        ['git', '--no-pager', '-C', repo_path, 'shortlog', '-sn'])
    for line in shortlog.decode().splitlines():
        # Each shortlog -sn line looks like "  123\tAuthor Name".
        num, name = [s.strip() for s in line.split('\t')]
        # Preserve any other keys a caller may have stored for this author.
        author_stats.setdefault(name, {})['n_commits'] = int(num)
    return author_stats
def main():
    """CLI entry point: print per-author or per-repo commit stats as CSV."""
    parser = argparse.ArgumentParser(
        description='List author stats of git repo(s)')
    parser.add_argument('-c', '--count-commits', metavar='DIR',
                        help='Git repo dir to list authors and their # commits')
    parser.add_argument('-b', '--branch', default='master',
                        help='Branch of the repo to analyze')
    parser.add_argument('-a', '--count-authors', metavar='DIR', nargs='+',
                        help='Multiple git repos to list their # authors')
    args = parser.parse_args()
    if args.count_commits:
        if not os.path.isdir(args.count_commits):
            # BUG FIX: this read args.dir, which argparse never defines, so
            # the error path itself crashed with AttributeError.
            sys.exit('Error: ' + args.count_commits + ' is not a valid dir!')
        author_stats = stats_commits(args.count_commits, args.branch)
        # Most prolific authors first.
        for name, stats in sorted(author_stats.items(),
                                  key=lambda x: x[1]['n_commits'],
                                  reverse=True):
            print(name, stats['n_commits'], sep=',')
    elif args.count_authors:
        project_authors = { }
        for d in args.count_authors:
            # Skip plain files and hidden entries (e.g. from a shell glob).
            if os.path.isfile(d) or d.startswith('.'):
                continue
            repo_name = os.path.basename(os.path.normpath(d))
            print('Parsing ' + repo_name)
            project_authors[repo_name] = stats_commits(d, args.branch)
        # Repos with the most authors first.
        for repo_name, author_stats in sorted(project_authors.items(),
                                              key=lambda x: len(x[1]),
                                              reverse=True):
            print(repo_name, len(author_stats), sep=',')
    else:
        sys.exit('Error: see -h for usage.')
def main():
    """CLI entry point: print, per commit group, the fraction of commits
    associated with a PR or JIRA issue, plus the collected references."""
    parser = argparse.ArgumentParser(
        description='Stats commits through pull requests/issues')
    parser.add_argument('-n', '--num-groups', type=int, required=True,
                        help='number of groups of commits in stats')
    parser.add_argument('-d', '--dir', required=True,
                        help='dir of the git repo')
    parser.add_argument('-k', '--key', help='key of JIRA issue')
    parser.add_argument('-t', '--tag', help='tag to check out of the repo')
    parser.add_argument('-m', '--max', type=int,
                        help='max number of commits to process')
    args = parser.parse_args()

    if not os.path.isdir(args.dir):
        sys.exit('Error: ' + args.dir + ' is not a valid dir!')

    if args.tag:
        git_repo = git.bake('-C', os.path.expanduser(args.dir))
        git_repo.checkout(args.tag)

    print(os.path.basename(os.path.normpath(args.dir)))
    n = num_commits(args.dir)
    # BUG FIX: --max is optional; comparing None < int raises TypeError on
    # Python 3, so only clamp when a limit was actually supplied.
    if args.max is not None and args.max < n:
        n = args.max
    # Commits per group (integer division).
    n //= args.num_groups
    for i in reversed(range(args.num_groups)):
        np, prs = stats_pr(args.dir, args.key, i * n, (i + 1) * n)
        print(np / n, end=',')
        print('"{0}"'.format(','.join(prs)))
$DIR/hbase.pr.csv & 11 | ./stats_pr.py -n $N -d repos/spark -t v2.1.0 -k SPARK -m $M > $DIR/spark.pr.csv & 12 | ./stats_pr.py -n $N -d repos/zookeeper -t release-3.4.9 -k ZOOKEEPER -m $M > $DIR/zookeeper.pr.csv & 13 | ./stats_pr.py -n $N -d repos/incubator-systemml -t v0.14.0-incubating-rc4 -k SYSTEMML -m $M > $DIR/systemml.pr.csv & 14 | ./stats_pr.py -n $N -d repos/maven -t maven-3.3.9 -k MNG -m $M > $DIR/maven.pr.csv & 15 | ./stats_pr.py -n $N -d repos/cassandra -t cassandra-3.10 -k CASSANDRA -m $M > $DIR/cassandra.pr.csv & 16 | ./stats_pr.py -n $N -d repos/couchdb -t 2.0.0 -k COUCHDB -m $M > $DIR/couchdb.pr.csv & 17 | ./stats_pr.py -n $N -d repos/hive -t rel/release-2.1.1 -k HIVE -m $M > $DIR/hive.pr.csv & 18 | ./stats_pr.py -n $N -d repos/rails -t v5.1.1 -m $M > $DIR/rails.pr.csv & 19 | ./stats_pr.py -n $N -d repos/opencv -t 3.2.0 -m $M > $DIR/opencv.pr.csv & 20 | ./stats_pr.py -n $N -d repos/tensorflow -t v1.1.0 -m $M > $DIR/tensorflow.pr.csv & 21 | ./stats_pr.py -n $N -d repos/vagrant -t v1.9.4 -m $M > $DIR/vagrant.pr.csv & 22 | ./stats_pr.py -n $N -d repos/jekyll -t v3.4.3 -m $M > $DIR/jekyll.pr.csv & 23 | ./stats_pr.py -n $N -d repos/discourse -t v1.7.8 -m $M > $DIR/discourse.pr.csv & 24 | 25 | for pid in $(jobs -p) 26 | do 27 | wait $pid 28 | done 29 | 30 | -------------------------------------------------------------------------------- /tools/repo_stats/test/couchdb.pr.csv: -------------------------------------------------------------------------------- 1 | couchdb 2 | 0.0,"" 3 | 0.2,"COUCHDB-1911,COUCHDB-1853" 4 | 0.1,"COUCHDB-1922" 5 | 0.0,"" 6 | 0.0,"" 7 | 0.1,"COUCHDB-1923" 8 | 0.3,"COUCHDB-1647,COUCHDB-1921,COUCHDB-1921" 9 | 0.1,"COUCHDB-1911" 10 | 0.1,"COUCHDB-1668" 11 | 0.0,"" 12 | 0.2,"COUCHDB-1986,COUCHDB-1795,COUCHDB-1962" 13 | 0.0,"" 14 | 0.0,"" 15 | 0.0,"" 16 | 0.0,"" 17 | 0.0,"" 18 | 0.0,"" 19 | 0.0,"" 20 | 0.1,"COUCHDB-2031" 21 | 0.0,"" 22 | 0.2,"COUCHDB-2040,COUCHDB-2028" 23 | 0.1,"COUCHDB-2054" 24 | 0.1,"COUCHDB-1474" 25 | 
0.1,"COUCHDB-2086" 26 | 0.1,"COUCHDB-2067" 27 | 0.6,"COUCHDB-2189,170,COUCHDB-1076,COUCHDB-2187,COUCHDB-2170,COUCHDB-2123" 28 | 0.3,"COUCHDB-2196,COUCHDB-1180,COUCHDB-1036,COUCHDB-1180,169" 29 | 0.5,"184,183,COUCHDB-2110,COUCHDB-2166,COUCHDB-2201" 30 | 0.3,"COUCHDB-2169,186,185" 31 | 0.2,"COUCHDB-2209,190" 32 | 0.0,"" 33 | 0.1,"COUCHDB-2104" 34 | 0.1,"COUCHDB-1986" 35 | 0.0,"" 36 | 0.3,"COUCHDB-1697,200,COUCHDB-2206" 37 | 0.1,"211" 38 | 0.2,"COUCHDB-2136,COUCHDB-2220,COUCHDB-1669" 39 | 0.0,"" 40 | 0.2,"COUCHDB-2233,COUCHDB-2200" 41 | 0.1,"COUCHDB-2158" 42 | 0.0,"" 43 | 0.4,"COUCHDB-2222,COUCHDB-2153,COUCHDB-2248,COUCHDB-2249" 44 | 0.1,"COUCHDB-2238" 45 | 0.1,"250" 46 | 0.0,"" 47 | 0.0,"" 48 | 0.1,"COUCHDB-2026" 49 | 0.2,"COUCHDB-1133,COUCHDB-1133" 50 | 0.3,"COUCHDB-1986,COUCHDB-2324,268" 51 | 0.1,"274" 52 | 0.5,"COUCHDB-1432,COUCHDB-2430,260,276,COUCHDB-708" 53 | 0.1,"COUCHDB-2362" 54 | 0.3,"293,295,277" 55 | 0.3,"COUCHDB-1145,291,COUCHDB-2557,296" 56 | 0.2,"272,COUCHDB-2619,306" 57 | 0.1,"COUCHDB-2684" 58 | 0.1,"COUCHDB-2237" 59 | 0.0,"" 60 | 0.0,"" 61 | 0.0,"" 62 | 0.0,"" 63 | 0.1,"COUCHDB-2762" 64 | 0.0,"" 65 | 0.1,"COUCHDB-2797" 66 | 0.1,"COUCHDB-2824" 67 | 0.1,"COUCHDB-2824" 68 | 0.0,"" 69 | 0.0,"" 70 | 0.2,"359,COUCHDB-2844" 71 | 0.0,"" 72 | 0.1,"COUCHDB-2511" 73 | 0.2,"COUCHDB-2775,COUCHDB-2775" 74 | 0.0,"" 75 | 0.2,"COUCHDB-2409,COUCHDB-2674,COUCHDB-2850,COUCHDB-2858,COUCHDB-2666" 76 | 0.5,"COUCHDB-2879,COUCHDB-1447,COUCHDB-2835,COUCHDB-2534,COUCHDB-2859" 77 | 0.2,"COUCHDB-2905,COUCHDB-2897,COUCHDB-2898" 78 | 0.0,"" 79 | 0.3,"COUCHDB-2874,COUCHDB-2082,COUCHDB-2082" 80 | 0.0,"" 81 | 0.2,"COUCHDB-2938,COUCHDB-2866,COUCHDB-2872" 82 | 0.1,"COUCHDB-2938" 83 | 0.6,"COUCHDB-2968,COUCHDB-2965,COUCHDB-2963,COUCHDB-2959,390,386" 84 | 0.6,"COUCHDB-2978,COUCHDB-2978,COUCHDB-2900,397,396,393,391" 85 | 0.2,"408,COUCHDB-2988" 86 | 0.0,"" 87 | 0.0,"" 88 | 0.4,"COUCHDB-3016,COUCHDB-3016,COUCHDB-3016,COUCHDB-2990" 89 | 0.5,"COUCHDB-3028,420,421,419,416" 90 | 
0.4,"COUCHDB-3054,428,426,COUCHDB-3039" 91 | 0.0,"" 92 | 0.1,"COUCHDB-3060" 93 | 0.1,"COUCHDB-3066" 94 | 0.1,"COUCHDB-3070" 95 | 0.6,"COUCHDB-3096,COUCHDB-3089,COUCHDB-3092,COUCHDB-3084,COUCHDB-3084,COUCHDB-3082" 96 | 0.4,"COUCHDB-3060,COUCHDB-3104,COUCHDB-3104,COUCHDB-3104,COUCHDB-2779,COUCHDB-3097,COUCHDB-3099" 97 | 0.3,"COUCHDB-3102,COUCHDB-3017,438" 98 | 0.5,"COUCHDB-3135,COUCHDB-3122,COUCHDB-3134,COUCHDB-3136,COUCHDB-3121,COUCHDB-3114,COUCHDB-3118" 99 | 0.1,"COUCHDB-3132" 100 | 0.0,"" 101 | 0.1,"COUCHDB-3143" 102 | -------------------------------------------------------------------------------- /tools/repo_stats/test/discourse.pr.csv: -------------------------------------------------------------------------------- 1 | discourse 2 | 0.1,"2547" 3 | 0.0,"" 4 | 0.1,"2549" 5 | 0.3,"2552,2551,2550" 6 | 0.5,"2546,2530,2537,2553,2554" 7 | 0.2,"2532,2555" 8 | 0.1,"2556" 9 | 0.1,"2557" 10 | 0.0,"" 11 | 0.0,"" 12 | 0.1,"2560" 13 | 0.2,"2563,2561" 14 | 0.3,"2572,2571,2564" 15 | 0.6,"2568,2566,2565,2548,2575,2573" 16 | 0.3,"2578,2569,2567" 17 | 0.1,"2580" 18 | 0.0,"" 19 | 0.4,"2584,2583,2581,2582" 20 | 0.2,"2587,2586" 21 | 0.2,"2589,2588" 22 | 0.5,"2592,2595,2590,2596,2597" 23 | 0.0,"" 24 | 0.2,"2585,2599" 25 | 0.3,"2600,2602,2601" 26 | 0.1,"2603" 27 | 0.0,"" 28 | 0.3,"2591,2604,2606" 29 | 0.2,"2609,2608" 30 | 0.1,"2607" 31 | 0.0,"" 32 | 0.1,"2612" 33 | 0.1,"2613" 34 | 0.2,"2614,2615" 35 | 0.0,"" 36 | 0.0,"" 37 | 0.4,"2617,2620,2618,2619" 38 | 0.2,"2622,2623" 39 | 0.3,"2625,2624,2627" 40 | 0.1,"2628" 41 | 0.0,"" 42 | 0.1,"2632" 43 | 0.0,"" 44 | 0.0,"" 45 | 0.1,"2634" 46 | 0.0,"" 47 | 0.1,"2629" 48 | 0.0,"" 49 | 0.3,"2638,2643,2642" 50 | 0.1,"2644" 51 | 0.1,"2646" 52 | 0.0,"" 53 | 0.3,"2649,2636,2648" 54 | 0.2,"2652,2651" 55 | 0.1,"2654" 56 | 0.2,"2655,2656" 57 | 0.0,"" 58 | 0.2,"2662,2661" 59 | 0.2,"2663,2645" 60 | 0.1,"2658" 61 | 0.1,"2665" 62 | 0.5,"2667,2659,2650,2666,2511" 63 | 0.0,"" 64 | 0.0,"" 65 | 0.0,"" 66 | 0.2,"2671,2669" 67 | 0.1,"2672" 68 | 0.0,"" 69 | 0.0,"" 
70 | 0.2,"2676,2681" 71 | 0.5,"2682,2647,2675,2633,2670" 72 | 0.2,"2678,2683" 73 | 0.1,"2684" 74 | 0.2,"2690,2688" 75 | 0.2,"2677,2689" 76 | 0.3,"2693,2691,2692" 77 | 0.3,"2686,2695,2694" 78 | 0.5,"2700,2685,2696,2698,2699" 79 | 0.0,"" 80 | 0.0,"" 81 | 0.0,"" 82 | 0.1,"3192" 83 | 0.0,"" 84 | 0.0,"" 85 | 0.0,"" 86 | 0.0,"" 87 | 0.0,"" 88 | 0.0,"" 89 | 0.0,"" 90 | 0.0,"" 91 | 0.0,"" 92 | 0.0,"" 93 | 0.0,"" 94 | 0.0,"" 95 | 0.0,"" 96 | 0.0,"" 97 | 0.0,"" 98 | 0.0,"" 99 | 0.0,"" 100 | 0.0,"" 101 | 0.0,"" 102 | -------------------------------------------------------------------------------- /tools/repo_stats/test/jekyll.pr.csv: -------------------------------------------------------------------------------- 1 | jekyll 2 | 0.5,"4410,4429,4424,4423,4404" 3 | 0.5,"4452,4428,4437,4436,4434" 4 | 0.7,"4460,4461,4459,4465,4463,4464,4455" 5 | 0.6,"4374,4496,4487,4485,4484,4473" 6 | 0.3,"4505,4502,4492" 7 | 0.6,"4522,4526,4525,4512,4514,4517" 8 | 0.3,"4546,4543,4535" 9 | 0.2,"4553,4547" 10 | 0.8,"4591,4592,4594,4566,4559,4561,4562,4554" 11 | 0.5,"4545,4606,4597,4583,4589" 12 | 0.5,"4557,4602,4611,4599,4381" 13 | 0.5,"4620,4621,4618,4598,4590" 14 | 0.5,"4635,4630,4637,4633,4601" 15 | 0.4,"4639,4636,4558,4641" 16 | 0.5,"4645,4646,4596,4628,4555" 17 | 0.6,"4658,4659,4660,4647,4653,4652" 18 | 0.3,"4685,4682,4670" 19 | 0.5,"4700,4694,4699,4686,4491" 20 | 0.5,"4704,4706,4542,4533,4474" 21 | 0.5,"4703,4712,4640,3849,4624" 22 | 0.6,"4755,4750,4751,4717,4537,4720" 23 | 0.6,"4756,4760,4741,4763,4758,4759" 24 | 0.4,"4769,4771,4775,4781" 25 | 0.4,"4789,4734,4478,4689" 26 | 0.4,"4804,4754,4813,4786" 27 | 0.6,"4808,4595,4819,4792,4793,4799" 28 | 0.4,"4854,4847,4844,4710" 29 | 0.6,"4863,4872,4874,4867,4857,4855" 30 | 0.5,"4849,4887,4886,4839,4859" 31 | 0.5,"4888,4881,4892,4510,4890" 32 | 0.3,"4848,4903,4902" 33 | 0.1,"4916" 34 | 0.4,"4947,4950,4951,4949" 35 | 0.4,"4948,4931,4933,4934" 36 | 0.3,"4974,4958,4971" 37 | 0.5,"4959,4956,4953,4978,4975" 38 | 0.5,"4980,4976,4966,4977,4962" 39 | 
0.6,"4989,4973,4940,4987,4985,4979" 40 | 0.4,"5006,4908,5000,5005" 41 | 0.5,"5009,4917,5012,4993,5010" 42 | 0.4,"4922,5018,5017,5014" 43 | 0.4,"5025,5026,5027,5019" 44 | 0.4,"5030,5031,5032,5024" 45 | 0.5,"5056,5058,5054,5043,5015" 46 | 0.5,"5067,5096,5068,5063,5065" 47 | 0.4,"5100,5097,5069,5098" 48 | 0.5,"5060,5101,5053,5042,5011" 49 | 0.5,"5112,5114,5117,5022,5106" 50 | 0.5,"5124,5119,5115,5113,5116" 51 | 0.4,"5109,5118,5122,5123" 52 | 0.5,"5135,4860,5127,5129,5131" 53 | 0.3,"5138,5139,5137" 54 | 0.3,"5150,5140,5141" 55 | 0.5,"5177,5173,5152,5158,5143" 56 | 0.5,"5178,5168,5164,5154,5156" 57 | 0.5,"5194,5180,5185,5187,5188" 58 | 0.4,"5205,5183,5196,5190" 59 | 0.3,"5226,5214,5167" 60 | 0.5,"5244,5254,5239,5221,5222" 61 | 0.5,"5272,5274,5271,5258,5249" 62 | 0.5,"5291,5286,5287,5273,5279" 63 | 0.5,"5240,5293,5281,5294,5262" 64 | 0.5,"5189,5045,5295,5304,5280" 65 | 0.5,"5318,5334,5320,5308,5224" 66 | 0.4,"5337,5316,5335,5235" 67 | 0.5,"5372,5361,5199,5347,5326" 68 | 0.5,"5237,5369,5364,5381,5383" 69 | 0.4,"5389,5375,5376,5380" 70 | 0.5,"5403,5402,5399,5338,5397" 71 | 0.6,"5413,5416,5325,5421,5420,5405" 72 | 0.4,"5428,5157,5210,5408" 73 | 0.5,"5411,5410,5427,5430,5412" 74 | 0.5,"5212,5256,5431,5441,5264" 75 | 0.4,"5456,5433,5452,4873" 76 | 0.4,"5472,5471,5449,5442" 77 | 0.5,"5497,5494,5479,5489,5491" 78 | 0.5,"5504,5502,5495,5496,5492" 79 | 0.5,"5519,5512,5457,5505,5511" 80 | 0.5,"5538,5532,5529,5459,5466" 81 | 0.4,"5536,5539,5540,5533" 82 | 0.4,"5546,5564,5524,5557" 83 | 0.1,"5526" 84 | 0.5,"5572,5571,5464,5570,5559" 85 | 0.4,"5597,5435,5592,5582" 86 | 0.5,"5573,5587,5600,5608,5605" 87 | 0.5,"5614,5611,5530,5609,5384" 88 | 0.5,"5513,5612,5643,5641,5632" 89 | 0.5,"5655,5658,5657,5653,5652" 90 | 0.4,"5671,5670,5668,5666" 91 | 0.5,"5712,5711,5683,5705,5689" 92 | 0.5,"5726,5720,5688,5713,5693" 93 | 0.5,"5740,5738,5696,5692,5544" 94 | 0.5,"5746,5748,5725,5745,5473" 95 | 0.5,"5758,5761,5744,5752,5621" 96 | 0.5,"5768,5769,5765,5764,5750" 97 | 0.5,"5782,5731,5691,5767,5779" 
98 | 0.5,"5780,5791,5784,5312,5781" 99 | 0.3,"5640,5542,5753" 100 | 0.0,"" 101 | 0.3,"5968,5940,5924" 102 | -------------------------------------------------------------------------------- /tools/repo_stats/test/stats_pr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | N=100 4 | M=1000 5 | 6 | ../stats_pr.py -n $N -d ../repos/hbase -t rel/1.3.0 -k HBASE -m $M > hbase.pr.csv & 7 | ../stats_pr.py -n $N -d ../repos/spark -t v2.1.0 -k SPARK -m $M > spark.pr.csv & 8 | ../stats_pr.py -n $N -d ../repos/zookeeper -t release-3.4.9 -k ZOOKEEPER -m $M > zookeeper.pr.csv & 9 | ../stats_pr.py -n $N -d ../repos/incubator-systemml -t v0.14.0-incubating-rc4 -k SYSTEMML -m $M > systemml.pr.csv & 10 | ../stats_pr.py -n $N -d ../repos/maven -t maven-3.3.9 -k MNG -m $M > maven.pr.csv & 11 | ../stats_pr.py -n $N -d ../repos/cassandra -t cassandra-3.10 -k CASSANDRA -m $M > cassandra.pr.csv & 12 | ../stats_pr.py -n $N -d ../repos/couchdb -t 2.0.0 -k COUCHDB -m $M > couchdb.pr.csv & 13 | ../stats_pr.py -n $N -d ../repos/hive -t rel/release-2.1.1 -k HIVE -m $M > hive.pr.csv & 14 | ../stats_pr.py -n $N -d ../repos/rails -t v5.1.1 -m $M > rails.pr.csv & 15 | ../stats_pr.py -n $N -d ../repos/opencv -t 3.2.0 -m $M > opencv.pr.csv & 16 | ../stats_pr.py -n $N -d ../repos/tensorflow -t v1.1.0 -m $M > tensorflow.pr.csv & 17 | ../stats_pr.py -n $N -d ../repos/vagrant -t v1.9.4 -m $M > vagrant.pr.csv & 18 | ../stats_pr.py -n $N -d ../repos/jekyll -t v3.4.3 -m $M > jekyll.pr.csv & 19 | ../stats_pr.py -n $N -d ../repos/discourse -t v1.7.8 -m $M > discourse.pr.csv & 20 | 21 | for pid in $(jobs -p) 22 | do 23 | wait $pid 24 | done 25 | 26 | -------------------------------------------------------------------------------- /tools/repo_stats/test/vagrant.pr.csv: -------------------------------------------------------------------------------- 1 | vagrant 2 | 0.4,"6502,5986,5981,6534" 3 | 0.3,"5991,5993,4738" 4 | 0.6,"6150,6149,6073,6071,6050,5999" 5 
| 0.6,"6195,6185,6160,6172,6157,6156" 6 | 0.6,"6259,6254,6232,6219,6213,6203" 7 | 0.6,"6364,6322,6318,6307,6305,6288" 8 | 0.6,"6386,6383,6373,6444,6404,6367" 9 | 0.7,"6488,6479,6475,6474,6389,6521,6407" 10 | 0.6,"6515,6536,6535,6493,6382,6489" 11 | 0.5,"6049,6538,6539,6537,6406" 12 | 0.2,"6540,6541" 13 | 0.2,"6543,6542" 14 | 0.6,"6553,6551,6550,6548,6520,6545,6544" 15 | 0.5,"6557,6556,6555,6554,6552" 16 | 0.3,"4473,6560,6559" 17 | 0.4,"6565,6563,6562,6561" 18 | 0.2,"6568,6564" 19 | 0.0,"" 20 | 0.3,"6581,6567,6575" 21 | 0.4,"6584,6582,6583,6118" 22 | 0.5,"6590,6589,6588,6587,6585" 23 | 0.5,"6601,5086,6599,6591,6597" 24 | 0.1,"6606" 25 | 0.4,"6649,6636,6650,6643,3539" 26 | 0.7,"6603,6612,6659,6664,6661,6288,6652" 27 | 0.6,"6681,6691,6675,6671,6671,6662" 28 | 0.5,"6722,6728,6714,6711,6700" 29 | 0.3,"6731,6706,6718" 30 | 0.3,"6756,6753,6740" 31 | 0.7,"6800,6795,6844,6833,6816,6774,6771" 32 | 0.6,"6932,6806,6926,6891,6763,6874" 33 | 0.7,"6912,6963,6962,6952,6948,6950,6923" 34 | 0.5,"6909,6899,6922,6760,6848" 35 | 0.5,"6602,6749,6897,6867,6893" 36 | 0.5,"6977,6969,6610,6843,6805" 37 | 0.7,"7024,7026,7016,7001,6991,6981,6983" 38 | 0.6,"7086,7085,7081,7078,7056,7041" 39 | 0.4,"7101,7107,6879,7093" 40 | 0.6,"7151,7050,7123,7120,7122,7121,7104" 41 | 0.6,"7203,7191,7159,7162,7154,7153" 42 | 0.6,"7219,7216,7215,7158,7184,7204" 43 | 0.5,"7251,7103,7239,7090,7223" 44 | 0.4,"7327,7110,7299,7283" 45 | 0.7,"7059,7352,7349,7347,7339,7334,7108" 46 | 0.3,"7290,7298,7353" 47 | 0.4,"7308,7355,7354,5670" 48 | 0.5,"7126,7179,7356,7287,7293" 49 | 0.4,"7363,7360,7358,7009" 50 | 0.6,"7359,7370,7369,7366,7365,7364" 51 | 0.5,"7379,7382,7377,7376,7276" 52 | 0.4,"7270,7387,7395,7393" 53 | 0.4,"6838,7207,7396,7372" 54 | 0.4,"7190,7409,7406,7400" 55 | 0.3,"7269,7419,7418" 56 | 0.4,"7460,7453,7432,7428" 57 | 0.4,"7478,7477,7467,7456" 58 | 0.4,"7484,7483,7481,7480" 59 | 0.5,"7505,7499,7487,7492,7491" 60 | 0.5,"7550,7587,7605,7589,7574" 61 | 0.4,"7569,7568,7571,7012,7524" 62 | 0.2,"7630,7611" 63 | 
0.6,"6765,7650,7647,7643,7639,7632" 64 | 0.7,"7705,7676,7703,7690,7701,7698,7684" 65 | 0.4,"7725,7720,7675,7623" 66 | 0.6,"7752,7798,7781,7724,7740,7726" 67 | 0.5,"7712,7489,7778,7758,7819,7830" 68 | 0.5,"7813,7751,7831,7848,7802" 69 | 0.5,"7877,7873,7679,7674,7688" 70 | 0.5,"7881,7536,7866,7874,7756" 71 | 0.3,"7889,7887,7818" 72 | 0.2,"7929,7907" 73 | 0.6,"7931,7719,7928,7922,7921,7926" 74 | 0.3,"7947,7944,7943" 75 | 0.5,"7986,7793,7976,7980,7978" 76 | 0.5,"7897,7854,8000,7985,7989" 77 | 0.4,"8011,8008,7918,7879" 78 | 0.8,"8032,8033,8009,7896,7998,8027,8031,8028" 79 | 0.4,"8071,8070,8062,8041" 80 | 0.5,"8051,8052,8066,8068,8079" 81 | 0.5,"8106,8098,7867,8094,8087" 82 | 0.5,"8148,8102,8146,8092,8160" 83 | 0.8,"8198,8100,8246,8205,8233,8109,8119,8143" 84 | 0.4,"8192,8176,8191,8195" 85 | 0.5,"8270,8165,8272,8252,8248" 86 | 0.6,"8237,8219,8283,8167,8296,8273" 87 | 0.5,"8302,8291,8300,8194,8196" 88 | 0.2,"7035,8314" 89 | 0.6,"8073,7967,8089,8326,8334,7956" 90 | 0.4,"8337,8341,8327,8122" 91 | 0.5,"8353,8308,8364,8350,8344" 92 | 0.3,"8390,8366,8325" 93 | 0.3,"8385,8379,8336" 94 | 0.5,"8264,8401,8400,8399,8393" 95 | 0.2,"8416,8410" 96 | 0.5,"8436,8422,8414,8368,8421" 97 | 0.6,"8454,8451,7425,8442,8437,7840" 98 | 0.4,"8428,8457,8456,8329" 99 | 0.5,"8407,8443,8472,8310,8482" 100 | 0.4,"8497,8495,7797,8485" 101 | 0.4,"8504,8507,8503,8498" 102 | --------------------------------------------------------------------------------