0) {
28 | newstring = str_append(newstring, replace);
29 | i = i + str_len (search)-1;
30 | count = count - 1;
31 | }
32 | else {
33 | newstring = str_append_chr(newstring, subject[i]);
34 | }
35 |
36 | }
37 | else {
38 | newstring = str_append_chr(newstring, subject[i]);
39 | }
40 | }
41 | else {
42 | newstring = str_append_chr(newstring, subject[i]);
43 | }
44 | }
45 | return newstring;
46 | }
47 |
48 | /* added in B */
49 | int str_equals(char *equal1, char *eqaul2)
50 | {
51 | while(*equal1==*eqaul2)
52 | {
53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;}
54 | equal1++;
55 | eqaul2++;
56 | }
57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;}
58 | else {return -1};
59 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/I/feature-G.c:
--------------------------------------------------------------------------------
1 | /* added in G */
2 | char* str_replace(char* search, char* replace, char* subject) {
3 | char* newstring = "";
4 | int i = 0;
5 | for(i = 0; i < str_len(subject); i++) {
6 | if (subject[i] == search[0]) {
7 | int e = 0;
8 | char* calc = "";
9 | for(e = 0; e < str_len(search); e++) {
10 | if(subject[i+e] == search[e]) {
11 | calc = str_append_chr(calc, search[e]);
12 | }
13 | }
14 | if (str_equals(search, calc) == 0) {
15 | newstring = str_append(newstring, replace);
16 | i = i + str_len (search)-1;
17 | }
18 | else {
19 | newstring = str_append_chr(newstring, subject[i]);
20 | }
21 | }
22 | else {
23 | newstring = str_append_chr(newstring, subject[i]);
24 | }
25 | }
26 | return newstring;
27 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/I/feature-H.c:
--------------------------------------------------------------------------------
1 | /* added in H */
2 | struct node
3 | {
4 | int data;
5 | struct node *next;
6 | }*head;
7 |
8 | /* added in H, edited in I */
9 | void append(int num)
10 | {
11 | struct node *temp, *prev;
12 | temp=head;
13 | while(temp!=NULL)
14 | {
15 | if(temp->data==num)
16 | {
17 | if(temp==head)
18 | {
19 | head=temp->next;
20 | free(temp);
21 | return 1;
22 | }
23 | else
24 | {
25 | prev->next=temp->next;
26 | free(temp);
27 | return 1;
28 | }
29 | }
30 | else
31 | {
32 | prev=temp;
33 | temp= temp->next;
34 | }
35 | }
36 | return 0;
37 | }
38 |
39 | /* added in H, edited in G */
40 | void add( int num )
41 | {
42 | struct node *temp;
43 | temp=(struct node *)malloc(sizeof(struct node));
44 | temp->data=num;
45 | if (head== NULL)
46 | {
47 | head=temp;
48 | head->next=NULL;
49 | }
50 | }
51 |
52 | /* insert() is deleted in I */
53 |
--------------------------------------------------------------------------------
/test/test_feature_branch/I/main.c:
--------------------------------------------------------------------------------
1 | /* added in A */
2 | int str_len(char *string)
3 | {
4 | char *count = string;
5 | while(*count) {count++;}
6 | return count - string;
7 | }
8 |
9 | /* str_append is deleted in B */
10 |
11 | int str_equals(char *equal1, char *eqaul2); // Forward decl
12 |
13 | /* added in B, edited in C */
14 | char* str_append_chr(char* string, char append) {
15 | char* newstring = "";
16 | int i = 0;
17 | for(i = 0; i < str_len(subject); i++) {
18 | if (subject[i] == search[0]) {
19 | int e = 0;
20 | char* calc = "";
21 | for(e = 0; e < str_len(search); e++) {
22 | if(subject[i+e] == search[e]) {
23 | calc = str_append_chr(calc, search[e]);
24 | }
25 | }
26 | if (str_equals(search, calc) == 0) {
27 | if(count > 0) {
28 | newstring = str_append(newstring, replace);
29 | i = i + str_len (search)-1;
30 | count = count - 1;
31 | }
32 | else {
33 | newstring = str_append_chr(newstring, subject[i]);
34 | }
35 |
36 | }
37 | else {
38 | newstring = str_append_chr(newstring, subject[i]);
39 | }
40 | }
41 | else {
42 | newstring = str_append_chr(newstring, subject[i]);
43 | }
44 | }
45 | return newstring;
46 | }
47 |
48 | /* added in B */
49 | int str_equals(char *equal1, char *eqaul2)
50 | {
51 | while(*equal1==*eqaul2)
52 | {
53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;}
54 | equal1++;
55 | eqaul2++;
56 | }
57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;}
58 | else {return -1};
59 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/J/feature-J.c:
--------------------------------------------------------------------------------
1 | /* added in J */
2 | void display(struct node *r)
3 | {
4 | r=head;
5 | if(r==NULL)
6 | {
7 | return;
8 | }
9 | while(r!=NULL)
10 | {
11 | printf("%d ",r->data);
12 | r=r->next;
13 | }
14 | printf("\n");
15 | }
16 |
17 | /* added in J */
18 | int count()
19 | {
20 | struct node *n;
21 | int c=0;
22 | n=head;
23 | while(n!=NULL)
24 | {
25 | n=n->next;
26 | c++;
27 | }
28 | return c;
29 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/J/main.c:
--------------------------------------------------------------------------------
1 | /* added in A */
2 | int str_len(char *string)
3 | {
4 | char *count = string;
5 | while(*count) {count++;}
6 | return count - string;
7 | }
8 |
9 | /* added in A*/
10 | char* str_append(char* string, char* append) {
11 | char* newstring = NULL;
12 | size_t needed = snprintf(NULL, 0, "%s%s", string, append);
13 | newstring = malloc(needed);
14 | sprintf(newstring, "%s%s", string, append);
15 | return newstring;
16 | }
17 |
18 |
--------------------------------------------------------------------------------
/test/test_feature_branch/K/feature-G.c:
--------------------------------------------------------------------------------
1 | /* added in G */
2 | char* str_replace(char* search, char* replace, char* subject) {
3 | char* newstring = "";
4 | int i = 0;
5 | for(i = 0; i < str_len(subject); i++) {
6 | if (subject[i] == search[0]) {
7 | int e = 0;
8 | char* calc = "";
9 | for(e = 0; e < str_len(search); e++) {
10 | if(subject[i+e] == search[e]) {
11 | calc = str_append_chr(calc, search[e]);
12 | }
13 | }
14 | if (str_equals(search, calc) == 0) {
15 | newstring = str_append(newstring, replace);
16 | i = i + str_len (search)-1;
17 | }
18 | else {
19 | newstring = str_append_chr(newstring, subject[i]);
20 | }
21 | }
22 | else {
23 | newstring = str_append_chr(newstring, subject[i]);
24 | }
25 | }
26 | return newstring;
27 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/K/feature-H.c:
--------------------------------------------------------------------------------
1 | /* added in H */
2 | struct node
3 | {
4 | int data;
5 | struct node *next;
6 | }*head;
7 |
8 | /* added in H, edited in I */
9 | void append(int num)
10 | {
11 | struct node *temp, *prev;
12 | temp=head;
13 | while(temp!=NULL)
14 | {
15 | if(temp->data==num)
16 | {
17 | if(temp==head)
18 | {
19 | head=temp->next;
20 | free(temp);
21 | return 1;
22 | }
23 | else
24 | {
25 | prev->next=temp->next;
26 | free(temp);
27 | return 1;
28 | }
29 | }
30 | else
31 | {
32 | prev=temp;
33 | temp= temp->next;
34 | }
35 | }
36 | return 0;
37 | }
38 |
39 | /* added in H, edited in G */
40 | void add( int num )
41 | {
42 | struct node *temp;
43 | temp=(struct node *)malloc(sizeof(struct node));
44 | temp->data=num;
45 | if (head== NULL)
46 | {
47 | head=temp;
48 | head->next=NULL;
49 | }
50 | }
51 |
52 | /* insert() is deleted in I */
53 |
--------------------------------------------------------------------------------
/test/test_feature_branch/K/feature-K.c:
--------------------------------------------------------------------------------
1 | /* added in J, edited in K */
2 | void display(struct node *r)
3 | {
4 | r=head;
5 | if(r==NULL)
6 | {
7 | return;
8 | }
9 | printf("\n");
10 | }
11 |
12 | /* added in J */
13 | int count()
14 | {
15 | struct node *n;
16 | int c=0;
17 | n=head;
18 | while(n!=NULL)
19 | {
20 | n=n->next;
21 | c++;
22 | }
23 | return c;
24 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/K/main.c:
--------------------------------------------------------------------------------
1 | /* added in A */
2 | int str_len(char *string)
3 | {
4 | char *count = string;
5 | while(*count) {count++;}
6 | return count - string;
7 | }
8 |
9 | /* str_append is deleted in B */
10 |
11 | int str_equals(char *equal1, char *eqaul2); // Forward decl
12 |
13 | /* added in B, edited in C */
14 | char* str_append_chr(char* string, char append) {
15 | char* newstring = "";
16 | int i = 0;
17 | for(i = 0; i < str_len(subject); i++) {
18 | if (subject[i] == search[0]) {
19 | int e = 0;
20 | char* calc = "";
21 | for(e = 0; e < str_len(search); e++) {
22 | if(subject[i+e] == search[e]) {
23 | calc = str_append_chr(calc, search[e]);
24 | }
25 | }
26 | if (str_equals(search, calc) == 0) {
27 | if(count > 0) {
28 | newstring = str_append(newstring, replace);
29 | i = i + str_len (search)-1;
30 | count = count - 1;
31 | }
32 | else {
33 | newstring = str_append_chr(newstring, subject[i]);
34 | }
35 |
36 | }
37 | else {
38 | newstring = str_append_chr(newstring, subject[i]);
39 | }
40 | }
41 | else {
42 | newstring = str_append_chr(newstring, subject[i]);
43 | }
44 | }
45 | return newstring;
46 | }
47 |
48 | /* added in B */
49 | int str_equals(char *equal1, char *eqaul2)
50 | {
51 | while(*equal1==*eqaul2)
52 | {
53 | if ( *equal1 == '\0' || *eqaul2 == '\0' ){break;}
54 | equal1++;
55 | eqaul2++;
56 | }
57 | if(*eqaul1 == '\0' && *eqaul2 == '\0' ){return 0;}
58 | else {return -1};
59 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/cg.dot:
--------------------------------------------------------------------------------
1 | digraph test_feature_branch {
2 | A -> B -> C -> D -> E -> F -> K;
3 | B -> G -> D ;
4 | D -> H -> I -> E ;
5 | A -> J -> F ;
6 | }
--------------------------------------------------------------------------------
/test/test_feature_branch/cg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/test/test_feature_branch/cg.png
--------------------------------------------------------------------------------
/tools/build_history.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os
5 | import sys
6 | import pickle
7 | import subprocess
8 | from git import Repo
9 | from persper.graphs.analyzer import Analyzer
10 | from persper.graphs.c import CGraph
11 | from persper.util.path import root_path
12 |
13 |
14 | def usage(cmd):
15 | print("Usage: {0} [i]".format(cmd))
16 | print("\tBuild history for data/branch_commits_chunk[i].pickle")
17 |
18 |
19 | def run(i):
20 | repo_path = os.path.join(root_path, 'repos/linux-complete')
21 | pickle_path = os.path.join(
22 | root_path, 'data/branch_commits_chunk' + i + '.pickle')
23 | with open(pickle_path, 'rb') as f:
24 | sha_lst = pickle.load(f)
25 |
26 | az = Analyzer(repo_path, CGraph())
27 | r = Repo(repo_path)
28 | chunk_commits = [r.commit(sha) for sha in sha_lst]
29 | az.build_history(chunk_commits, phase='history-chunk-' + i)
30 |
31 |
32 | def main():
33 | if len(sys.argv) == 2:
34 | i = sys.argv[1]
35 | run(i)
36 | else:
37 | usage(sys.argv[0])
38 |
39 | if __name__ == "__main__":
40 | main()
41 |
--------------------------------------------------------------------------------
/tools/excel_charts/distance.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import math
4 |
5 |
6 | def deviation(map1, map2, index):
7 | n = len(map1)
8 | assert len(map2) == n
9 | var = 0
10 | for func, values in map1.items():
11 | var += (values[index] - map2.get(func, values)[index])**2
12 | return math.sqrt(var / n)
13 |
14 |
15 | def pair_changes(map1, map2, index):
16 | n = len(map1)
17 | assert len(map2) == n
18 | p = 0
19 | keys = list(map1.keys())
20 | for i in range(n - 1):
21 | for j in range(i + 1, n):
22 | d1 = map1[keys[i]][index] - map1[keys[j]][index]
23 | d2 = map2[keys[i]][index] - map2[keys[j]][index]
24 | if d1 == 0 and d2 == 0:
25 | continue
26 | elif d1 == 0 or d2 == 0:
27 | p += 1
28 | elif d1 * d2 < 0:
29 | p += 1
30 | return p
31 |
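A minimal illustration of how these two metrics behave (the maps below are made up for this sketch and mirror the shapes used by `tests/test_distance.py` further down):

```python
import distance

# Each value is a one-element list; index 0 selects that element.
map1 = {'A': [1], 'B': [2], 'C': [3]}
map2 = {'A': [1], 'B': [3], 'C': [2]}   # B and C swap places

# Root-mean-square difference of the selected values: sqrt(2/3) ~= 0.816
print(distance.deviation(map1, map2, 0))

# Number of key pairs whose relative order changes: only (B, C), so 1
print(distance.pair_changes(map1, map2, 0))
```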
--------------------------------------------------------------------------------
/tools/excel_charts/excel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from openpyxl import Workbook
4 |
5 |
6 | def fillout(worksheet, position, data):
7 | row = position[0]
8 | column = position[1]
9 | for i, array in enumerate(data):
10 | for j, value in enumerate(array):
11 | worksheet.cell(row=row + i, column=column + j, value=value)
12 |
13 |
14 | def fillin(worksheet, position, num_rows, num_columns):
15 | row = position[0]
16 | column = position[1]
17 | data = [[None for _ in range(num_columns)] for _ in range(num_rows)]
18 | for i in range(num_rows):
19 | for j in range(num_columns):
20 | data[i][j] = worksheet.cell(row=row + i, column=column + j).value
21 | return data
22 |
23 |
24 | def cell(worksheet, position, step_row=0, step_column=0, index=0):
25 | row = position[0]
26 | column = position[1]
27 | return worksheet.cell(row=row + index * step_row,
28 | column=column + index * step_column)
29 |
30 |
31 | def sheet(workbook, sheet_name):
32 | try:
33 | return workbook[sheet_name]
34 | except KeyError:
35 | return None
36 |
37 |
38 | def main():
39 | wb = Workbook()
40 | ws = wb.active
41 | data = [[x] for x in range(10)]
42 | fillout(ws, (1, 1), data)
43 |
44 | data = [[x, 2 * x] for x in range(10)]
45 | fillout(ws, (2, 2), data)
46 |
47 | data = [['Sheet1 rank distance', 'Sheet1 value distance'],
48 | [0.0, 0.5], [1.0, 0.5]]
49 | fillout(ws, (1, 2), data)
50 |
51 | print(sheet(wb, 'Sheet'))
52 | print(sheet(wb, 'InvalidSheetName'))
53 |
54 | print(fillin(ws, (1, 1), 10, 1))
55 | print(fillin(ws, (2, 2), 10, 2))
56 |
57 | i = 0
58 | while True:
59 | c = cell(ws, (2, 1), step_column=2, index=i)
60 | if c.value is None:
61 | break
62 | print(c.value)
63 | i += 1
64 |
65 | wb.save('check.xlsx')
66 |
67 |
68 | if __name__ == '__main__':
69 | main()
70 |
--------------------------------------------------------------------------------
/tools/excel_charts/gini/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
--------------------------------------------------------------------------------
/tools/excel_charts/gini/README.md:
--------------------------------------------------------------------------------
1 | # gini
2 | A Gini coefficient calculator in Python.
3 |
4 | ## Overview
5 | This is a function that calculates the Gini coefficient of a numpy array. Gini coefficients are often used to quantify income inequality; read more [here](http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm).
6 |
7 | The function in ```gini.py``` is based on the third equation from [here](http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm), which defines the Gini coefficient as:
8 |
9 | 
10 |
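Written out explicitly (this matches the implementation in ```gini.py```), with the sample sorted in ascending order so that x_1 <= x_2 <= ... <= x_n:

```
G = \frac{\sum_{i=1}^{n} (2i - n - 1)\, x_i}{n \sum_{i=1}^{n} x_i}
```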
11 |
12 | ## Examples
13 | For a very unequal sample, 999 zeros and a single one,
14 | ```
15 | >>> from gini import *
16 | >>> a = np.zeros((1000))
17 | >>> a[0] = 1.0
18 | ```
19 | the Gini coefficient is very close to 1.0:
20 | ```
21 | >>> gini(a)
22 | 0.99890010998900103
23 | ```
24 |
25 | For uniformly distributed random numbers, it will be low, around 0.33:
26 | ```
27 | >>> s = np.random.uniform(-1,0,1000)
28 | >>> gini(s)
29 | 0.3295183767105907
30 | ```
31 |
32 | For a homogeneous sample, the Gini coefficient is 0.0:
33 | ```
34 | >>> b = np.ones((1000))
35 | >>> gini(b)
36 | 0.0
37 | ```
38 |
39 | ## Input Assumptions
40 | By definition, the Gini calculation requires a 1d vector of non-zero, positive values sorted in ascending order. This is all dealt with inside [```gini()```](https://github.com/oliviaguest/gini/blob/master/gini.py), so these four assumptions may be violated by the caller; the function controls for them:
41 | ``` python
42 | def gini(array):
43 | """Calculate the Gini coefficient of a numpy array."""
44 | # based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
45 | # from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
46 | array = array.flatten() #all values are treated equally, arrays must be 1d
47 | if np.amin(array) < 0:
48 | array -= np.amin(array) #values cannot be negative
49 | array += 0.0000001 #values cannot be 0
50 | array = np.sort(array) #values must be sorted
51 | index = np.arange(1,array.shape[0]+1) #index per array element
52 | n = array.shape[0]#number of array elements
53 | return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array))) #Gini coefficient
54 | ```
55 |
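A quick sanity check of that claim (a small sketch, assuming ```gini.py``` from this directory is importable): an unsorted array containing a negative value gives the same coefficient as a manually shifted and sorted copy, because the function normalizes its input internally.

```python
import numpy as np
from gini import gini

a = np.array([3.0, -1.0, 2.0])   # unsorted, contains a negative value
b = np.sort(a - a.min())         # shifted to be non-negative and sorted

print(gini(a))                   # both calls print the same coefficient,
print(gini(b))                   # since gini() shifts and sorts internally
```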
56 | ## Notes
57 | * It is significantly faster than (the [current implementation of](https://github.com/pysal/pysal/issues/855)) PySAL's Gini coefficient function (see [pysal.inequality.gini](http://pysal.readthedocs.io/en/latest/_modules/pysal/inequality/gini.html)) and outputs are indistinguishable to approximately 6 decimal places. In other words, the two functions are arithmetically identical.
58 |
59 | * It is slightly faster than the [Gini coefficient function by David on Ellipsix](http://www.ellipsix.net/blog/2012/11/the-gini-coefficient-for-distribution-inequality.html).
60 |
61 | Many other Gini coefficient functions found online do not produce equivalent results, which is why I wrote this one.
62 |
--------------------------------------------------------------------------------
/tools/excel_charts/gini/gini.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/tools/excel_charts/gini/gini.png
--------------------------------------------------------------------------------
/tools/excel_charts/gini/gini.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def gini(array):
4 | """Calculate the Gini coefficient of a numpy array."""
5 | # based on bottom eq:
6 | # http://www.statsdirect.com/help/generatedimages/equations/equation154.svg
7 | # from:
8 | # http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm
9 | # All values are treated equally, arrays must be 1d:
10 | array = array.flatten()
11 | if np.amin(array) < 0:
12 | # Values cannot be negative:
13 | array -= np.amin(array)
14 | # Values cannot be 0:
15 | array += 0.0000001
16 | # Values must be sorted:
17 | array = np.sort(array)
18 | # Index per array element:
19 | index = np.arange(1,array.shape[0]+1)
20 | # Number of array elements:
21 | n = array.shape[0]
22 | # Gini coefficient:
23 | return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array)))
24 |
--------------------------------------------------------------------------------
/tools/excel_charts/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/tools/excel_charts/tests/__init__.py
--------------------------------------------------------------------------------
/tools/excel_charts/tests/draw_charts_test.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Persper/code-analytics/3a2eb076153e29bc49b8e67265e04a5321e90af0/tools/excel_charts/tests/draw_charts_test.xlsx
--------------------------------------------------------------------------------
/tools/excel_charts/tests/test_distance.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import distance
4 | import math
5 | from statistics import mean
6 | from statistics import pstdev
7 | from random import random
8 | import unittest
9 |
10 | class TestDistanceMethods(unittest.TestCase):
11 |
12 | def test_deviation(self):
13 | n = 1000000
14 | r = [random() for x in range(n)]
15 | m = mean(r)
16 | map1 = {}
17 | map2 = {}
18 | for i, v in enumerate(r):
19 | map1[i] = [v]
20 | map2[i] = [m]
21 | d1 = distance.deviation(map1, map2, 0)
22 | d2 = pstdev(r, m)
23 | self.assertTrue(math.isclose(d1, d2))
24 |
25 | def test_pair_changes(self):
26 | map1 = {'A': [1], 'B': [2], 'C': [3], 'D': [4], 'E': [5]}
27 | map2 = {'A': [1], 'B': [3], 'C': [2], 'D': [4], 'E': [5]}
28 | map3 = {'A': [3], 'B': [2], 'C': [1], 'D': [4], 'E': [5]}
29 | map4 = {'A': [5], 'B': [1], 'C': [2], 'D': [3], 'E': [4]}
30 | self.assertEqual(distance.pair_changes(map1, map1, 0), 0)
31 | self.assertEqual(distance.pair_changes(map1, map2, 0), 1)
32 | self.assertEqual(distance.pair_changes(map1, map3, 0), 3)
33 | self.assertEqual(distance.pair_changes(map1, map4, 0), 4)
34 |
35 | if __name__ == '__main__':
36 | unittest.main()
37 |
38 |
--------------------------------------------------------------------------------
/tools/jira_stats/collect_git_urls.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import os
5 | import re
6 | import requests
7 | import sys
8 |
9 |
10 | def main():
11 | parser = argparse.ArgumentParser(
12 | description='Collect Apache and GitHub repo URLs of Apache projects')
13 | parser.add_argument('-f', '--file', required=True,
14 | help='the output file')
15 | args = parser.parse_args()
16 |
17 | if os.path.isfile(args.file):
18 | sys.exit('Error: output file already exists!')
19 |
20 | out_file = open(args.file, 'w')
21 |
22 | apache_git = 'https://git.apache.org/'
23 |
24 | resp = requests.get(apache_git)
25 |
26 |     pattern = re.compile(r'<td>(.+?)</td>\s*'
27 |                          r'<td>\s*<a href="(.+?)">.+?</a>\s*</td>\s*'
28 |                          r'<td>\s*<a href="(.+?)">.+?</a>\s*</td>')
29 |
30 | for match in pattern.finditer(resp.text):
31 | name = match.group(1)
32 | apache_repo = match.group(2)
33 | github_repo = match.group(3)
34 | print(name, apache_repo, github_repo, sep=',', file=out_file)
35 |
36 | out_file.close()
37 |
38 |
39 | if __name__ == '__main__':
40 | main()
41 |
--------------------------------------------------------------------------------
/tools/jira_stats/process_stats.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import os
5 | import re
6 |
7 |
8 | def find_github(name, urls):
9 | candidates = []
10 | target = set(x.lower() for x in name.split() if len(x) > 1)
11 | for item in urls:
12 | name_set = set(x.lower() for x in item['name'].split() if len(x) > 1)
13 | if target <= name_set:
14 | candidates.append({
15 | 'name': item['name'],
16 | 'github_repo': item['github_repo']
17 | })
18 | return candidates
19 |
20 |
21 | def get_issue_stats(file_path):
22 | issue_stats = []
23 | with open(file_path, 'r') as stats:
24 | for line in stats:
25 | name, key, id, count, \
26 | feature, bug, improvement, maintenance, \
27 | high, mid, low = line.split(',')
28 | if name == 'name' and key == 'key':
29 | continue
30 | issue_stats.append({
31 | 'name': name, 'key': key, 'id': id, 'count': count,
32 | 'feature': feature, 'bug': bug,
33 | 'improvement': improvement, 'maintenance': maintenance,
34 | 'high': high, 'mid': mid, 'low': low
35 | })
36 | return issue_stats
37 |
38 |
39 | def main():
40 | parser = argparse.ArgumentParser(
41 |         description='Select projects to produce the config file for '
42 | 'the JIRA issue crawler')
43 | parser.add_argument('-s', '--stats-file', required=True,
44 | help='the project issue stats file '
45 | 'produced by global_stats')
46 | parser.add_argument('-u', '--url-file', required=True,
47 | help='the git url file produced by collect_git_urls')
48 | parser.add_argument('-d', '--parent-dir', required=True,
49 | help='the dir to contain repos')
50 | parser.add_argument('-o', '--output-file', required=True,
51 | help='output file')
52 | args = parser.parse_args()
53 |
54 | issue_stats = get_issue_stats(args.stats_file)
55 |
56 | project_urls = []
57 | with open(args.url_file, 'r') as urls:
58 | for line in urls:
59 | name, apache_repo, github_repo = line.split(',')
60 | project_urls.append({
61 | 'name': name,
62 | 'apache_repo': apache_repo,
63 | 'github_repo': github_repo
64 | })
65 |
66 | out_file = open(args.output_file, 'w')
67 | empty_file = open(args.output_file + '.empty', 'w')
68 |
69 | re_name = re.compile(r'https://github\.com/apache/(\S+)')
70 | for project in issue_stats:
71 | candidates = find_github(project['name'], project_urls)
72 | if len(candidates) == 0:
73 | print(args.parent_dir, project['key'], 'master',
74 | sep='\t', file=empty_file)
75 | continue
76 | for candidate in candidates:
77 | github = candidate['github_repo'].strip()
78 | dir_name = re_name.search(github).group(1)
79 | path = os.path.join(args.parent_dir, dir_name)
80 | print(path, project['key'], 'master', github + '.git',
81 | sep='\t', file=out_file)
82 |
83 | empty_file.close()
84 | out_file.close()
85 |
86 |
87 | if __name__ == '__main__':
88 | main()
89 |
--------------------------------------------------------------------------------
/tools/repo_crawler/.gitignore:
--------------------------------------------------------------------------------
1 | *-issues
2 | deleted.files
3 |
--------------------------------------------------------------------------------
/tools/repo_crawler/README.md:
--------------------------------------------------------------------------------
1 | ## Data set format
2 |
3 | Each [project]-issues directory contains JIRA issues and GitHub pull request
4 | (PR) comments of the project. Only issues resolved and PRs closed by commits
5 | are included.
6 |
7 | In a project directory, every file starts with the commit hash (first ten
8 | digits) that the issue/PR is associated with. You can browse the commit via
9 | https://github.com/[user]/[project]/commit/[hash]. E.g.,
10 | https://github.com/apache/spark/commit/b8aec6cd23.
11 |
12 | There are two types of files.
13 |
14 | 1. [hash]-[PROJECT]-[#].xml is an XML representation of the JIRA issue. You can
15 | browse the original issue via
16 | https://issues.apache.org/jira/browse/[PROJECT]-[#]. E.g.,
17 | https://issues.apache.org/jira/browse/SPARK-10474.
18 |
19 | 2. [hash]-GitHub-[#].xml is an XML representation of the PR conversation. You
20 | can browse the original PR via https://github.com/[user]/[project]/pull/[#].
21 | E.g., https://github.com/apache/spark/pull/13796.
22 |
23 | In addition, there are shadow files whose names start with `.invalid.`. Users of
24 | this data set can ignore them; they mark issue/PR references in commit messages
25 | that turned out to be invalid.
26 |
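As a small illustration of this layout, the sketch below (hypothetical: the directory name `spark-issues` and this helper script are not part of the data set) groups the files of one [project]-issues directory by their leading commit-hash prefix, skipping the `.invalid.` shadow files:

```python
import os
from collections import defaultdict

# Hypothetical example; substitute any [project]-issues directory.
issues_dir = "spark-issues"

by_commit = defaultdict(list)
for name in os.listdir(issues_dir):
    if name.startswith(".invalid."):
        continue                          # shadow file: wrong info in a commit message
    by_commit[name[:10]].append(name)     # file names start with the leading
                                          # ten characters of the commit hash

for commit_hash, files in sorted(by_commit.items()):
    print(commit_hash, files)
```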
--------------------------------------------------------------------------------
/tools/repo_crawler/github_comments.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import dicttoxml
3 | import github3
4 | import string
5 | import threading
6 | import time
7 | import xml.etree.ElementTree as ET
8 |
9 | class GitHubComments:
10 | def __init__(self, user = None, password = None, limit_per_min=81):
11 | self.gh = github3.login(user, password)
12 | self._limit_per_min = limit_per_min
13 |
14 | self._lock = threading.Lock()
15 | self._last_time = time.time()
16 | self._rest = limit_per_min
17 |
18 | def login(self, user, password):
19 | self.gh = github3.login(user, password)
20 |
21 | def get_lease(self):
22 | with self._lock:
23 | if self._rest > 0:
24 | self._rest -= 1
25 | return True
26 | elif time.time() - self._last_time > 60:
27 | self._rest = self._limit_per_min - 1
28 | self._last_time = time.time()
29 | return True
30 | else:
31 | return False
32 |
33 | def download(self, user, repo, num, file_path):
34 | while not self.get_lease():
35 | time.sleep(5)
36 | pr = self.gh.pull_request(user, repo, num)
37 | comments = ET.Element('comments')
38 | for comment in pr.issue_comments():
39 | snippet = dicttoxml.dicttoxml(comment.as_dict(),
40 | attr_type=False,
41 | custom_root='comment')
42 | snippet = ''.join(x for x in snippet if x in string.printable)
43 | comments.append(ET.fromstring(snippet))
44 | for comment in pr.review_comments():
45 | snippet = dicttoxml.dicttoxml(comment.as_dict(),
46 | attr_type=False,
47 | custom_root='comment')
48 | snippet = ''.join(x for x in snippet if x in string.printable)
49 | comments.append(ET.fromstring(snippet))
50 | return ET.ElementTree(comments).write(file_path, encoding="utf-8")
51 |
52 | def add_args(parser):
53 | parser.add_argument('-u', '--github-user',
54 | help='user name of a GitHub account',
55 | type=str, required=True)
56 | parser.add_argument('-p', '--github-password',
57 | help='password of a GitHub account',
58 | type=str, required=True)
59 |
60 | def main():
61 | parser = argparse.ArgumentParser()
62 | add_args(parser)
63 | args = parser.parse_args()
64 |
65 | ghc = GitHubComments(args.github_user, args.github_password)
66 | ghc.download('apache', 'spark', 8060, '8060.xml')
67 | ghc.download('apache', 'spark', 8069, '8069.xml')
68 |
69 | if __name__ == '__main__':
70 | main()
71 |
--------------------------------------------------------------------------------
/tools/repo_crawler/jira_issue.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import urllib
5 |
6 | _URL_PREFIX_XML = "https://issues.apache.org/jira/si/jira.issueviews:issue-xml/"
7 | _URL_SUFFIX_XML = ".xml"
8 |
9 | class JiraIssue:
10 | def __init__(self,
11 | url_prefix=_URL_PREFIX_XML,
12 | url_suffix=_URL_SUFFIX_XML):
13 | self.url_prefix = url_prefix
14 | self.url_suffix = url_suffix
15 |
16 | def download(self, issue_id, dir_path, file_name):
17 | url = self.url_prefix + issue_id + "/" + issue_id + self.url_suffix
18 | file_path = os.path.join(dir_path, file_name)
19 | invalid_path = os.path.join(dir_path, ".invalid." + file_name)
20 | if os.path.isfile(file_path) or os.path.isfile(invalid_path):
21 | return
22 | for i in range(3):
23 | try:
24 | print urllib.urlretrieve(url, file_path)[0]
25 | with open(file_path, 'r') as downloaded:
26 | if "Oops, you've found a dead link." in \
27 | downloaded.read():
28 | os.rename(file_path, invalid_path)
29 | print "Invalid issue ID:", invalid_path
30 | break
31 | except Exception as e:
32 | if i == 2:
33 | print "[Error] JiraIssue.download: ", type(e), e
34 | else:
35 | time.sleep(10)
36 |
37 | if __name__ == "__main__":
38 | if len(sys.argv) != 3:
39 | print sys.argv[0] + " ISSUE_ID FILE_PATH"
40 | sys.exit(1)
41 | jira_issue = JiraIssue()
42 |     jira_issue.download(sys.argv[1], os.path.dirname(sys.argv[2]), os.path.basename(sys.argv[2]))
43 |
--------------------------------------------------------------------------------
/tools/repo_crawler/repo.config:
--------------------------------------------------------------------------------
1 | ../../repos/hbase HBASE rel/1.3.1 https://github.com/apache/hbase.git
2 | ../../repos/spark SPARK v2.1.1 https://github.com/apache/spark.git
3 | ../../repos/zookeeper ZOOKEEPER release-3.5.3 https://github.com/apache/zookeeper.git
4 | ../../repos/incubator-systemml SYSTEMML v0.14.0-incubating-rc4 https://github.com/apache/incubator-systemml.git
5 | ../../repos/maven MNG maven-3.5.0 https://github.com/apache/maven.git
6 | ../../repos/cassandra CASSANDRA cassandra-3.11.0 https://github.com/apache/cassandra.git
7 | ../../repos/couchdb COUCHDB 2.0.0 https://github.com/apache/couchdb.git
8 | ../../repos/hive HIVE release-2.3.0-rc0 https://github.com/apache/hive.git
9 | ../../repos/activemq AMQ activemq-5.15.0 https://github.com/apache/activemq.git
10 | ../../repos/beam BEAM v2.0.0 https://github.com/apache/beam.git
11 | ../../repos/cloudstack CLOUDSTACK 4.9.2.0 https://github.com/apache/cloudstack.git
12 | ../../repos/ambari AMBARI release-2.5.1 https://github.com/apache/ambari.git
13 | ../../repos/geode GEODE rel/v1.1.1 https://github.com/apache/geode.git
14 | ../../repos/jackrabbit JCR jackrabbit-2.15.4 https://github.com/apache/jackrabbit.git
15 | ../../repos/airavata AIRAVATA airavata-0.16 https://github.com/apache/airavata.git
16 | ../../repos/ant-ivy IVY 2.4.0 https://github.com/apache/ant-ivy.git
17 | ../../repos/archiva MRM archiva-2.2.3 https://github.com/apache/archiva.git
18 | ../../repos/arrow ARROW apache-arrow-0.4.1 https://github.com/apache/arrow.git
19 | ../../repos/avro AVRO release-1.8.2 https://github.com/apache/avro.git
20 | ../../repos/buildr BUILDR 1.4.25 https://github.com/apache/buildr.git
21 | ../../repos/camel CAMEL camel-2.19.1 https://github.com/apache/camel.git
22 |
--------------------------------------------------------------------------------
/tools/repo_crawler/setup.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | sudo apt install -y python
4 | sudo apt install -y python-pip
5 | sudo pip install --upgrade pip
6 | sudo pip install sh
7 |
8 | sudo apt install -y libssl-dev
9 | sudo pip install --pre github3.py
10 | sudo pip install dicttoxml
11 |
--------------------------------------------------------------------------------
/tools/repo_creater/README.md:
--------------------------------------------------------------------------------
1 | # Repo Creater Tool
2 |
3 | **Goal**: To be able to quickly create a fake development history for testing purposes
4 |
5 | # Workflow
6 | 1. `cd test` and `mkdir <repo_name>`
7 | 2. For each commit in the fake history, `mkdir <commit_name>`
8 | 3. Add source files for each commit
9 | 4. Write the commit graph to a `cg.dot` file; see `test/test_feature_branch/cg.dot` for an example. You can also plot it for inspection with `dot -Tpng cg.dot -o cg.png`
10 | 5. Run repo_creater tool
11 | ```
12 | cd tools/repo_creater
13 | ./create_repo.py ../../test/<repo_name>
14 | ```
15 | The newly created repo has the same name and will be placed under the `repos/` folder.
16 |
17 | 6. Examine repo history
18 | ```
19 | cd repos/<repo_name>
20 | git log --graph
21 | # alternatively, to see only master
22 | git log --first-parent
23 | ```
24 |
25 | # Assumptions
26 | - Merges only happen on the master branch
27 | - No merge conflicts need to be resolved manually
28 | - All files dwell directly under `<commit_name>/` (not in subfolders)
29 |
30 |
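For a concrete example of this layout, the `test/test_feature_branch` test (whose `cg.dot` is referenced in the workflow above) is organized as follows; the commit directories A-H are elided here and hold their commits' source files just like I, J, and K:

```
test/test_feature_branch/
├── A/ ... H/        # one directory per fake commit (contents omitted here)
├── I/
│   ├── feature-G.c
│   ├── feature-H.c
│   └── main.c
├── J/
│   ├── feature-J.c
│   └── main.c
├── K/
│   ├── feature-G.c
│   ├── feature-H.c
│   ├── feature-K.c
│   └── main.c
└── cg.dot
```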
--------------------------------------------------------------------------------
/tools/repo_stats/setup_ubuntu.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | sudo apt install -y python3 python3-pip
4 | sudo -H pip3 install --upgrade pip
5 | sudo -H pip3 install sh
6 |
--------------------------------------------------------------------------------
/tools/repo_stats/stats_author.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import os
6 | import subprocess
7 | import sys
8 |
9 |
10 | def stats_commits(repo_path, branch, author_stats=None):
11 | if not author_stats:
12 | author_stats = { }
13 | git_cmd = ['git', '-C', repo_path, 'checkout', branch]
14 | subprocess.check_output(git_cmd)
15 | git_cmd = ['git', '--no-pager', '-C', repo_path, 'shortlog', '-sn']
16 | p = subprocess.Popen(git_cmd, stdout=subprocess.PIPE)
17 | with os.fdopen(os.dup(p.stdout.fileno())) as commits_per_author:
18 | for line in commits_per_author:
19 | num, name = [s.strip() for s in line.split('\t')]
20 | if name not in author_stats:
21 | author_stats[name] = {'n_commits': int(num)}
22 | else:
23 | author_stats[name]['n_commits'] = int(num)
24 | return author_stats
25 |
26 |
27 | def main():
28 | parser = argparse.ArgumentParser(
29 | description='List author stats of git repo(s)')
30 | parser.add_argument('-c', '--count-commits', metavar='DIR',
31 | help='Git repo dir to list authors and their # commits')
32 | parser.add_argument('-b', '--branch', default='master',
33 | help='Branch of the repo to analyze')
34 | parser.add_argument('-a', '--count-authors', metavar='DIR', nargs='+',
35 | help='Multiple git repos to list their # authors')
36 | args = parser.parse_args()
37 | if args.count_commits:
38 | if not os.path.isdir(args.count_commits):
39 |             sys.exit('Error: ' + args.count_commits + ' is not a valid dir!')
40 | author_stats = stats_commits(args.count_commits, args.branch)
41 | for name, stats in sorted(author_stats.items(),
42 | key=lambda x: x[1]['n_commits'],
43 | reverse=True):
44 | print(name, stats['n_commits'], sep=',')
45 | elif args.count_authors:
46 | project_authors = { }
47 | for d in args.count_authors:
48 | if os.path.isfile(d) or d.startswith('.'):
49 | continue
50 | repo_name = os.path.basename(os.path.normpath(d))
51 | print('Parsing ' + repo_name)
52 | project_authors[repo_name] = stats_commits(d, args.branch)
53 | for repo_name, author_stats in sorted(project_authors.items(),
54 | key=lambda x: len(x[1]),
55 | reverse=True):
56 | print(repo_name, len(author_stats), sep=',')
57 | else:
58 | sys.exit('Error: see -h for usage.')
59 |
60 |
61 | if __name__ == '__main__':
62 | main()
63 |
64 |
--------------------------------------------------------------------------------
/tools/repo_stats/stats_pr.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import os
6 | import re
7 | import sys
8 |
9 | from sh.contrib import git
10 | from sh import wc
11 |
12 |
13 | def jira_issue(commit_message, key):
14 | if key is None:
15 | return []
16 | matches = re.findall(key + "-\d+(?!\d*.\d+)", commit_message, re.IGNORECASE)
17 | return [m.upper() for m in matches]
18 |
19 |
20 | def parse_pr(commit_message):
21 | matches = re.findall("(?:close[ds]*|"
22 | "pull\s*request|"
23 | "fix(?:e[ds])?|"
24 | "merge[ds]*)"
25 | "\s*#\d+",
26 | commit_message, re.IGNORECASE)
27 | return [m.split('#')[-1] for m in matches]
28 |
29 |
30 | def num_commits(repo_dir):
31 | git_repo = git.bake('-C', os.path.expanduser(repo_dir))
32 | logs = git_repo.log('--oneline', '--first-parent')
33 | n = wc(logs, '-l')
34 | return int(n)
35 |
36 |
37 | def stats_pr(repo_dir, key, begin, end):
38 | """Lists the number of PR/issue-based commits in the range
39 | """
40 | git_repo = git.bake('-C', os.path.expanduser(repo_dir))
41 | num = 0
42 | prs = []
43 | for i in range(begin, end):
44 | message = str(git_repo.log('--first-parent', '-1', 'HEAD~' + str(i)))
45 | pi = []
46 | pi += jira_issue(message, key)
47 | pi += parse_pr(message)
48 | if pi:
49 | num += 1
50 | prs += pi
51 | return num, prs
52 |
53 |
54 | def main():
55 | parser = argparse.ArgumentParser(
56 | description='Stats commits through pull requests/issues')
57 | parser.add_argument('-n', '--num-groups', type=int, required=True,
58 | help='number of groups of commits in stats')
59 | parser.add_argument('-d', '--dir', required=True,
60 | help='dir of the git repo')
61 | parser.add_argument('-k', '--key', help='key of JIRA issue')
62 | parser.add_argument('-t', '--tag', help='tag to check out of the repo')
63 | parser.add_argument('-m', '--max', type=int,
64 | help='max number of commits to process')
65 | args = parser.parse_args()
66 |
67 | if not os.path.isdir(args.dir):
68 | sys.exit('Error: ' + args.dir + ' is not a valid dir!')
69 |
70 | if args.tag:
71 | git_repo = git.bake('-C', os.path.expanduser(args.dir))
72 | git_repo.checkout(args.tag)
73 |
74 | print(os.path.basename(os.path.normpath(args.dir)))
75 | n = num_commits(args.dir)
76 |     if args.max is not None and args.max < n:
77 | n = args.max
78 | n //= args.num_groups
79 | for i in reversed(range(args.num_groups)):
80 | np, prs = stats_pr(args.dir, args.key, i * n, (i + 1) * n)
81 | print(np / n, end=',')
82 | print('"{0}"'.format(','.join(prs)))
83 |
84 |
85 | if __name__ == '__main__':
86 | main()
87 |
--------------------------------------------------------------------------------
/tools/repo_stats/stats_pr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | N=100
4 | M=1000000
5 |
6 | DIR=results
7 |
8 | mkdir -p $DIR
9 |
10 | ./stats_pr.py -n $N -d repos/hbase -t rel/1.3.0 -k HBASE -m $M > $DIR/hbase.pr.csv &
11 | ./stats_pr.py -n $N -d repos/spark -t v2.1.0 -k SPARK -m $M > $DIR/spark.pr.csv &
12 | ./stats_pr.py -n $N -d repos/zookeeper -t release-3.4.9 -k ZOOKEEPER -m $M > $DIR/zookeeper.pr.csv &
13 | ./stats_pr.py -n $N -d repos/incubator-systemml -t v0.14.0-incubating-rc4 -k SYSTEMML -m $M > $DIR/systemml.pr.csv &
14 | ./stats_pr.py -n $N -d repos/maven -t maven-3.3.9 -k MNG -m $M > $DIR/maven.pr.csv &
15 | ./stats_pr.py -n $N -d repos/cassandra -t cassandra-3.10 -k CASSANDRA -m $M > $DIR/cassandra.pr.csv &
16 | ./stats_pr.py -n $N -d repos/couchdb -t 2.0.0 -k COUCHDB -m $M > $DIR/couchdb.pr.csv &
17 | ./stats_pr.py -n $N -d repos/hive -t rel/release-2.1.1 -k HIVE -m $M > $DIR/hive.pr.csv &
18 | ./stats_pr.py -n $N -d repos/rails -t v5.1.1 -m $M > $DIR/rails.pr.csv &
19 | ./stats_pr.py -n $N -d repos/opencv -t 3.2.0 -m $M > $DIR/opencv.pr.csv &
20 | ./stats_pr.py -n $N -d repos/tensorflow -t v1.1.0 -m $M > $DIR/tensorflow.pr.csv &
21 | ./stats_pr.py -n $N -d repos/vagrant -t v1.9.4 -m $M > $DIR/vagrant.pr.csv &
22 | ./stats_pr.py -n $N -d repos/jekyll -t v3.4.3 -m $M > $DIR/jekyll.pr.csv &
23 | ./stats_pr.py -n $N -d repos/discourse -t v1.7.8 -m $M > $DIR/discourse.pr.csv &
24 |
25 | for pid in $(jobs -p)
26 | do
27 | wait $pid
28 | done
29 |
30 |
--------------------------------------------------------------------------------
/tools/repo_stats/test/couchdb.pr.csv:
--------------------------------------------------------------------------------
1 | couchdb
2 | 0.0,""
3 | 0.2,"COUCHDB-1911,COUCHDB-1853"
4 | 0.1,"COUCHDB-1922"
5 | 0.0,""
6 | 0.0,""
7 | 0.1,"COUCHDB-1923"
8 | 0.3,"COUCHDB-1647,COUCHDB-1921,COUCHDB-1921"
9 | 0.1,"COUCHDB-1911"
10 | 0.1,"COUCHDB-1668"
11 | 0.0,""
12 | 0.2,"COUCHDB-1986,COUCHDB-1795,COUCHDB-1962"
13 | 0.0,""
14 | 0.0,""
15 | 0.0,""
16 | 0.0,""
17 | 0.0,""
18 | 0.0,""
19 | 0.0,""
20 | 0.1,"COUCHDB-2031"
21 | 0.0,""
22 | 0.2,"COUCHDB-2040,COUCHDB-2028"
23 | 0.1,"COUCHDB-2054"
24 | 0.1,"COUCHDB-1474"
25 | 0.1,"COUCHDB-2086"
26 | 0.1,"COUCHDB-2067"
27 | 0.6,"COUCHDB-2189,170,COUCHDB-1076,COUCHDB-2187,COUCHDB-2170,COUCHDB-2123"
28 | 0.3,"COUCHDB-2196,COUCHDB-1180,COUCHDB-1036,COUCHDB-1180,169"
29 | 0.5,"184,183,COUCHDB-2110,COUCHDB-2166,COUCHDB-2201"
30 | 0.3,"COUCHDB-2169,186,185"
31 | 0.2,"COUCHDB-2209,190"
32 | 0.0,""
33 | 0.1,"COUCHDB-2104"
34 | 0.1,"COUCHDB-1986"
35 | 0.0,""
36 | 0.3,"COUCHDB-1697,200,COUCHDB-2206"
37 | 0.1,"211"
38 | 0.2,"COUCHDB-2136,COUCHDB-2220,COUCHDB-1669"
39 | 0.0,""
40 | 0.2,"COUCHDB-2233,COUCHDB-2200"
41 | 0.1,"COUCHDB-2158"
42 | 0.0,""
43 | 0.4,"COUCHDB-2222,COUCHDB-2153,COUCHDB-2248,COUCHDB-2249"
44 | 0.1,"COUCHDB-2238"
45 | 0.1,"250"
46 | 0.0,""
47 | 0.0,""
48 | 0.1,"COUCHDB-2026"
49 | 0.2,"COUCHDB-1133,COUCHDB-1133"
50 | 0.3,"COUCHDB-1986,COUCHDB-2324,268"
51 | 0.1,"274"
52 | 0.5,"COUCHDB-1432,COUCHDB-2430,260,276,COUCHDB-708"
53 | 0.1,"COUCHDB-2362"
54 | 0.3,"293,295,277"
55 | 0.3,"COUCHDB-1145,291,COUCHDB-2557,296"
56 | 0.2,"272,COUCHDB-2619,306"
57 | 0.1,"COUCHDB-2684"
58 | 0.1,"COUCHDB-2237"
59 | 0.0,""
60 | 0.0,""
61 | 0.0,""
62 | 0.0,""
63 | 0.1,"COUCHDB-2762"
64 | 0.0,""
65 | 0.1,"COUCHDB-2797"
66 | 0.1,"COUCHDB-2824"
67 | 0.1,"COUCHDB-2824"
68 | 0.0,""
69 | 0.0,""
70 | 0.2,"359,COUCHDB-2844"
71 | 0.0,""
72 | 0.1,"COUCHDB-2511"
73 | 0.2,"COUCHDB-2775,COUCHDB-2775"
74 | 0.0,""
75 | 0.2,"COUCHDB-2409,COUCHDB-2674,COUCHDB-2850,COUCHDB-2858,COUCHDB-2666"
76 | 0.5,"COUCHDB-2879,COUCHDB-1447,COUCHDB-2835,COUCHDB-2534,COUCHDB-2859"
77 | 0.2,"COUCHDB-2905,COUCHDB-2897,COUCHDB-2898"
78 | 0.0,""
79 | 0.3,"COUCHDB-2874,COUCHDB-2082,COUCHDB-2082"
80 | 0.0,""
81 | 0.2,"COUCHDB-2938,COUCHDB-2866,COUCHDB-2872"
82 | 0.1,"COUCHDB-2938"
83 | 0.6,"COUCHDB-2968,COUCHDB-2965,COUCHDB-2963,COUCHDB-2959,390,386"
84 | 0.6,"COUCHDB-2978,COUCHDB-2978,COUCHDB-2900,397,396,393,391"
85 | 0.2,"408,COUCHDB-2988"
86 | 0.0,""
87 | 0.0,""
88 | 0.4,"COUCHDB-3016,COUCHDB-3016,COUCHDB-3016,COUCHDB-2990"
89 | 0.5,"COUCHDB-3028,420,421,419,416"
90 | 0.4,"COUCHDB-3054,428,426,COUCHDB-3039"
91 | 0.0,""
92 | 0.1,"COUCHDB-3060"
93 | 0.1,"COUCHDB-3066"
94 | 0.1,"COUCHDB-3070"
95 | 0.6,"COUCHDB-3096,COUCHDB-3089,COUCHDB-3092,COUCHDB-3084,COUCHDB-3084,COUCHDB-3082"
96 | 0.4,"COUCHDB-3060,COUCHDB-3104,COUCHDB-3104,COUCHDB-3104,COUCHDB-2779,COUCHDB-3097,COUCHDB-3099"
97 | 0.3,"COUCHDB-3102,COUCHDB-3017,438"
98 | 0.5,"COUCHDB-3135,COUCHDB-3122,COUCHDB-3134,COUCHDB-3136,COUCHDB-3121,COUCHDB-3114,COUCHDB-3118"
99 | 0.1,"COUCHDB-3132"
100 | 0.0,""
101 | 0.1,"COUCHDB-3143"
102 |
--------------------------------------------------------------------------------
/tools/repo_stats/test/discourse.pr.csv:
--------------------------------------------------------------------------------
1 | discourse
2 | 0.1,"2547"
3 | 0.0,""
4 | 0.1,"2549"
5 | 0.3,"2552,2551,2550"
6 | 0.5,"2546,2530,2537,2553,2554"
7 | 0.2,"2532,2555"
8 | 0.1,"2556"
9 | 0.1,"2557"
10 | 0.0,""
11 | 0.0,""
12 | 0.1,"2560"
13 | 0.2,"2563,2561"
14 | 0.3,"2572,2571,2564"
15 | 0.6,"2568,2566,2565,2548,2575,2573"
16 | 0.3,"2578,2569,2567"
17 | 0.1,"2580"
18 | 0.0,""
19 | 0.4,"2584,2583,2581,2582"
20 | 0.2,"2587,2586"
21 | 0.2,"2589,2588"
22 | 0.5,"2592,2595,2590,2596,2597"
23 | 0.0,""
24 | 0.2,"2585,2599"
25 | 0.3,"2600,2602,2601"
26 | 0.1,"2603"
27 | 0.0,""
28 | 0.3,"2591,2604,2606"
29 | 0.2,"2609,2608"
30 | 0.1,"2607"
31 | 0.0,""
32 | 0.1,"2612"
33 | 0.1,"2613"
34 | 0.2,"2614,2615"
35 | 0.0,""
36 | 0.0,""
37 | 0.4,"2617,2620,2618,2619"
38 | 0.2,"2622,2623"
39 | 0.3,"2625,2624,2627"
40 | 0.1,"2628"
41 | 0.0,""
42 | 0.1,"2632"
43 | 0.0,""
44 | 0.0,""
45 | 0.1,"2634"
46 | 0.0,""
47 | 0.1,"2629"
48 | 0.0,""
49 | 0.3,"2638,2643,2642"
50 | 0.1,"2644"
51 | 0.1,"2646"
52 | 0.0,""
53 | 0.3,"2649,2636,2648"
54 | 0.2,"2652,2651"
55 | 0.1,"2654"
56 | 0.2,"2655,2656"
57 | 0.0,""
58 | 0.2,"2662,2661"
59 | 0.2,"2663,2645"
60 | 0.1,"2658"
61 | 0.1,"2665"
62 | 0.5,"2667,2659,2650,2666,2511"
63 | 0.0,""
64 | 0.0,""
65 | 0.0,""
66 | 0.2,"2671,2669"
67 | 0.1,"2672"
68 | 0.0,""
69 | 0.0,""
70 | 0.2,"2676,2681"
71 | 0.5,"2682,2647,2675,2633,2670"
72 | 0.2,"2678,2683"
73 | 0.1,"2684"
74 | 0.2,"2690,2688"
75 | 0.2,"2677,2689"
76 | 0.3,"2693,2691,2692"
77 | 0.3,"2686,2695,2694"
78 | 0.5,"2700,2685,2696,2698,2699"
79 | 0.0,""
80 | 0.0,""
81 | 0.0,""
82 | 0.1,"3192"
83 | 0.0,""
84 | 0.0,""
85 | 0.0,""
86 | 0.0,""
87 | 0.0,""
88 | 0.0,""
89 | 0.0,""
90 | 0.0,""
91 | 0.0,""
92 | 0.0,""
93 | 0.0,""
94 | 0.0,""
95 | 0.0,""
96 | 0.0,""
97 | 0.0,""
98 | 0.0,""
99 | 0.0,""
100 | 0.0,""
101 | 0.0,""
102 |
--------------------------------------------------------------------------------
/tools/repo_stats/test/jekyll.pr.csv:
--------------------------------------------------------------------------------
1 | jekyll
2 | 0.5,"4410,4429,4424,4423,4404"
3 | 0.5,"4452,4428,4437,4436,4434"
4 | 0.7,"4460,4461,4459,4465,4463,4464,4455"
5 | 0.6,"4374,4496,4487,4485,4484,4473"
6 | 0.3,"4505,4502,4492"
7 | 0.6,"4522,4526,4525,4512,4514,4517"
8 | 0.3,"4546,4543,4535"
9 | 0.2,"4553,4547"
10 | 0.8,"4591,4592,4594,4566,4559,4561,4562,4554"
11 | 0.5,"4545,4606,4597,4583,4589"
12 | 0.5,"4557,4602,4611,4599,4381"
13 | 0.5,"4620,4621,4618,4598,4590"
14 | 0.5,"4635,4630,4637,4633,4601"
15 | 0.4,"4639,4636,4558,4641"
16 | 0.5,"4645,4646,4596,4628,4555"
17 | 0.6,"4658,4659,4660,4647,4653,4652"
18 | 0.3,"4685,4682,4670"
19 | 0.5,"4700,4694,4699,4686,4491"
20 | 0.5,"4704,4706,4542,4533,4474"
21 | 0.5,"4703,4712,4640,3849,4624"
22 | 0.6,"4755,4750,4751,4717,4537,4720"
23 | 0.6,"4756,4760,4741,4763,4758,4759"
24 | 0.4,"4769,4771,4775,4781"
25 | 0.4,"4789,4734,4478,4689"
26 | 0.4,"4804,4754,4813,4786"
27 | 0.6,"4808,4595,4819,4792,4793,4799"
28 | 0.4,"4854,4847,4844,4710"
29 | 0.6,"4863,4872,4874,4867,4857,4855"
30 | 0.5,"4849,4887,4886,4839,4859"
31 | 0.5,"4888,4881,4892,4510,4890"
32 | 0.3,"4848,4903,4902"
33 | 0.1,"4916"
34 | 0.4,"4947,4950,4951,4949"
35 | 0.4,"4948,4931,4933,4934"
36 | 0.3,"4974,4958,4971"
37 | 0.5,"4959,4956,4953,4978,4975"
38 | 0.5,"4980,4976,4966,4977,4962"
39 | 0.6,"4989,4973,4940,4987,4985,4979"
40 | 0.4,"5006,4908,5000,5005"
41 | 0.5,"5009,4917,5012,4993,5010"
42 | 0.4,"4922,5018,5017,5014"
43 | 0.4,"5025,5026,5027,5019"
44 | 0.4,"5030,5031,5032,5024"
45 | 0.5,"5056,5058,5054,5043,5015"
46 | 0.5,"5067,5096,5068,5063,5065"
47 | 0.4,"5100,5097,5069,5098"
48 | 0.5,"5060,5101,5053,5042,5011"
49 | 0.5,"5112,5114,5117,5022,5106"
50 | 0.5,"5124,5119,5115,5113,5116"
51 | 0.4,"5109,5118,5122,5123"
52 | 0.5,"5135,4860,5127,5129,5131"
53 | 0.3,"5138,5139,5137"
54 | 0.3,"5150,5140,5141"
55 | 0.5,"5177,5173,5152,5158,5143"
56 | 0.5,"5178,5168,5164,5154,5156"
57 | 0.5,"5194,5180,5185,5187,5188"
58 | 0.4,"5205,5183,5196,5190"
59 | 0.3,"5226,5214,5167"
60 | 0.5,"5244,5254,5239,5221,5222"
61 | 0.5,"5272,5274,5271,5258,5249"
62 | 0.5,"5291,5286,5287,5273,5279"
63 | 0.5,"5240,5293,5281,5294,5262"
64 | 0.5,"5189,5045,5295,5304,5280"
65 | 0.5,"5318,5334,5320,5308,5224"
66 | 0.4,"5337,5316,5335,5235"
67 | 0.5,"5372,5361,5199,5347,5326"
68 | 0.5,"5237,5369,5364,5381,5383"
69 | 0.4,"5389,5375,5376,5380"
70 | 0.5,"5403,5402,5399,5338,5397"
71 | 0.6,"5413,5416,5325,5421,5420,5405"
72 | 0.4,"5428,5157,5210,5408"
73 | 0.5,"5411,5410,5427,5430,5412"
74 | 0.5,"5212,5256,5431,5441,5264"
75 | 0.4,"5456,5433,5452,4873"
76 | 0.4,"5472,5471,5449,5442"
77 | 0.5,"5497,5494,5479,5489,5491"
78 | 0.5,"5504,5502,5495,5496,5492"
79 | 0.5,"5519,5512,5457,5505,5511"
80 | 0.5,"5538,5532,5529,5459,5466"
81 | 0.4,"5536,5539,5540,5533"
82 | 0.4,"5546,5564,5524,5557"
83 | 0.1,"5526"
84 | 0.5,"5572,5571,5464,5570,5559"
85 | 0.4,"5597,5435,5592,5582"
86 | 0.5,"5573,5587,5600,5608,5605"
87 | 0.5,"5614,5611,5530,5609,5384"
88 | 0.5,"5513,5612,5643,5641,5632"
89 | 0.5,"5655,5658,5657,5653,5652"
90 | 0.4,"5671,5670,5668,5666"
91 | 0.5,"5712,5711,5683,5705,5689"
92 | 0.5,"5726,5720,5688,5713,5693"
93 | 0.5,"5740,5738,5696,5692,5544"
94 | 0.5,"5746,5748,5725,5745,5473"
95 | 0.5,"5758,5761,5744,5752,5621"
96 | 0.5,"5768,5769,5765,5764,5750"
97 | 0.5,"5782,5731,5691,5767,5779"
98 | 0.5,"5780,5791,5784,5312,5781"
99 | 0.3,"5640,5542,5753"
100 | 0.0,""
101 | 0.3,"5968,5940,5924"
102 |
--------------------------------------------------------------------------------
/tools/repo_stats/test/stats_pr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | N=100
4 | M=1000
5 |
6 | ../stats_pr.py -n $N -d ../repos/hbase -t rel/1.3.0 -k HBASE -m $M > hbase.pr.csv &
7 | ../stats_pr.py -n $N -d ../repos/spark -t v2.1.0 -k SPARK -m $M > spark.pr.csv &
8 | ../stats_pr.py -n $N -d ../repos/zookeeper -t release-3.4.9 -k ZOOKEEPER -m $M > zookeeper.pr.csv &
9 | ../stats_pr.py -n $N -d ../repos/incubator-systemml -t v0.14.0-incubating-rc4 -k SYSTEMML -m $M > systemml.pr.csv &
10 | ../stats_pr.py -n $N -d ../repos/maven -t maven-3.3.9 -k MNG -m $M > maven.pr.csv &
11 | ../stats_pr.py -n $N -d ../repos/cassandra -t cassandra-3.10 -k CASSANDRA -m $M > cassandra.pr.csv &
12 | ../stats_pr.py -n $N -d ../repos/couchdb -t 2.0.0 -k COUCHDB -m $M > couchdb.pr.csv &
13 | ../stats_pr.py -n $N -d ../repos/hive -t rel/release-2.1.1 -k HIVE -m $M > hive.pr.csv &
14 | ../stats_pr.py -n $N -d ../repos/rails -t v5.1.1 -m $M > rails.pr.csv &
15 | ../stats_pr.py -n $N -d ../repos/opencv -t 3.2.0 -m $M > opencv.pr.csv &
16 | ../stats_pr.py -n $N -d ../repos/tensorflow -t v1.1.0 -m $M > tensorflow.pr.csv &
17 | ../stats_pr.py -n $N -d ../repos/vagrant -t v1.9.4 -m $M > vagrant.pr.csv &
18 | ../stats_pr.py -n $N -d ../repos/jekyll -t v3.4.3 -m $M > jekyll.pr.csv &
19 | ../stats_pr.py -n $N -d ../repos/discourse -t v1.7.8 -m $M > discourse.pr.csv &
20 |
21 | for pid in $(jobs -p)
22 | do
23 | wait $pid
24 | done
25 |
26 |
--------------------------------------------------------------------------------
/tools/repo_stats/test/vagrant.pr.csv:
--------------------------------------------------------------------------------
1 | vagrant
2 | 0.4,"6502,5986,5981,6534"
3 | 0.3,"5991,5993,4738"
4 | 0.6,"6150,6149,6073,6071,6050,5999"
5 | 0.6,"6195,6185,6160,6172,6157,6156"
6 | 0.6,"6259,6254,6232,6219,6213,6203"
7 | 0.6,"6364,6322,6318,6307,6305,6288"
8 | 0.6,"6386,6383,6373,6444,6404,6367"
9 | 0.7,"6488,6479,6475,6474,6389,6521,6407"
10 | 0.6,"6515,6536,6535,6493,6382,6489"
11 | 0.5,"6049,6538,6539,6537,6406"
12 | 0.2,"6540,6541"
13 | 0.2,"6543,6542"
14 | 0.6,"6553,6551,6550,6548,6520,6545,6544"
15 | 0.5,"6557,6556,6555,6554,6552"
16 | 0.3,"4473,6560,6559"
17 | 0.4,"6565,6563,6562,6561"
18 | 0.2,"6568,6564"
19 | 0.0,""
20 | 0.3,"6581,6567,6575"
21 | 0.4,"6584,6582,6583,6118"
22 | 0.5,"6590,6589,6588,6587,6585"
23 | 0.5,"6601,5086,6599,6591,6597"
24 | 0.1,"6606"
25 | 0.4,"6649,6636,6650,6643,3539"
26 | 0.7,"6603,6612,6659,6664,6661,6288,6652"
27 | 0.6,"6681,6691,6675,6671,6671,6662"
28 | 0.5,"6722,6728,6714,6711,6700"
29 | 0.3,"6731,6706,6718"
30 | 0.3,"6756,6753,6740"
31 | 0.7,"6800,6795,6844,6833,6816,6774,6771"
32 | 0.6,"6932,6806,6926,6891,6763,6874"
33 | 0.7,"6912,6963,6962,6952,6948,6950,6923"
34 | 0.5,"6909,6899,6922,6760,6848"
35 | 0.5,"6602,6749,6897,6867,6893"
36 | 0.5,"6977,6969,6610,6843,6805"
37 | 0.7,"7024,7026,7016,7001,6991,6981,6983"
38 | 0.6,"7086,7085,7081,7078,7056,7041"
39 | 0.4,"7101,7107,6879,7093"
40 | 0.6,"7151,7050,7123,7120,7122,7121,7104"
41 | 0.6,"7203,7191,7159,7162,7154,7153"
42 | 0.6,"7219,7216,7215,7158,7184,7204"
43 | 0.5,"7251,7103,7239,7090,7223"
44 | 0.4,"7327,7110,7299,7283"
45 | 0.7,"7059,7352,7349,7347,7339,7334,7108"
46 | 0.3,"7290,7298,7353"
47 | 0.4,"7308,7355,7354,5670"
48 | 0.5,"7126,7179,7356,7287,7293"
49 | 0.4,"7363,7360,7358,7009"
50 | 0.6,"7359,7370,7369,7366,7365,7364"
51 | 0.5,"7379,7382,7377,7376,7276"
52 | 0.4,"7270,7387,7395,7393"
53 | 0.4,"6838,7207,7396,7372"
54 | 0.4,"7190,7409,7406,7400"
55 | 0.3,"7269,7419,7418"
56 | 0.4,"7460,7453,7432,7428"
57 | 0.4,"7478,7477,7467,7456"
58 | 0.4,"7484,7483,7481,7480"
59 | 0.5,"7505,7499,7487,7492,7491"
60 | 0.5,"7550,7587,7605,7589,7574"
61 | 0.4,"7569,7568,7571,7012,7524"
62 | 0.2,"7630,7611"
63 | 0.6,"6765,7650,7647,7643,7639,7632"
64 | 0.7,"7705,7676,7703,7690,7701,7698,7684"
65 | 0.4,"7725,7720,7675,7623"
66 | 0.6,"7752,7798,7781,7724,7740,7726"
67 | 0.5,"7712,7489,7778,7758,7819,7830"
68 | 0.5,"7813,7751,7831,7848,7802"
69 | 0.5,"7877,7873,7679,7674,7688"
70 | 0.5,"7881,7536,7866,7874,7756"
71 | 0.3,"7889,7887,7818"
72 | 0.2,"7929,7907"
73 | 0.6,"7931,7719,7928,7922,7921,7926"
74 | 0.3,"7947,7944,7943"
75 | 0.5,"7986,7793,7976,7980,7978"
76 | 0.5,"7897,7854,8000,7985,7989"
77 | 0.4,"8011,8008,7918,7879"
78 | 0.8,"8032,8033,8009,7896,7998,8027,8031,8028"
79 | 0.4,"8071,8070,8062,8041"
80 | 0.5,"8051,8052,8066,8068,8079"
81 | 0.5,"8106,8098,7867,8094,8087"
82 | 0.5,"8148,8102,8146,8092,8160"
83 | 0.8,"8198,8100,8246,8205,8233,8109,8119,8143"
84 | 0.4,"8192,8176,8191,8195"
85 | 0.5,"8270,8165,8272,8252,8248"
86 | 0.6,"8237,8219,8283,8167,8296,8273"
87 | 0.5,"8302,8291,8300,8194,8196"
88 | 0.2,"7035,8314"
89 | 0.6,"8073,7967,8089,8326,8334,7956"
90 | 0.4,"8337,8341,8327,8122"
91 | 0.5,"8353,8308,8364,8350,8344"
92 | 0.3,"8390,8366,8325"
93 | 0.3,"8385,8379,8336"
94 | 0.5,"8264,8401,8400,8399,8393"
95 | 0.2,"8416,8410"
96 | 0.5,"8436,8422,8414,8368,8421"
97 | 0.6,"8454,8451,7425,8442,8437,7840"
98 | 0.4,"8428,8457,8456,8329"
99 | 0.5,"8407,8443,8472,8310,8482"
100 | 0.4,"8497,8495,7797,8485"
101 | 0.4,"8504,8507,8503,8498"
102 |
--------------------------------------------------------------------------------
|