├── output.json ├── rosmap ├── __init__.py ├── config │ ├── __init__.py │ └── config.json ├── loaders │ ├── __init__.py │ └── module_loader.py ├── api_bindings │ ├── __init__.py │ ├── bitbucket_api_bindings.py │ └── github_api_bindings.py ├── file_analyzers │ ├── __init__.py │ ├── i_file_analyzer.py │ ├── cpp_file_analyzer.py │ └── existence_file_analyzer.py ├── repository_cloners │ ├── __init__.py │ ├── git_askpass.py │ ├── i_repository_cloner.py │ ├── mercurial_repository_cloner.py │ ├── subversion_repository_cloner.py │ └── git_repository_cloner.py ├── repository_parsers │ ├── __init__.py │ ├── i_repository_parser.py │ ├── github_repository_parser.py │ ├── bitbucket_repo_parser.py │ └── rosdistro_repo_parser.py ├── repository_analyzers │ ├── __init__.py │ ├── offline │ │ ├── __init__.py │ │ ├── i_repository_analyzer.py │ │ ├── git_repository_analyzer.py │ │ ├── mercurial_repository_analyzer.py │ │ ├── abstract_repository_analyzer.py │ │ └── subversion_repository_analyzer.py │ └── online │ │ ├── __init__.py │ │ ├── i_scs_analyzer.py │ │ ├── bitbucket_repository_analyzer.py │ │ └── github_repository_analyzer.py ├── package_analyzers │ ├── package_xml_analyzer.py │ ├── manifest_xml_analyzer.py │ └── package_analyzer.py └── analyze.py ├── MANIFEST.in ├── rosmap-launcher ├── requirements.txt ├── setup.py ├── LICENSE.txt ├── .gitignore ├── schema.json └── README.md /output.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /rosmap/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft rosmap -------------------------------------------------------------------------------- /rosmap/config/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/loaders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/api_bindings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/file_analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/repository_cloners/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/repository_parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/offline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/online/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rosmap-launcher: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from rosmap import analyze 3 | 4 | if __name__ == 
"__main__": 5 | analyze.main() 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | GitPython>=2.1.8 2 | pyyaml>=4.2b1 3 | pyquery>=1.4.0 4 | urllib3 5 | certifi 6 | python-hglib>=2.6.1 7 | svn>=0.3.46 8 | python-dateutil>=2.7.5 9 | cpplint -------------------------------------------------------------------------------- /rosmap/repository_cloners/git_askpass.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from sys import argv 3 | from os import environ 4 | 5 | if __name__ == "__main__": 6 | if argv[1] == "Username for 'https://github.com': ": 7 | print(environ['GIT_USERNAME']) 8 | exit() 9 | 10 | if argv[1] == "Password for 'https://" + environ['GIT_USERNAME'] + "@github.com': ": 11 | print(environ['GIT_PASSWORD']) 12 | exit() 13 | 14 | exit(1) -------------------------------------------------------------------------------- /rosmap/repository_parsers/i_repository_parser.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class IRepositoryParser(object): 5 | __metaclass__ = ABCMeta 6 | 7 | @abstractmethod 8 | def parse_repositories(self, repository_dict: dict) -> None: 9 | """ 10 | Parses repository URLs and adds them to the dictionary. 
11 | :param repository_dict: The dictionary to add the repository URLs to (key: repo-type, value: repo-url) 12 | :return: None 13 | """ 14 | 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /rosmap/file_analyzers/i_file_analyzer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class IFileAnalyzer(object): 5 | __metaclass__ = ABCMeta 6 | 7 | @abstractmethod 8 | def initialize_fields(self, repo_detail: dict) -> None: 9 | """ 10 | Initialize fields on repo_detail needed for analysis of this file-type. 11 | :param repo_detail: 12 | :return: 13 | """ 14 | raise NotImplementedError 15 | 16 | def analyze_files(self, path_list, repo_detail: dict): 17 | raise NotImplementedError 18 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/offline/i_repository_analyzer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class IRepositoryAnalyzer(object): 5 | """ 6 | Interface for classes implementing Repository-analysis. 7 | """ 8 | __metaclass__ = ABCMeta 9 | 10 | @abstractmethod 11 | def analyze_repositories(self, path: str, repo_details: dict) -> None: 12 | """ 13 | Analyzes all repositories directly under the root of the given path (does not recurse). 14 | :param path: Path to the repositories. 15 | :param repo_details: Details about the repositories. 
16 | :return: None 17 | """ 18 | raise NotImplementedError 19 | 20 | @abstractmethod 21 | def analyzes(self) -> str: 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/online/i_scs_analyzer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class ISCSRepositoryAnalyzer(object): 5 | """ 6 | Interface for classes implementing remote analysis of repositories from social coding sites (scs). 7 | """ 8 | __metaclass__ = ABCMeta 9 | 10 | @abstractmethod 11 | def analyze_repositories(self, repo_details: dict) -> None: 12 | """ 13 | Analyzes repositories listed in repo_details. 14 | """ 15 | raise NotImplementedError 16 | 17 | @abstractmethod 18 | def analyzes(self) -> str: 19 | """ 20 | Returns which type of remote is analyzed by this analyzer. 21 | :return: A string designating which type of repository is cloned (e.g. "bitbucket", "github",...) 22 | """ 23 | raise NotImplementedError 24 | -------------------------------------------------------------------------------- /rosmap/repository_cloners/i_repository_cloner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class IRepositoryCloner(object): 5 | """ 6 | Interface for classes implementing cloning-functionality for different repository-types. 7 | """ 8 | __metaclass__ = ABCMeta 9 | 10 | @abstractmethod 11 | def clone_repositories(self, repository_set: set) -> None: 12 | """ 13 | Clones repositories from URLs provided by repository set 14 | :param repository_set: A set containing repository-URLs. 15 | :return: None 16 | """ 17 | raise NotImplementedError 18 | 19 | @abstractmethod 20 | def clones(self) -> str: 21 | """ 22 | Returns which type of repository is cloned by this cloner. 
23 | :return: A string designating which type of repository is cloned (e.g. "git", "hg", "svn", ...) 24 | """ 25 | raise NotImplementedError 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from setuptools import setup, find_packages 4 | setup( 5 | name="rosmap", 6 | version="0.2", 7 | packages=find_packages(), 8 | scripts=['rosmap-launcher'], 9 | install_requires=['GitPython>=2.1.8', 10 | 'pyyaml>=4.2b1', 11 | 'pyquery>=1.4.0', 12 | 'urllib3', 13 | 'certifi', 14 | 'python-hglib>=2.6.1', 15 | 'svn>=0.3.46', 16 | 'python-dateutil>=2.7.5', 17 | 'cpplint'], 18 | include_package_data=True, 19 | author="Marc Pichler", 20 | author_email="marc.pichler@joanneum.at", 21 | license="MIT", 22 | description="Clones and analyzes ROS-Packages.", 23 | url="https://github.com/jr-robotics/rosmap", 24 | project_urls={ 25 | "Source Code": "https://github.com/jr-robotics/rosmap" 26 | }, 27 | python_requires='~=3.5', 28 | ) 29 | -------------------------------------------------------------------------------- /rosmap/config/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "github_username": "", 3 | "github_password": "", 4 | "github_search_topic": "ros", 5 | "github_search_rate_limit": 1800, 6 | "github_api_rate_limit": 5000, 7 | "rosdistro_url": "https://github.com/ros/rosdistro", 8 | "rosdistro_workspace": "~/.rosdistro_workspace/", 9 | "bitbucket_repo_page": "https://bitbucket.org/repo/all/", 10 | "bitbucket_repo_search_string": "ros", 11 | "bitbucket_api_rate_limit": 1000, 12 | "version_control_systems": ["git","svn","hg"], 13 | "analysis_workspace" : "~/.analysis_workspace/", 14 | "repository_folder": "repositories/", 15 | "social_coding_sites": ["bitbucket", "github"], 16 | "package_xml_dependency_tags": ["build_depend", 17 | "run_depend", 18 | "depend", 19 | 
"buildtool_depend", 20 | "build_export_depend", 21 | "exec_depend", 22 | "test_depend", 23 | "doc_depend"], 24 | "manifest_xml_dependency_tags": ["depend"] 25 | } -------------------------------------------------------------------------------- /rosmap/repository_parsers/github_repository_parser.py: -------------------------------------------------------------------------------- 1 | from rosmap.api_bindings.github_api_bindings import GithubApiBindings 2 | from .i_repository_parser import IRepositoryParser 3 | 4 | 5 | class GithubRepositoryParser(IRepositoryParser): 6 | """ 7 | Parses repository-URLs from GitHub using the GitHub-search API. 8 | """ 9 | def __init__(self, settings: dict): 10 | """ 11 | Creates a new instance of the GithubRepositoryParser class. 12 | :param settings: Settings dict containing keys github_username, github_password (token works as well), as well as 13 | search-API rate limit. 14 | """ 15 | self.__api_bindings = GithubApiBindings(settings["github_username"], 16 | settings["github_password"], 17 | settings["github_search_rate_limit"]) 18 | self.__settings = settings 19 | 20 | def parse_repositories(self, repository_dict: dict) -> None: 21 | # Github only hosts git repositories. 
22 | repository_dict["git"].update(self.__api_bindings.get_urls_of_topic(self.__settings["github_search_topic"])) -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 JOANNEUM RESEARCH Forschungsgesellschaft mbH 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /rosmap/package_analyzers/package_xml_analyzer.py: -------------------------------------------------------------------------------- 1 | from .package_analyzer import PackageAnalyzer 2 | from xml.etree.cElementTree import parse 3 | from xml.etree.cElementTree import ParseError 4 | import logging 5 | 6 | class PackageXmlAnalyzer(PackageAnalyzer): 7 | """ 8 | Analyzer plug-in for ROS' package.xml files (catkin). 
9 | """ 10 | 11 | def analyze_file(self, path: str, dependencies: dict) -> dict: 12 | # Parse xml 13 | try: 14 | file = open(path, "r") 15 | tree = parse(file) 16 | except ParseError: 17 | logging.warning("[PackageXmlAnalyzer]: Could not parse " + path + "; omitting file.") 18 | return dependencies 19 | 20 | element = tree.getroot() 21 | packagename = element.find('name').text 22 | 23 | for tag in self._settings["package_xml_dependency_tags"]: 24 | for element in element.findall(tag): 25 | self.add_dependency(packagename, element.text, dependencies) 26 | 27 | def _analyze(self, path: str) -> dict: 28 | 29 | packages = dict() 30 | filellist = self.search_files(path, "package.xml") 31 | 32 | for filename in filellist: 33 | logging.info("[PackageXmlAnalyzer]: Analyzing " + filename) 34 | self.analyze_file(filename, packages) 35 | 36 | return packages 37 | 38 | 39 | -------------------------------------------------------------------------------- /rosmap/file_analyzers/cpp_file_analyzer.py: -------------------------------------------------------------------------------- 1 | from .i_file_analyzer import IFileAnalyzer 2 | import subprocess 3 | 4 | 5 | class CppFileAnalyzer(IFileAnalyzer): 6 | """Analyzes C++ source and header files.""" 7 | def initialize_fields(self, repo_detail: dict) -> None: 8 | try: 9 | repo_detail["cpplint_errors"] 10 | except KeyError: 11 | repo_detail["cpplint_errors"] = 0 12 | 13 | def __analyze_file(self, path: str, repo_detail: dict) -> None: 14 | try: 15 | cpplint_report = str(subprocess.check_output( 16 | "cpplint --filter=-whitespace/tab,-whitespace/braces,-build/headerguard,-readability/streams,-build/include_order,-whitespace/newline,-whitespace/labels,-runtime/references " + path, 17 | shell=True, stderr=subprocess.STDOUT)) 18 | if "Total errors found:" in cpplint_report: 19 | repo_detail["cpplint_errors"] += int(cpplint_report.split('\n')[-2].split(': ')[-1]) 20 | except subprocess.CalledProcessError as error: 21 | cpplint_report = 
error.output 22 | repo_detail["cpplint_errors"] += int(cpplint_report.decode("utf-8").split('\n')[-2].split(': ')[-1]) 23 | 24 | def analyze_files(self, path_list: list, repo_detail: dict): 25 | for file_path in filter(lambda k: k.endswith(".hpp") or k.endswith(".cpp") or k.endswith(".h"), path_list): 26 | self.__analyze_file(file_path, repo_detail) 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /rosmap/file_analyzers/existence_file_analyzer.py: -------------------------------------------------------------------------------- 1 | from .i_file_analyzer import IFileAnalyzer 2 | 3 | 4 | class ExistenceFileAnalyzer(IFileAnalyzer): 5 | """ 6 | Checks if files exist, and saves true or false to the repo_detail. 7 | """ 8 | 9 | def analyze_files(self, path_list, repo_detail: dict): 10 | for path in path_list: 11 | self.__analyze_file(path, repo_detail) 12 | 13 | def __analyze_file(self, path: str, repo_details: dict) -> None: 14 | file = path.split("/")[-1] 15 | repo_details["readme"] = repo_details["readme"] or "readme" in file.lower() 16 | repo_details["changelog"] = repo_details["changelog"] or "changelog" in file.lower() 17 | repo_details["continuous_integration"] = repo_details["continuous_integration"] or \ 18 | ".travis.yml" in file.lower() \ 19 | or ".gitlab-ci.yml" in file.lower() \ 20 | or "bitbucket-pipelines" in file.lower() 21 | repo_details["rosinstall"] = repo_details["rosinstall"] or ".rosinstall" in file.lower() 22 | 23 | def initialize_fields(self, repo_detail: dict) -> None: 24 | details = ["readme", "changelog", "continuous_integration", "rosinstall"] 25 | for detail in details: 26 | try: 27 | repo_detail[detail] 28 | except KeyError: 29 | repo_detail[detail] = False 30 | -------------------------------------------------------------------------------- /rosmap/package_analyzers/manifest_xml_analyzer.py: -------------------------------------------------------------------------------- 1 | from 
.package_analyzer import PackageAnalyzer 2 | from xml.etree.cElementTree import parse 3 | from xml.etree.cElementTree import ParseError 4 | import os 5 | import logging 6 | 7 | 8 | class ManifestXmlAnalyzer(PackageAnalyzer): 9 | """ 10 | Analyzer plug-in that analyzes manifest.xml (rosbuild) package files. 11 | """ 12 | 13 | def analyze_file(self, path: str, dependencies: dict) -> dict: 14 | """ 15 | Analyzes a manifest.xml file. 16 | :param path: Path to the manifest.xml file. 17 | :param dependencies: Dictionary containing (key: package name, value: list[dependency, dependency, ...] 18 | :return: updated dependencies-dictionary. 19 | """ 20 | # Parse xml 21 | try: 22 | file = open(path, "r") 23 | tree = parse(file) 24 | except ParseError: 25 | logging.warning("[ManifestXmlAnalyzer]: Could not parse " + path + "; omitting file.") 26 | return dependencies 27 | 28 | element = tree.getroot() 29 | packagename = os.path.basename(os.path.dirname(path)) 30 | 31 | for tag in self._settings["manifest_xml_dependency_tags"]: 32 | for element in element.findall(tag): 33 | self.add_dependency(packagename, element.attrib["package"], dependencies) 34 | 35 | def _analyze(self, path: str) -> dict: 36 | 37 | packages = dict() 38 | filellist = self.search_files(path, "manifest.xml") 39 | 40 | for filename in filellist: 41 | logging.info("[ManifestXmlAnalyzer]: Analyzing " + filename) 42 | self.analyze_file(filename, packages) 43 | 44 | return packages 45 | -------------------------------------------------------------------------------- /rosmap/repository_cloners/mercurial_repository_cloner.py: -------------------------------------------------------------------------------- 1 | from .i_repository_cloner import IRepositoryCloner 2 | import hglib 3 | import os 4 | import re 5 | import logging 6 | 7 | 8 | REGEX_REPO_NAME = '\/([^/]+)\/*$' 9 | REGEX_REPO_NAME_GROUP = 1 10 | 11 | 12 | class MercurialRepositoryCloner(IRepositoryCloner): 13 | """ 14 | Clones mercurial-repositories. 
15 | """ 16 | def __init__(self, settings: dict): 17 | """ 18 | Creates a new instance of the MercurialRepositoryCloner class. 19 | :param settings: settings including keys analysis_workspace (path) and repository_folder (folder in 20 | analysis_workspace) 21 | """ 22 | self.__settings = settings 23 | 24 | def clone_repositories(self, repository_set: set) -> None: 25 | # Get path to mercurial repositories. 26 | directory = self.__settings["analysis_workspace"] + self.__settings["repository_folder"] + "hg/" 27 | 28 | # Create path for mercurial repositories. 29 | if not os.path.exists(directory): 30 | os.makedirs(directory) 31 | 32 | # Iterate over every url and clone repositories. 33 | for url in repository_set: 34 | # Get repo name 35 | regex_result = re.search(REGEX_REPO_NAME, url) 36 | repo_name = regex_result.group(REGEX_REPO_NAME_GROUP) 37 | 38 | # Notify user. 39 | logging.info("[MercurialRepositoryCloner]: Cloning repository " + repo_name + " from " + url + "...") 40 | 41 | try: 42 | # Create repo directory. 43 | repo_directory = directory + repo_name 44 | if not os.path.exists(repo_directory): 45 | os.makedirs(repo_directory) 46 | 47 | # Clone repository. 48 | hglib.clone(url, repo_directory) 49 | except hglib.error.CommandError: 50 | logging.warning("[MercurialRepositoryCloner]: Could not clone repository " + repo_name) 51 | 52 | def clones(self) -> str: 53 | return "hg" 54 | -------------------------------------------------------------------------------- /rosmap/package_analyzers/package_analyzer.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | import os 3 | 4 | 5 | class PackageAnalyzer(object): 6 | """ 7 | Abstract base class for plug-ins seeking to implement package analysis. 8 | """ 9 | __metaclass__ = ABCMeta 10 | 11 | def __init__(self, settings): 12 | """ 13 | Creates a new instance of a package-analyzer class. 
14 | :param settings: settings containing information for the plug-ins. 15 | """ 16 | self._settings = settings 17 | 18 | def add_dependency(self, dependant: str, dependency: str, packages: dict) -> None: 19 | """ 20 | Adds a dependency 21 | :param dependant: The package that depends on the dependency 22 | :param dependency: The package that the dependant is dependent on. 23 | :param packages: The packages and depdendencies of this repository (key: package, value: list of dependencies). 24 | :return: None 25 | """ 26 | if not dependant in packages: 27 | packages[dependant] = dict() 28 | if not "dependencies" in packages[dependant]: 29 | packages[dependant]["dependencies"] = list() 30 | packages[dependant]["name"] = dependant 31 | packages[dependant]["dependencies"].append(dependency) 32 | 33 | @abstractmethod 34 | def _analyze(self, path: str) -> dict: 35 | """ 36 | Analyze the current path for packages (recursively) 37 | :param path: Path to the repository that possibly contains files. 38 | :return: Dictionary with package-names and dependencies. 39 | """ 40 | raise NotImplementedError 41 | 42 | def analyze(self, path: str) -> list: 43 | return list(self._analyze(path).values()) 44 | 45 | def search_files(self, path: str, pattern: str) -> list: 46 | """ 47 | Searches for files recursively in the file system matching the provided pattern. 48 | :param path: The path to search in. 49 | :param pattern: The pattern to search for. 50 | :return: A list of paths to the found files. 
51 | """ 52 | filellist = [] 53 | for root, dirs, files in os.walk(path): 54 | for name in files: 55 | if name.endswith(pattern): 56 | filellist.append(os.path.join(root,str(pattern))) 57 | return filellist -------------------------------------------------------------------------------- /rosmap/repository_cloners/subversion_repository_cloner.py: -------------------------------------------------------------------------------- 1 | from .i_repository_cloner import IRepositoryCloner 2 | import os 3 | import svn.remote 4 | import svn.exception 5 | import urllib3 6 | import logging 7 | 8 | 9 | class SubversionRepositoryCloner(IRepositoryCloner): 10 | 11 | def __init__(self, settings: dict): 12 | """ 13 | Creates a new instance of the SubversionRepositoryCloner-class. 14 | :param settings: settings including keys analysis_workspace (path) and repository_folder (folder in 15 | analysis_workspace) 16 | """ 17 | self.__settings = settings 18 | 19 | def clone_repositories(self, repository_set: set) -> None: 20 | # Create folder 21 | directory = self.__settings["analysis_workspace"] + self.__settings["repository_folder"] + "svn/" 22 | if not os.path.exists(directory): 23 | os.makedirs(directory) 24 | 25 | for url in repository_set: 26 | # Generate folder-name 27 | repo_name = url.replace("/", "_") 28 | 29 | # Notify user. 30 | logging.info("[SubversionRepositoryCloner]: Cloning repository " + repo_name + " from " + url + "...") 31 | 32 | repo_directory = directory + repo_name 33 | http = urllib3.PoolManager() 34 | 35 | try: 36 | # Make sure server and path still exist. 37 | status = http.request('GET', url, timeout=2).status 38 | if status == 200: 39 | try: 40 | # Create repo directory. 41 | if not os.path.exists(repo_directory): 42 | os.makedirs(repo_directory) 43 | 44 | # Check out SVN repository. 
45 | svn.remote.RemoteClient(url).checkout(repo_directory) 46 | except svn.exception.SvnException: 47 | logging.warning("[SubversionRepositoryCloner]: Could not clone from " + url) 48 | else: 49 | logging.warning("[SubversionRepositoryCloner]: Could not clone from " 50 | + url + ", server responded with " + str(status)) 51 | except urllib3.exceptions.MaxRetryError: 52 | logging.warning("[SubversionRepositoryCloner]: Could not reach " + url + ", connection timeout...") 53 | 54 | def clones(self) -> str: 55 | return "svn" 56 | -------------------------------------------------------------------------------- /rosmap/repository_cloners/git_repository_cloner.py: -------------------------------------------------------------------------------- 1 | from .i_repository_cloner import IRepositoryCloner 2 | from git import Repo 3 | from git import GitCommandError 4 | import os 5 | import re 6 | import logging 7 | from shutil import copy 8 | 9 | REGEX_REPO_NAME = '\/([^\/]+?)\/([^\/]+?)\.git' 10 | REGEX_REPO_USER_GROUP = 1 11 | REGEX_REPO_NAME_GROUP = 2 12 | 13 | 14 | class GitRepositoryCloner(IRepositoryCloner): 15 | 16 | def __init__(self, settings: dict): 17 | self.__settings = settings 18 | 19 | def clone_repositories(self, repository_set: set) -> None: 20 | copy(os.path.dirname(os.path.realpath(__file__)) + "/git_askpass.py", self.__settings["analysis_workspace"]) 21 | os.chmod(self.__settings["analysis_workspace"] + "/git_askpass.py", 0o777) 22 | os.environ['GIT_ASKPASS'] = self.__settings["analysis_workspace"] + "/git_askpass.py" 23 | print(os.environ['GIT_ASKPASS']) 24 | os.environ['GIT_USERNAME'] = self.__settings["github_username"] 25 | os.environ['GIT_PASSWORD'] = self.__settings["github_password"] 26 | # Create folder 27 | if not os.path.exists(self.__settings["analysis_workspace"] + self.__settings["repository_folder"] + "git/"): 28 | os.makedirs(self.__settings["analysis_workspace"] + self.__settings["repository_folder"] + "git/") 29 | 30 | for url in 
repository_set: 31 | # Get repo name 32 | 33 | regex_result = re.search(REGEX_REPO_NAME, url) 34 | if regex_result is not None: 35 | repo_name = regex_result.group(REGEX_REPO_NAME_GROUP) 36 | user_name = regex_result.group(REGEX_REPO_USER_GROUP) 37 | 38 | # Notify user 39 | logging.info("[GitRepositoryCloner]: Cloning repository " + repo_name + " from " + url + "...") 40 | 41 | # Suffix in case a repo with the same name already exists 42 | 43 | try: 44 | # Create directory. 45 | directory = self.__settings["analysis_workspace"] + self.__settings["repository_folder"] + "git/" + user_name + "_" + repo_name 46 | if not os.path.exists(directory): 47 | os.makedirs(directory) 48 | 49 | # Clone into directory. 50 | Repo.clone_from(url, directory) 51 | 52 | except GitCommandError: 53 | logging.warning("[GitRepositoryCloner]: Could not clone repository " + repo_name) 54 | 55 | def clones(self) -> str: 56 | return "git" 57 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/offline/git_repository_analyzer.py: -------------------------------------------------------------------------------- 1 | from .abstract_repository_analyzer import AbstractRepositoryAnalyzer 2 | from git import Repo 3 | from git import InvalidGitRepositoryError 4 | import subprocess 5 | import os 6 | import logging 7 | 8 | 9 | class GitRepositoryAnalyzer(AbstractRepositoryAnalyzer): 10 | """ 11 | Analysis plug-in for Git-Repositories. 12 | """ 13 | 14 | def count_repo_branches(self, repo_path: str, remote: str) -> None: 15 | """ 16 | Counts the repository's branches. 17 | :param repo_path: path to the repository root. 
18 | :param remote: remote uri of the branches 19 | :return: None 20 | """ 21 | branches = subprocess.check_output("cd " + repo_path + ";git branch -a | wc -l", shell=True) 22 | self.get_details(remote)["branch_count"] = int(branches) 23 | 24 | def count_repo_contributors(self, repo_path: str, remote: str) -> None: 25 | """ 26 | Counts the repository's contributors. 27 | :param repo_path: path to the repository root. 28 | :param remote: remote uri of the branches 29 | :return: None 30 | """ 31 | contributors = subprocess.check_output("cd " + repo_path + ";git shortlog -s HEAD | wc -l", shell=True) 32 | self.get_details(remote)["contributors"] = int(contributors) 33 | 34 | def extract_last_repo_update(self, repo_path: str, remote: str) -> None: 35 | """ 36 | Extracts the repository's last update-timestamp. 37 | :param repo_path: path to the repository root. 38 | :param remote: remote uri of the branches 39 | :return: None 40 | """ 41 | timestamp = subprocess.check_output("cd " + repo_path + ";git log -1 --format=%ct", shell=True) 42 | self.get_details(remote)["last_update"] = int(timestamp) 43 | 44 | def _analyze(self, path: str, repo_details: dict) -> iter: 45 | self._repo_details = repo_details 46 | for folder in os.listdir(path): 47 | 48 | # Build path and inform user... 49 | current_path = path + "/" + folder + "" 50 | logging.info("[GitRepositoryAnalyzer]: Analyzing:" + current_path) 51 | 52 | # Check if repo is valid. 53 | try: 54 | repo = Repo(path + "/" + folder + "/") 55 | except InvalidGitRepositoryError: 56 | continue 57 | 58 | # Extract origin url. 59 | origin_url = repo.remotes.origin.url 60 | 61 | # Git analysis. 
import logging
from pyquery import PyQuery
from urllib.error import HTTPError
from .i_repository_parser import IRepositoryParser


class BitbucketRepositoryParser(IRepositoryParser):
    """
    Parses repository-URLs from Bitbucket using the Bitbucket-search.
    """

    def __init__(self, settings):
        """
        Creates a new instance of the BitbucketRepositoryParser-class.
        :param settings: Settings dict containing keys bitbucket_repo_page, and bitbucket_repo_search_string.
        """
        self.__settings = settings

    def parse_repositories(self, repository_dict: dict) -> None:
        repo_links = set()
        page = 0
        size_before = -1

        # Fetch search-result pages until a page contributes no new links.
        while size_before != len(repo_links):
            page += 1
            size_before = len(repo_links)

            search_url = (self.__settings["bitbucket_repo_page"]
                          + str(page)
                          + '?name='
                          + self.__settings["bitbucket_repo_search_string"])
            document = PyQuery(url=search_url)

            # Every element of class .repo-link points at one repository page.
            for anchor in document(".repo-link").items():
                repo_links.add('https://bitbucket.org' + anchor.attr("href"))

            logging.info(f"[BitbucketRepoParser]: Parsing BitBucket links... [{len(repo_links)} items]")

        # Resolve every repository page into its actual clone URL(s).
        for progress, link in enumerate(repo_links, start=1):
            try:
                document = PyQuery(url=link)
                for field in document(".clone-url-input").items():
                    url = str(field.attr("value"))

                    # Skip wiki clone-URLs.
                    if url[-4:] == "wiki":
                        continue

                    # The suffix after the last dot identifies the vcs type;
                    # everything that is not ".git" is treated as Mercurial.
                    vcs_type = url.split('.')[-1]
                    if vcs_type != "git":
                        vcs_type = "hg"
                    repository_dict[vcs_type].add(url)

                logging.info(f"[BitbucketRepoParser]: Parsing BitBucket clone-URLs... [{progress}/{len(repo_links)}]")
            except HTTPError as error:
                # Notify user of the failed page and continue with the next link.
                logging.warning(f"[BitbucketRepoParser]: Could not parse from {link}, {error.reason}")
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | .static_storage/ 57 | .media/ 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 108 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 109 | 110 | # User-specific stuff: 111 | .idea/**/workspace.xml 112 | .idea/**/tasks.xml 113 | .idea/ 114 | 115 | # Sensitive or high-churn files: 116 | .idea/**/dataSources/ 117 | .idea/**/dataSources.ids 118 | .idea/**/dataSources.xml 119 | .idea/**/dataSources.local.xml 120 | .idea/**/sqlDataSources.xml 121 | .idea/**/dynamic.xml 122 | .idea/**/uiDesigner.xml 123 | 124 | # Gradle: 125 | .idea/**/gradle.xml 126 | .idea/**/libraries 127 | 128 | # CMake 129 | cmake-build-debug/ 130 | cmake-build-release/ 131 | 132 | # Mongo Explorer plugin: 133 | .idea/**/mongoSettings.xml 134 | 135 | ## File-based project format: 136 | *.iws 137 | 138 | ## 
from .abstract_repository_analyzer import AbstractRepositoryAnalyzer
import subprocess
import os
import logging


class MercurialRepositoryAnalyzer(AbstractRepositoryAnalyzer):
    """
    Analysis plug-in for mercurial repositories.
    """

    def count_repo_branches(self, repo_path: str, remote: str) -> None:
        """
        Counts the repository's branches.
        :param repo_path: path to the repository root.
        :param remote: remote uri of the repository.
        :return: None
        """
        # Run hg with cwd= and an argument list instead of a "cd path; hg ..."
        # shell string (robust against spaces / shell metacharacters).
        branches = subprocess.check_output(["hg", "branches"], cwd=repo_path)
        # One output line per branch (replaces the "| wc -l" pipeline).
        self.get_details(remote)["branch_count"] = len(branches.splitlines())

    def count_repo_contributors(self, repo_path: str, remote: str) -> None:
        """
        Counts the repository's distinct contributors.
        :param repo_path: path to the repository root.
        :param remote: remote uri of the repository.
        :return: None
        """
        authors = subprocess.check_output(["hg", "log", "--template", "{author|person}\n"], cwd=repo_path)
        # Unique author lines (replaces the "sort | uniq | wc -l" pipeline).
        self.get_details(remote)["contributors"] = len(set(authors.splitlines()))

    def extract_repo_url(self, repo_path: str) -> str:
        """
        Extracts the remote URL from a given Mercurial repository-path.
        (Docstring previously said "SVN"; this is the Mercurial analyzer.)
        :param repo_path: path to the repository root.
        :return: Remote URL, or "" if repo_path is not a valid repository.
        """
        try:
            return subprocess.check_output(["hg", "paths", "default"], cwd=repo_path).decode("utf-8").rstrip("\n")
        except (subprocess.CalledProcessError, OSError):
            # OSError covers non-directory entries and a missing hg binary,
            # which the old shell-based invocation surfaced as CalledProcessError.
            return ""

    def extract_last_repo_update(self, repo_path: str, remote: str) -> None:
        """
        Extracts the repository's last update-timestamp.
        :param repo_path: path to the repository root.
        :param remote: remote uri of the repository.
        :return: None
        """
        timestamp = subprocess.check_output(
            ["hg", "log", "--limit", "1", "--template", '{date(date, "%s")}'], cwd=repo_path)
        self.get_details(remote)["last_update"] = int(timestamp)

    def _analyze(self, path: str, repo_details: dict) -> iter:
        # BUGFIX: was annotated "-> None" although this is a generator
        # (the git analyzer's twin method is annotated "-> iter").
        self._repo_details = repo_details
        for folder in os.listdir(path):

            # Build path and inform user...
            current_path = path + "/" + folder
            logging.info("[MercurialRepositoryAnalyzer]: Analyzing:" + current_path)

            # Extract origin url.
            origin_url = self.extract_repo_url(current_path)

            # If origin_url is empty string, then this is not a valid mercurial-repository.
            if origin_url != "":
                # Mercurial analysis.
                self.count_repo_contributors(current_path, origin_url)
                self.count_repo_branches(current_path, origin_url)
                self.extract_last_repo_update(current_path, origin_url)

                yield (current_path, origin_url)
            else:
                logging.warning("[MercurialRepositoryAnalyzer]: " + current_path + " is not a valid repository...")

    def analyzes(self):
        return "hg"
37 | :param details: The Details associated with the URL. 38 | :return: None 39 | """ 40 | details["stars"] = self.__api_bindings.get_stargazer_count(repo_uri) 41 | 42 | def count_issues(self, repo_uri: str, details: dict) -> None: 43 | """ 44 | Counts open and closed issues. 45 | :param repo_uri: The URL to the repository. 46 | :param details: The Details associated with the URL. 47 | :return: None 48 | """ 49 | for issue in self.__api_bindings.get_values(self.__api_bindings.get_issues_api_string(repo_uri)): 50 | if issue["state"] in ["open", "new"]: 51 | details["open_issues"] += 1 52 | else: 53 | elapsed_time = dateutil.parser.parse(issue["updated_on"]) - dateutil.parser.parse(issue["created_on"]) 54 | details["issue_durations"].append(elapsed_time.total_seconds()) 55 | 56 | def count_pull_requests(self, repo_uri: str, details: dict) -> None: 57 | """ 58 | Counts open pull requests. 59 | :param repo_uri: The URL to the repository. 60 | :param details: The Details associated with the URL. 
61 | :return: None 62 | """ 63 | for pull_request in self.__api_bindings.get_values(self.__api_bindings.get_pull_requests_api_string(repo_uri)): 64 | if pull_request["state"] == "OPEN": 65 | details["open_pull_requests"] += 1 66 | 67 | def analyze_repositories(self, repo_details: dict) -> None: 68 | for url, details in repo_details.items(): 69 | if "bitbucket" in url: 70 | logging.info("[BitbucketRepositoryAnalyzer]: Fetching data from " + url) 71 | self.count_stargazers(url, details) 72 | self.count_issues(url, details) 73 | self.count_pull_requests(url, details) 74 | 75 | def analyzes(self): 76 | return "bitbucket" -------------------------------------------------------------------------------- /rosmap/repository_analyzers/offline/abstract_repository_analyzer.py: -------------------------------------------------------------------------------- 1 | from repository_analyzers.offline.i_repository_analyzer import IRepositoryAnalyzer 2 | from abc import ABCMeta, abstractmethod 3 | import os 4 | 5 | 6 | class AbstractRepositoryAnalyzer(IRepositoryAnalyzer): 7 | """ 8 | Abstract base class for repository-analysis plug-ins. 9 | """ 10 | __metaclass__ = ABCMeta 11 | 12 | def __init__(self, package_analyzers, file_analyzers): 13 | """ 14 | Constructor for all classes that continue to implement this class. 15 | :param package_analyzers: 16 | :param file_analyzers: 17 | """ 18 | self._repo_details = dict() 19 | self.package_analyzers = package_analyzers 20 | self.file_analyzers = file_analyzers 21 | 22 | @abstractmethod 23 | def _analyze(self, path, repo_details) -> iter: 24 | """ 25 | Analyzes all repositories based on their repository type, and yield returns the origin URL. 26 | :param path: Path to the repositories. 27 | :param repo_details: Details to the repository. 28 | :return: 29 | """ 30 | raise NotImplementedError 31 | 32 | def get_details(self, remote: str) -> None: 33 | """ 34 | Gets the details of a repository based on its remote URL. 
35 | :param remote: Remote URL. 36 | :return: None 37 | """ 38 | if remote not in self._repo_details: 39 | self._repo_details[remote] = dict() 40 | self._repo_details[remote]["url"] = remote 41 | return self._repo_details[remote] 42 | 43 | def initialize_details(self, remote: str) -> None: 44 | """ 45 | Initializes fields for file analyzers. 46 | :param remote: Remote URL. 47 | :return: None 48 | """ 49 | for file_analyzer in self.file_analyzers: 50 | file_analyzer.initialize_fields(self.get_details(remote)) 51 | 52 | def __process_files(self, directory: str, remote: str) -> None: 53 | """ 54 | Analyzes all files inside a directory. 55 | :param directory: Repository root directory. 56 | :param remote: Remote 57 | :return: None. 58 | """ 59 | self.initialize_details(remote) 60 | 61 | # Build file-list. 62 | filelist = list() 63 | for path, subdirectory, files in os.walk(directory): 64 | for name in files: 65 | filelist.append(os.path.join(path, name)) 66 | 67 | for file_anlayzer in self.file_analyzers: 68 | file_anlayzer.analyze_files(filelist, self.get_details(remote)) 69 | 70 | def analyze_repositories(self, path: str, repo_details: dict) -> None: 71 | # Add generic analysis to the tasks that are performed. 72 | for repo_path, remote in self._analyze(path, repo_details): 73 | self.__analyze_packages(repo_path, remote) 74 | self.__process_files(repo_path, remote) 75 | 76 | def __analyze_packages(self, path: str, remote: str) -> None: 77 | """ 78 | Analyze package-files of a repository. 79 | :param path: Path to root containing files to analyze. 80 | :param remote: Remote URL of the repository containing the file. 
81 | :return: None 82 | """ 83 | for package_analyzer in self.package_analyzers: 84 | if "packages" not in self.get_details(remote): 85 | self.get_details(remote)["packages"] = list() 86 | self.get_details(remote)["packages"].extend(package_analyzer.analyze(path)) 87 | 88 | -------------------------------------------------------------------------------- /schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type":"array", 3 | "title":"Array that contains all repository-objects.", 4 | "items":{ 5 | "type":"object", 6 | "title":"The repository class.", 7 | "required":[ 8 | "url", 9 | "continuous_integration", 10 | "rosinstall", 11 | "contributors", 12 | "branch_count", 13 | "changelog", 14 | "packages", 15 | "cpplint_errors", 16 | "last_update", 17 | "readme" 18 | ], 19 | "properties":{ 20 | "open_issues":{ 21 | "type":"integer", 22 | "title":"Number of open issues." 23 | }, 24 | "url":{ 25 | "type":"string", 26 | "title":"The origin-url of the repository" 27 | }, 28 | "continuous_integration":{ 29 | "type":"boolean", 30 | "title":"Is there a file present that suggests continuous integration is set up?" 31 | }, 32 | "rosinstall":{ 33 | "type":"boolean", 34 | "title":"Is there a rosinstall file present?" 35 | }, 36 | "open_pull_requests":{ 37 | "type":"integer", 38 | "title":"Number of open pull-requests" 39 | }, 40 | "contributors":{ 41 | "type":"integer", 42 | "title":"Number of contributors" 43 | }, 44 | "branch_count":{ 45 | "type":"integer", 46 | "title":"Number of branches" 47 | }, 48 | "changelog":{ 49 | "type":"boolean", 50 | "title":"Is there a CHANGELOG file present?" 
51 | }, 52 | "issue_durations":{ 53 | "type":"array", 54 | "title":"Issue durations in seconds.", 55 | "items":{ 56 | "type":"number" 57 | } 58 | }, 59 | "packages":{ 60 | "type":"array", 61 | "title":"Packages contained in this repository.", 62 | "items":{ 63 | "type":"object", 64 | "title":"The package-class.", 65 | "required":[ 66 | "name", 67 | "dependencies" 68 | ], 69 | "properties":{ 70 | "name":{ 71 | "type":"string", 72 | "title":"The package's name" 73 | }, 74 | "dependencies":{ 75 | "type":"array", 76 | "title":"The package's dependencies (package names of dependencies)", 77 | "items":{ 78 | "type":"string" 79 | } 80 | } 81 | } 82 | } 83 | }, 84 | "closed_pull_requests":{ 85 | "type":"integer", 86 | "title":"Number of closed pull-requests" 87 | }, 88 | "stars":{ 89 | "type":"integer", 90 | "title":"Number of github-stars/bitbucket-watchers" 91 | }, 92 | "cpplint_errors":{ 93 | "type":"integer", 94 | "title":"Number of cpplint errors." 95 | }, 96 | "last_update":{ 97 | "type":"number", 98 | "title":"Time of last repository commit as UNIX-Timestamp" 99 | }, 100 | "readme":{ 101 | "type":"boolean", 102 | "title":"Is there a readme-file present?" 103 | } 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /rosmap/repository_parsers/rosdistro_repo_parser.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | import git 4 | import logging 5 | from .i_repository_parser import IRepositoryParser 6 | 7 | 8 | class RosdistroRepositoryParser(IRepositoryParser): 9 | """ 10 | Pulls the rosdistro-package and gets all urls from the rosdistro files. 
11 | """ 12 | 13 | def __init__(self, settings: dict): 14 | """ 15 | Creates a new instance of the RosdistroRepositoryParser class 16 | :param settings: Settings containing information about rosdistro_workspace and rosdistro_url 17 | """ 18 | self.__settings = settings 19 | 20 | def __get_rosdistro_repository(self) -> None: 21 | """ 22 | Clones the repository from rosdistro_url into rosdistro_workspace (defined in settings) 23 | :return: None 24 | """ 25 | if not os.path.exists(self.__settings["rosdistro_workspace"]): 26 | os.makedirs(self.__settings["rosdistro_workspace"]) 27 | try: 28 | logging.info("[RosdistroRepositoryParser]: Cloning rosdistro repository...") 29 | git.Repo.clone_from(self.__settings["rosdistro_url"], self.__settings["rosdistro_workspace"]) 30 | except git.exc.GitCommandError: 31 | logging.warning("[RosdistroRepositoryParser]: Repository already exists, pulling changes...") 32 | repo = git.Repo(self.__settings["rosdistro_workspace"]) 33 | repo.remotes.origin.pull() 34 | logging.info("[RosdistroRepositoryParser]: Rosdistro up-to-date...") 35 | 36 | def __get_urls_from_file(self, file_path: str, repository_dict: dict) -> None: 37 | """ 38 | Gets the URLs from a distribution.yaml that adheres to rosdistro-specs. 39 | :param file_path: path to a distribution.yaml file 40 | :param repository_dict: dictionary with repository-type (git, svn, hg, ...) as key and the repo-url as value 41 | :return: None 42 | """ 43 | 44 | # Load file. 45 | file = open(file_path, 'r') 46 | rosdistro = yaml.load(file) 47 | 48 | # Iterate repositories and add them to the repository_dict. 
49 | for repository in rosdistro["repositories"]: 50 | try: 51 | vcs_type = str(rosdistro["repositories"][repository]["doc"]["type"]) 52 | url = str(rosdistro["repositories"][repository]["doc"]["url"]) 53 | repository_dict[vcs_type].add(url) 54 | except KeyError: 55 | pass 56 | 57 | try: 58 | vcs_type = str(rosdistro["repositories"][repository]["doc"]["type"]) 59 | url = str(rosdistro["repositories"][repository]["source"]["url"]) 60 | repository_dict[vcs_type].add(url) 61 | except KeyError: 62 | pass 63 | 64 | try: 65 | # This has to be a git repository (required by bloom) 66 | repository_dict["git"].add(rosdistro["repositories"][repository]["release"]["url"]) 67 | except KeyError: 68 | pass 69 | 70 | def parse_repositories(self, repository_dict: dict) -> None: 71 | # Actually get the repository 72 | self.__get_rosdistro_repository() 73 | 74 | # Parse index.yaml 75 | index_file = open(self.__settings["rosdistro_workspace"] + "index.yaml", "r") 76 | index_yaml = yaml.load(index_file) 77 | 78 | # Get all urls from all distribution.yaml files 79 | for distribution in index_yaml["distributions"]: 80 | logging.info("Parsing distribution " + index_yaml["distributions"][distribution]["distribution"][0]) 81 | self.__get_urls_from_file(self.__settings["rosdistro_workspace"] 82 | + index_yaml["distributions"][distribution]["distribution"][0], 83 | repository_dict) -------------------------------------------------------------------------------- /rosmap/loaders/module_loader.py: -------------------------------------------------------------------------------- 1 | import pkgutil 2 | import importlib 3 | import inspect 4 | import logging 5 | 6 | class ModuleLoader(object): 7 | """ 8 | Loads modules via reflection and automatically instantiates classes. 9 | """ 10 | @staticmethod 11 | def load_modules(dir_path: str, package: str, ignore_classes: list, class_suffix: str, *args) -> list: 12 | """ 13 | Creates a list of objects from all classes found in a package. 
14 | :param dir_path: current path (i.e.: path of the calling file). 15 | :param package: path to package starting from dir_path, 16 | :param ignore_classes: Ignores classes named in this list. (list contains strings) 17 | :param class_suffix: Selects only classes that end with this suffix. 18 | :param args: Arguments for the class' constructor. 19 | """ 20 | 21 | # create list. 22 | objects = list() 23 | 24 | logging.info('[ModuleLoader]: Initializing parsers at ' + dir_path + '/' + package) 25 | 26 | # get modules iterator. 27 | modules = pkgutil.iter_modules(path=[dir_path + '/' + package]) 28 | 29 | # notify if there are no modules inside the folder. 30 | if not modules: 31 | logging.warning('[ModuleLoader]: No modules found at' + dir_path) 32 | return objects 33 | 34 | # iterate over modules. 35 | for loader, mod_name, ispkg in modules: 36 | 37 | # Get actual module path... 38 | module_path = package.replace("/", ".") 39 | 40 | # Import module. 41 | mod = importlib.import_module("rosmap." + module_path + "." + mod_name) 42 | 43 | # Get class names from module and instantiate classes. 44 | for selected_classname in ModuleLoader.get_classnames_from_module(mod, class_suffix, ignore_classes): 45 | try: 46 | objects.append(ModuleLoader.instantiate_class(mod, mod_name, module_path, selected_classname, *args)) 47 | except ValueError as error: 48 | logging.warning("[ModuleLoader]: " + str(error)) 49 | 50 | return objects 51 | 52 | 53 | 54 | @staticmethod 55 | def get_classnames_from_module(module: str, class_suffix: str, ignore_classes: list) -> iter: 56 | """ 57 | Yield returns all applicable class-names in a module... 58 | :param module: The module to search classes in. 59 | :param class_suffix: Selects only classes that end with this suffix. 60 | :param ignore_classes: Ignores classes named in this list. (list contains strings) 61 | :return: iterable of strings containing selected class-names. 62 | """ 63 | # Iterate over all classes in module. 
64 | for classname in dir(module): 65 | if classname[-len(class_suffix):] == class_suffix and classname not in ignore_classes: 66 | yield classname 67 | 68 | @staticmethod 69 | def instantiate_class(module: str, module_name: str, module_path: str, class_name: str, *args) -> object: 70 | """ 71 | Instantiates a class based on parameters. 72 | :param module: The module the class is located in. 73 | :param module_name: The name of the module the class is located in. 74 | :param module_path: The path to the module. 75 | :param class_name: The name of the class to be instantiated. 76 | :param args: Arguments for the class' constructor. 77 | :return: instance of the selected class in the selected module. 78 | """ 79 | my_class = getattr(module, class_name) 80 | logging.info("[ModuleLoader]: Instantiating " + class_name + " from " + module_path + "." + module_name) 81 | if inspect.isclass(my_class): 82 | return my_class(*args) 83 | else: 84 | raise ValueError(class_name + " is not a class, skipping.") 85 | -------------------------------------------------------------------------------- /rosmap/repository_analyzers/offline/subversion_repository_analyzer.py: -------------------------------------------------------------------------------- 1 | from .abstract_repository_analyzer import AbstractRepositoryAnalyzer 2 | from xml.etree.cElementTree import fromstring 3 | from xml.etree.cElementTree import ParseError 4 | import subprocess 5 | import os 6 | import dateutil.parser 7 | import logging 8 | 9 | 10 | class SubversionRepositoryAnalyzer(AbstractRepositoryAnalyzer): 11 | """ 12 | Analysis plug-in for Subversion repositories. 13 | """ 14 | 15 | def count_repo_branches(self, repo_path: str, remote: str) -> None: 16 | """ 17 | Counts the repository's branches. 18 | :param repo_path: path to the repository root. 
19 | :param remote: remote uri of the branches 20 | :return: None 21 | """ 22 | branches = subprocess.check_output("cd " + repo_path + ";svn ls $(svn info --show-item=repos-root-url)/branches | wc -l", shell=True) 23 | self.get_details(remote)["branch_count"] = int(branches) 24 | 25 | def count_repo_contributors(self, repo_path: str, remote:str) -> None: 26 | """ 27 | Counts the repository's contributors. 28 | :param repo_path: path to the repository root. 29 | :param remote: remote uri of the branches 30 | :return: None 31 | """ 32 | contributors = subprocess.check_output("cd " + repo_path + ";svn log --quiet | awk '/^r/ {print $3}' | sort -u | wc -l", shell=True) 33 | self.get_details(remote)["contributors"] = int(contributors) 34 | 35 | def extract_repo_url(self, repo_path) -> str: 36 | """ 37 | Extracts the Remote URL from a given SVN repository-path. 38 | :param repo_path: path to the repository root. 39 | :return: Remote URL 40 | """ 41 | try: 42 | return subprocess.check_output("cd " + repo_path + ";svn info --show-item=url", shell=True).decode("utf-8").rstrip("\n") 43 | except subprocess.CalledProcessError: 44 | return "" 45 | 46 | def extract_last_repo_update(self, repo_path: str, remote: str) -> None: 47 | """ 48 | Extracts the repository's last update-timestamp. 49 | :param repo_path: path to the repository root. 50 | :param remote: remote uri of the branches 51 | :return: None 52 | """ 53 | 54 | timestamp = subprocess.check_output("cd " + repo_path + ";svn log --limit 1 --incremental --xml --quiet", shell=True) 55 | 56 | # Parse xml 57 | try: 58 | element = fromstring(timestamp) 59 | except ParseError: 60 | logging.warning("[SubversionRepositoryAnalyzer]: Could not parse " + timestamp + "; omitting file.") 61 | return 62 | 63 | # Get actual timestamp 64 | timestamp = element.find('date').text 65 | timestamp = dateutil.parser.parse(timestamp) 66 | 67 | # Insert timestamp into details. 
68 | self.get_details(remote)["last_update"] = int(timestamp.timestamp()) 69 | 70 | def _analyze(self, path: str, repo_details: dict) -> None: 71 | self._repo_details = repo_details 72 | for folder in os.listdir(path): 73 | 74 | # Build path and inform user... 75 | current_path = path + "/" + folder + "" 76 | logging.info("[SubversionRepositoryAnalyzer]: Analyzing:" + current_path) 77 | 78 | # Extract origin url. 79 | origin_url = self.extract_repo_url(current_path) 80 | 81 | # If origin_url is "", then this is not a valid svn-repository. 82 | if origin_url != "": 83 | # Subversion analysis. 84 | self.count_repo_contributors(current_path, origin_url) 85 | self.count_repo_branches(current_path, origin_url) 86 | self.extract_last_repo_update(current_path, origin_url) 87 | 88 | yield (current_path, origin_url) 89 | else: 90 | logging.warning("[SubversionRepositoryAnalyzer]: " + current_path + " is not a valid repository...") 91 | 92 | def analyzes(self): 93 | return "svn" -------------------------------------------------------------------------------- /rosmap/repository_analyzers/online/github_repository_analyzer.py: -------------------------------------------------------------------------------- 1 | import dateutil.parser 2 | import logging 3 | from .i_scs_analyzer import ISCSRepositoryAnalyzer 4 | from rosmap.api_bindings.github_api_bindings import GithubApiBindings 5 | 6 | 7 | class GithubRepositoryAnalyzer(ISCSRepositoryAnalyzer): 8 | """ 9 | Analyzes repositories remotely on GitHub and extracts GitHub-specific information. 10 | """ 11 | def __init__(self, settings: dict): 12 | """ 13 | Creates a new instance of the GithubRepositoryAnalyzer class. 
14 | :param settings: settings containing github_username, github_password, and github_api_rate_limit 15 | """ 16 | self.__api_bindings = GithubApiBindings(settings["github_username"], 17 | settings["github_password"], 18 | settings["github_api_rate_limit"]) 19 | 20 | @staticmethod 21 | def initialize_values(repo_details: dict) -> None: 22 | """ 23 | Initializes all values that might be needed in order to avoid exceptions. 24 | :param repo_details: details associated with the analyzed repository. 25 | :return: 26 | """ 27 | if "closed_pull_requests" not in repo_details: 28 | repo_details["closed_pull_requests"] = 0 29 | if "issue_durations" not in repo_details: 30 | repo_details["issue_durations"] = list() 31 | if "open_pull_requests" not in repo_details: 32 | repo_details["open_pull_requests"] = 0 33 | if "open_issues" not in repo_details: 34 | repo_details["open_issues"] = 0 35 | 36 | def count_repo_stars(self, url: str, repo_details: dict) -> None: 37 | """ 38 | Counts the stargazers for the repository. 39 | :param url: URL to the repository. 40 | :param repo_details: details of the repository associated with the URL 41 | :return: 42 | """ 43 | repo_details["stars"] = self.__api_bindings.get_stargazer_count(url) 44 | 45 | def count_closed_issues(self, url: str, repo_details: dict) -> None: 46 | """ 47 | Counts all closed issues and calculats how long they were open. Counts closed pull requests. 48 | :param url: URL to the repository. 
49 | :param repo_details: details of the repository associated with the URL 50 | :return: None 51 | """ 52 | for issue in self.__api_bindings.get_issues(url, "closed"): 53 | if self.__api_bindings.is_pull_request(issue): 54 | repo_details["closed_pull_requests"] += 1 55 | else: 56 | elapsed_time = dateutil.parser.parse(issue["closed_at"]) - dateutil.parser.parse(issue["created_at"]) 57 | repo_details["issue_durations"].append(elapsed_time.total_seconds()) 58 | 59 | def count_open_issues(self, url: str, repo_details: dict) -> None: 60 | """ 61 | Counts open issues and pull requests. 62 | :param url: URL to the repository to count the open issues. 63 | :param repo_details: details of the repository associated with the URL 64 | :return: None 65 | """ 66 | for issue in self.__api_bindings.get_issues(url, "open"): 67 | if self.__api_bindings.is_pull_request(issue): 68 | repo_details["open_pull_requests"] += 1 69 | else: 70 | repo_details["open_issues"] += 1 71 | 72 | def analyze_repositories(self, repo_details: dict) -> None: 73 | # Iterate over all URLs and their associated detail dicts 74 | for url, details in repo_details.items(): 75 | # Check if it is a GitHub URL. 76 | if "github" in url: 77 | logging.info("[GithubRepositoryAnalyzer]: Fetching data from " + url) 78 | self.initialize_values(details) 79 | self.count_repo_stars(url, details) 80 | self.count_closed_issues(url, details) 81 | self.count_open_issues(url, details) 82 | 83 | def analyzes(self): 84 | return "github" 85 | -------------------------------------------------------------------------------- /rosmap/api_bindings/bitbucket_api_bindings.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import time 4 | import certifi 5 | import urllib3 6 | 7 | 8 | class BitbucketApiBindings: 9 | """ 10 | Wraps Bitbucket API functions. 
11 | """ 12 | def __init__(self, rate_limit: int): 13 | self.__rate_limit = rate_limit 14 | 15 | def form_bitbucket_request(self, url: str) -> urllib3.response: 16 | """ 17 | Creates new bitbucket request and returns the response. 18 | :param url: The url to call. 19 | :return: The response resulting from the request. 20 | """ 21 | time.sleep(3600/self.__rate_limit) 22 | http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) 23 | return http.request('GET', 24 | url) 25 | 26 | def get_repo_substring(self, url, provider): 27 | """ 28 | Gets the repo-substring (i.e. url: https://bitbucket.org/osrf/gazebo -> returns: osrf/gazebo 29 | :param url: URL to get the substring from. 30 | :param provider: Part to cut off from the front. 31 | :return: the substring formatted as {|}/{repository_name} 32 | """ 33 | 34 | project_string = url.split(provider)[1] 35 | # This is okay since Mercurial does not have an extension on the back of remote urls. 36 | project_string = project_string.split(".git")[0] 37 | return project_string 38 | 39 | def get_stargazer_count(self, repo_url): 40 | """ 41 | Gets the "stargazer" count for github. Used watchers since stargazers do not exist in Bitbucket. 42 | :param repo_url: URL to the repository. 43 | :return: the amount of watchers on the repository, -1 if request failed. 44 | """ 45 | project_string = self.get_repo_substring(repo_url, "https://bitbucket.org/") 46 | response = self.form_bitbucket_request( 47 | "https://api.bitbucket.org/2.0/repositories/" + project_string + "/watchers") 48 | if response.status == 200: 49 | data = response.data 50 | decoded = json.loads(data.decode('utf-8')) 51 | return decoded["size"] 52 | return -1 53 | 54 | def get_next_url(self, result): 55 | """ 56 | Gets the URL for the next page. 57 | :param result: URL for the next page. 58 | :return: The next url, or empty string, if no next string is available. 
        """
        if "next" in result:
            return result["next"]
        else:
            return ""

    def get_issues_api_string(self, repo_url):
        """
        Returns API url to call for issues associated with the repository.
        :param repo_url: Repository URL to get issues from.
        :return: API URL for retrieving an issue list.
        """
        project_string = self.get_repo_substring(repo_url, "https://bitbucket.org/")
        return "https://api.bitbucket.org/2.0/repositories/" + project_string + "/issues"

    def get_pull_requests_api_string(self, repo_uri):
        """
        Returns API URL to call for (open) pull requests associated with the repository.
        :param repo_uri: Repository URL to get pull requests from.
        :return: API URL for retrieving pull request list.
        """
        project_string = self.get_repo_substring(repo_uri, "https://bitbucket.org/")
        return "https://api.bitbucket.org/2.0/repositories/" + project_string + "/pullrequests?state=OPEN"

    def get_values(self, api_url) -> iter:
        """
        Gets the values field from a Bitbucket API result (used for e.g. pull requests, issues, etc..)
        :param api_url: API url to call. (see *_api_string)
        :return: Yield returns the values from the Bitbucket API.
        """
        next_url = api_url
        # Follow the pagination "next" links until no further page is returned.
        while next_url != "":
            response = self.form_bitbucket_request(next_url)
            if response.status != 200:
                logging.info("[Bitbucket API Connector]: Could not reach " + next_url + ", request returned " + str(response.status))
                next_url = ""
            else:
                result = json.loads(response.data.decode('utf-8'))

                if "values" in result:
                    for value in result["values"]:
                        yield value

                next_url = self.get_next_url(result)

--------------------------------------------------------------------------------
/rosmap/api_bindings/github_api_bindings.py:
--------------------------------------------------------------------------------
import json
import logging
import time
import certifi

import urllib3

# Number of search results requested per page (passed as per_page).
PAGE_SIZE = 100
TOPIC_SEARCH_URL = 'https://api.github.com/search/repositories?q=topic%3A'


class GithubApiBindings:
    """
    Wraps GitHub API functions.
    """
    def __init__(self, username: str, password: str, rate_limit: float):
        """
        Creates a new instance of the GithubApiBindings Class.
        :param username: Username to log into GitHub
        :param password: Password to log into GitHub
        :param rate_limit: GitHub API rate limit (requests per hour)
        """
        self.__username = username
        self.__password = password
        self.__rate_limit = rate_limit

    def __form_github_request(self, url: str) -> urllib3.response:
        """
        Forms a request for the GitHub API.
30 | :param url: Request URL 31 | :return: Response 32 | """ 33 | time.sleep(3600/self.__rate_limit) 34 | http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) 35 | return http.request('GET', 36 | url, 37 | headers=urllib3.util.make_headers(basic_auth=self.__username + ":" + self.__password, 38 | user_agent=self.__username)) 39 | 40 | def __get_page(self, topic: str, pagesize: int, page: int, ascending: bool) -> dict: 41 | """ 42 | Gets a page from GitHub Search API. 43 | :param topic: The topic to serach for. 44 | :param pagesize: Amount of results this page should yield. 45 | :param page: Page number. 46 | :param ascending: sort ascending (true) or descending (false) 47 | :return: deserialized json string as dict. 48 | """ 49 | url = TOPIC_SEARCH_URL + topic + '&per_page=' + str(pagesize) + '&page=' + str(page) 50 | if ascending: 51 | response = self.__form_github_request(url + "&sort=stars&order=asc") 52 | else: 53 | response = self.__form_github_request(url + "&sort=stars&order=dsc") 54 | if response.status == 200: 55 | return json.loads(response.data.decode('utf-8')) 56 | else: 57 | return json.loads('{"items":{}}') 58 | 59 | def get_urls_of_topic(self, topic: str) -> set: 60 | """ 61 | Fetches all repository URLs form GitHub API. 62 | :param topic: The topic to search for. 63 | :return: A set of all repository URLs. 64 | """ 65 | page = 1 66 | logging.info('[Github API Connector]: Fetching all repository-URLs with topic ' + topic + ' from Github...') 67 | 68 | # Fetch first page. 69 | data_asc = self.__get_page(topic, PAGE_SIZE, page, True) 70 | 71 | # Get number of repositories and calculate page size, since non-authenticated GitHub Search API only allows 10 72 | # requests per Minute. 
73 | repository_count = data_asc["total_count"] 74 | 75 | if repository_count <= 1000: 76 | logging.info("[Github API Connector]: Number of total repositories: " + str(repository_count)) 77 | else: 78 | logging.warning("[Github API Connector]: " + 79 | "GitHub API only allows <=1k results. " + 80 | "Will try to get all items by iterating multiple times.") 81 | 82 | repositories = set() 83 | 84 | old_repository_count = -1 85 | while len(repositories) > old_repository_count: 86 | old_repository_count = len(repositories) 87 | 88 | # Fetch next page. 89 | logging.info('[Github API Connector]: Parsing GitHub clone-URLs... [' + str(len(repositories)) + '/' + str(repository_count) + ']') 90 | 91 | # Add all items on current page. 92 | for repository in self.__get_page(topic, PAGE_SIZE, page, True)["items"]: 93 | repositories.add(repository["clone_url"]) 94 | 95 | if repository_count > 1000: 96 | # Also iterate descending. 97 | # Add all items on current page. 98 | for repository in self.__get_page(topic, PAGE_SIZE, page, False)["items"]: 99 | repositories.add(repository["clone_url"]) 100 | 101 | page += 1 102 | 103 | logging.info('[Github API Connector]: Progress... [' 104 | + str(len(repositories)) 105 | + '/' + str(repository_count) + ']') 106 | 107 | # Return all repositories. 108 | return repositories 109 | 110 | def __extract_next_url_from_header(self, header: dict) -> str: 111 | """ 112 | Extracts the next page URL from a GitHub API response header. 113 | :param header: The header from a GitHub API request. 114 | :return: Empty string if no next link is available, next link if the link is available. 115 | """ 116 | try: 117 | links = header["Link"].split(",") 118 | for link in links: 119 | if 'rel="next"' in link: 120 | return link.split(">;")[0].split("<")[1] 121 | except: 122 | logging.info("[Github API Connector]: Reached end of pages for this category. 
Continuing to next.") 123 | return "" 124 | 125 | def __get_repo_substring(self, url, provider) -> str: 126 | """ 127 | Gets the repo-substring (i.e. url: https://github.com/ros/ros_comm.git -> returns: ros/ros_comm 128 | :param url: URL to get the substring from. 129 | :param provider: Part to cut off from the front. 130 | :return: the substring formatted as {|}/{repository_name} 131 | """ 132 | project_string = url.split(provider)[1] 133 | project_string = project_string.split(".git")[0] 134 | return project_string 135 | 136 | def get_issues(self, url: str, issue_state: str) -> iter: 137 | """ 138 | Yield returns all issues with the specified state. 139 | :param url: The url to the repository to get issues from. 140 | :param issue_state: The issue state (most commonly OPEN or CLOSED) 141 | :return: Yield returns all issues with the specified state. 142 | """ 143 | 144 | project_string = self.__get_repo_substring(url, "https://github.com/") 145 | next_uri = "https://api.github.com/repos/" + project_string + "/issues?state=" + issue_state 146 | while next_uri != "": 147 | response = self.__form_github_request(next_uri) 148 | if response.status != 200: 149 | next_uri = "" 150 | logging.warning("[Github API Connector]: Response returned " + str(response.status)) 151 | else: 152 | data = response.data 153 | issues = json.loads(data.decode(response)) 154 | for issue in issues: 155 | yield issue 156 | next_uri = self.__extract_next_url_from_header(response.headers) 157 | 158 | def get_stargazer_count(self, url: str) -> int: 159 | """ 160 | Returns the stargazer count for a repository. 161 | :param url: URL to the repository. 162 | :return: stargazer count for the repository. (-1 if request failed.) 
        """
        project_string = self.__get_repo_substring(url, "https://github.com/")
        response = self.__form_github_request("https://api.github.com/repos/" + project_string)
        if response.status == 200:
            data = response.data
            return json.loads(data.decode('utf-8'))["stargazers_count"]
        return -1

    def is_pull_request(self, issue: dict) -> bool:
        """
        returns whether issue is a pull request or not (github treats pull requests as issues)
        :param issue: the issue to check.
        :return: true if it is a pull request, false if it is not.
        """
        return "pull_request" in issue
--------------------------------------------------------------------------------
/rosmap/analyze.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import argparse
import json
import os
import logging
from rosmap.loaders.module_loader import ModuleLoader
from shutil import copy

PROGRAM_DESCRIPTION = ""


def load_parsers(settings: dict) -> list:
    """
    Loads the repository parser modules via ModuleLoader.
    :param settings: deserialized config.json contents passed to each parser.
    :return: list of parser instances.
    """
    return ModuleLoader.load_modules(os.path.dirname(os.path.realpath(__file__)),
                                     "repository_parsers",
                                     ["IRepositoryParser"],
                                     "RepositoryParser",
                                     settings)


def load_cloners(settings: dict) -> dict:
    """
    Loads the repository cloner modules via ModuleLoader.
    :param settings: deserialized config.json contents passed to each cloner.
    :return: dict mapping the VCS type reported by cloner.clones() to the cloner.
    """
    cloners = dict()
    for cloner in ModuleLoader.load_modules(os.path.dirname(os.path.realpath(__file__)),
                                            "repository_cloners",
                                            ["IRepositoryCloner"],
                                            "RepositoryCloner",
                                            settings):
        cloners[cloner.clones()] = cloner
    return cloners


def load_package_analyzers(settings: dict) -> list:
    """
    Loads the package analyzer modules via ModuleLoader.
    :param settings: deserialized config.json contents passed to each analyzer.
    :return: list of package analyzer instances.
    """
    return ModuleLoader.load_modules(os.path.dirname(os.path.realpath(__file__)),
                                     "package_analyzers",
                                     ["PackageAnalyzer"],
                                     "Analyzer",
                                     settings)


def load_file_analyzers() -> list:
    """
    Loads the file analyzer modules via ModuleLoader.
    :return: list of file analyzer instances.
    """
    return ModuleLoader.load_modules(os.path.dirname(os.path.realpath(__file__)),
                                     "file_analyzers",
["IFileAnalyzer"], 43 | "FileAnalyzer") 44 | 45 | 46 | def load_analyzers(settings: dict) -> dict: 47 | analyzers = dict() 48 | for analyzer in ModuleLoader.load_modules(os.path.dirname(os.path.realpath(__file__)), 49 | "rosmap/repository_analyzers/offline", 50 | ["IRepositoryAnalyzer", "AbstractRepositoryAnalyzer"], 51 | "RepositoryAnalyzer", 52 | load_package_analyzers(settings), 53 | load_file_analyzers()): 54 | analyzers[analyzer.analyzes()] = analyzer 55 | return analyzers 56 | 57 | 58 | def load_remote_analyzers(settings: dict) -> dict: 59 | remote_analyzers = dict() 60 | for analyzer in ModuleLoader.load_modules(os.path.dirname(os.path.realpath(__file__)), 61 | "rosmap/repository_analyzers/online", 62 | ["ISCSRepositoryAnalyzer"], 63 | "RepositoryAnalyzer", 64 | settings): 65 | remote_analyzers[analyzer.analyzes()] = analyzer 66 | return remote_analyzers 67 | 68 | 69 | def write_to_file(path, repo_details): 70 | output_file = open(path, "w") 71 | output_file.write(json.dumps(list(repo_details.values()))) 72 | output_file.close() 73 | 74 | 75 | def main(): 76 | # Create argument-parser 77 | parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION) 78 | parser.add_argument("--config", "-c", help="Add a path to the config.json file that contains, usernames, api-tokens and settings.", default=os.path.dirname(os.path.realpath(__file__)) + "/config/config.json") 79 | parser.add_argument("--load_existing", "-l", help="Use this flag to load previous link-files from workspace.", default=False, action="store_true") 80 | parser.add_argument("--skip_download", "-d", help="Use this flag to skip downloading of repositories to your workspace.", default=False, action="store_true") 81 | parser.add_argument("--output", "-o", help="Add a path to the output file for the analysis. If this path is not defined, analysis will not be performed. 
", default="") 82 | parser.add_argument("--generate_config", help="Generates a config file on the given path.") 83 | 84 | # Parse arguments 85 | arguments = parser.parse_args() 86 | 87 | # Set up logger 88 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) 89 | logging.getLogger("urllib3").setLevel(logging.WARNING) 90 | logging.getLogger("requests").setLevel(logging.WARNING) 91 | 92 | if arguments.generate_config is not None: 93 | copy(os.path.dirname(os.path.realpath(__file__)) + "/config/config.json", arguments.generate_config) 94 | return 0 95 | 96 | # Warn user that output has to be set to analyze: 97 | if arguments.output == "": 98 | logging.warning("parameter --output has not been defined, analysis will be skipped, add --output to perform analysis.") 99 | 100 | # Load settings. 101 | configfile = open(arguments.config, "r") 102 | settings = json.loads(configfile.read()) 103 | 104 | # Expand home directories. 105 | settings["analysis_workspace"] = os.path.expanduser(settings["analysis_workspace"]) 106 | settings["rosdistro_workspace"] = os.path.expanduser(settings["rosdistro_workspace"]) 107 | 108 | # Initialize dictionaries. 109 | repositories = dict() 110 | for vcs in settings["version_control_systems"]: 111 | repositories[vcs] = set() 112 | 113 | if not arguments.load_existing: 114 | # Parse repositories 115 | logging.info("[Parser]: Parsing repositories...") 116 | parsers = load_parsers(settings) 117 | for parser in parsers: 118 | parser.parse_repositories(repositories) 119 | 120 | # Create folder 121 | if not os.path.exists(settings["analysis_workspace"]): 122 | os.makedirs(settings["analysis_workspace"]+"links/") 123 | 124 | # Write to file. 
        logging.info("[Parser]: Writing repository links to file...")
        # One link file per VCS type inside <analysis_workspace>/links/.
        for vcs, repository_set in repositories.items():
            logging.info("[Parser]: Writing file for " + vcs)
            with open(settings["analysis_workspace"]+"links/" + vcs, "w+") as output_file:
                for repository in repository_set:
                    output_file.write(repository + "\n")
    else:
        # --load_existing: re-read previously parsed links from the workspace.
        for vcs in settings["version_control_systems"]:
            with open(settings["analysis_workspace"]+"links/" + vcs, "r") as output_file:
                for line in output_file:
                    repositories[vcs].add(line.rstrip("\r\n"))

    if not arguments.skip_download:
        cloners = load_cloners(settings)

        logging.info("[Cloner]: Cloning repositories...")

        for vcs in settings["version_control_systems"]:
            if vcs in cloners:
                cloners[vcs].clone_repositories(repositories[vcs])
            else:
                logging.warning("[Cloner]: Cannot clone repositories of type " + vcs + ": No cloner found for this type...")

    # Analysis only runs when --output was given (warned about above).
    if not arguments.output == "":
        # Offline analysis of the locally cloned repositories.
        analyzers = load_analyzers(settings)
        repo_details = dict()
        for vcs in settings["version_control_systems"]:
            if vcs in analyzers:
                analyzers[vcs].analyze_repositories(settings["analysis_workspace"] + settings["repository_folder"] + vcs,
                                                    repo_details)
            else:
                logging.warning("Cannot analyze repositories of type " + vcs + ": No analyzer found for this type...")

        # Write intermediate results before the (slow) online analysis runs.
        write_to_file(arguments.output, repo_details)

        remote_analyzers = load_remote_analyzers(settings)
        for scs in settings["social_coding_sites"]:
            if scs in remote_analyzers:
                remote_analyzers[scs].analyze_repositories(repo_details)
            else:
                logging.warning("Cannot analyze scs of type " + scs + ": No analyzer found for this type...")

        write_to_file(arguments.output, repo_details)

    logging.info("Actions finished. Exiting.")




--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# rosmap

## What is this repository for?

This repository contains the implementation of the analysis-tool used in our paper **"Can i depend on you? Mapping the dependency and quality landscape or ROS packages"**, published in the proceedings of the IRC 2019 conference. If you want to cite this repository, please cite the original paper instead:

```
@inproceedings{pichler_can_2019,
    title = {Can i depend on you? {Mapping} the dependency and quality landscape or {ROS} packages},
    booktitle = {Proceedings of the 3rd {International} {Conference} on {Robotic} {Computing}},
    publisher = {IEEE},
    author = {Pichler, Marc and Dieber, Bernhard and Pinzger, Martin},
    month = feb,
    year = {2019}
}
```

The application contained in this repository provides the means to:
- acquire a set of links to git, mercurial and subversion repositories that are suspected to contain ROS-Packages.
- acquire local copies of all of these repositories for later analysis.
- analyze these repositories by extracting the names of the contained ROS-Packages, their dependencies, as well as amount of Github-Stars or Bitbucket-Watchers, counting how many branches and contributors there are, and more information on the popularity of the project.

## How do I use it?
24 | 25 | ### Step 1: Setup 26 | 27 | - Install the rosmap package from PyPI using `sudo pip3 install rosmap` 28 | - Install the version control systems needed for cloning and analyzing the repositories via `sudo apt-get install git subversion mercurial` 29 | - **Note: this tool only supports (and has only been tested on) Ubuntu 16.04 and 19.04;** 30 | 31 | **OR (from source)** 32 | 33 | - Install the prerequisites on your system. 34 | - On Ubuntu 16.04 run (`sudo apt-get install python3.5 python-pip git subversion mercurial`) 35 | - **Note: this tool only supports (and has only been tested on) Ubuntu 16.04 and 19.04;** if you want to tinker with it on other systems see a [list of prerequisites](#system-prerequisites). 36 | - Clone this repository. 37 | - `cd` into the cloned repository. 38 | - Install requirements.txt (`pip install -r requirements.txt`) 39 | 40 | 41 | 42 | ### Step 2: Configuration 43 | 44 | Since this application will use GitHub's API to extract data from it, it is advised to add a GitHub username as well as an API-Token to its config-file. You will find a sample config file at `./config/config.json` inside repository. Much of it is already preconfigured. 45 | 46 | 47 | ``` 48 | { 49 | "github_username": "USERNAME_HERE", 50 | "github_password": "API_TOKEN_HERE", 51 | "github_search_topic": "ros", 52 | "github_search_rate_limit": 1800, 53 | "github_api_rate_limit": 5000, 54 | . 55 | . 56 | . 57 | } 58 | ``` 59 | 60 | Simply replace `USERNAME_HERE` with your account's username, and the `API_TOKEN_HERE` with your API-Token. Alternatively you can also use your password if two-factor authentication is not enabled for your account (not recommended). 61 | 62 | The rate-limits are already preconfigured to the standard rate limits of GitHub's API for authenticated users (5000 requests/hour for the v3 API, configured in `github_api_rate_limit` and 1800 requests/hour for the Search-API, configured in `github_search_rate_limit`). 
It is also possible to omit authentication, however, the rate-limit will need to be reduced to 60 requests/hour (or 600 requests/hour for search), making analysis of a large amount of repositories practically unfeasible. 63 | ``` 64 | { 65 | ..., 66 | "rosdistro_url": "https://github.com/ros/rosdistro", 67 | "rosdistro_workspace": "~/.rosdistro_workspace/", 68 | ... 69 | } 70 | ``` 71 | 72 | `rosdistro_url` will already be set to the URL of the rosdistro repository. `rosdistro_workspace` will be the folder to clone the rosdistro repository into, by default it is at `~/.rosdistro_workspace`. 73 | 74 | ``` 75 | { 76 | ..., 77 | "bitbucket_repo_page": "https://bitbucket.org/repo/all/", 78 | "bitbucket_repo_search_string": "ros", 79 | "bitbucket_api_rate_limit": 1000, 80 | ... 81 | } 82 | ``` 83 | Since Bitbucket does not provide a search API, the code provided in this repository uses their web-interface and extracts the information from the results-pages. The link to this page is defined in `bitbucket_repo_page`. 84 | 85 | The search term is set to `ros` by default, and can be changed to any other search term by changing the value of `bitbucket_repo_search_string`. 86 | 87 | Bitbucket does not require users to be logged in to use their API, however their rate limit is 1000 requests/hour, which is already preconfigured. 88 | 89 | ``` 90 | { 91 | ..., 92 | "version_control_systems": ["hg", "git", "svn"], 93 | "analysis_workspace" : "~/.analysis_workspace/", 94 | "repository_folder": "repositories/", 95 | "social_coding_sites": ["bitbucket", "github"], 96 | "package_xml_dependency_tags": ["build_depend", 97 | "run_depend", 98 | "depend", 99 | "buildtool_depend", 100 | "build_export_depend", 101 | "exec_depend", 102 | "test_depend", 103 | "doc_depend"], 104 | "manifest_xml_dependency_tags": ["depend"] 105 | } 106 | ``` 107 | 108 | `version_control_systems` provides the possibility to add or remove all repositories of a type form analysis. 
The available types are `git`, `svn` (Subversion), and `hg` (Mercurial).

`analysis_workspace` will be the directory in which the list of repositories is saved, as well as the place where all repositories will be cloned to. `repository_folder` is the subfolder in `analysis_workspace` that will be used to clone the different repositories to.

`social_coding_sites` is a list of social coding sites that can be analyzed, `github` and `bitbucket` are currently implemented.

`package_xml_dependency_tags` is the list of tags that are considered a dependency in a `package.xml` file. By default, we scan for every dependency tag that exists, but the list can be modified at will, the content of the tags will show up in the output file as package dependencies.

`manifest_xml_dependency_tags` serves the same purpose as `package_xml_dependency_tags`, but for the legacy rosbuild `manifest.xml` files.

### Step 3: Running the program

You can either run a [full analysis](#step-3a-full-analysis), [skip certain steps](#step-3b-partial-analysis), or run an [analysis just for your repositories](#).

#### Step 3.a Full Analysis

To run a full analysis, either move your config file to the config folder and replace the default `config.json`, or provide it as a parameter to the program.

To perform the full analysis run `./analyze.py --config /path/to/your/config.file --output /path/to/output.file`.
If you modified or replaced the `./config/config.json`, it will load it automatically, you do not need to provide the `--config` parameter, you can simply run `./analyze.py --output /path/to/output.file` (**NOTE: if the `--output` parameter is not provided, only the parse and download steps will be performed, for running analysis only or skipping steps see [Step 3.b](#step-3b-partial-analysis)**).

A full analysis will include:
- **gathering repository URLs** from github- and bitbucket-searches as well as the official ROS Index found in the rosdistro-repository. The URLs will be written to your `analysis_workspace`, in the subfolder `links/`. For each type, there will be one file named accordingly. Re-running the analysis will parse all URLs again.
- **cloning ALL repositories** found while gathering URLs to your machine. **(NOTE: This operation requires a significant amount of disk space, our analysis resulted in well over 70GB worth of repositories, make sure you have the space for it in advance.)**
- **Analyze all repositories** for contained packages, their dependencies, cpplint-issues, github stars (bitbucket watchers), branch count, issue count and duration, last updated time, and contributors.

#### Step 3.b: Partial analysis

You can also skip steps by using these alone or in combination:
- `--load_existing`, which will load previously existing repository URLs from the file
- `--skip_download`, which will skip the cloning process.
- by omission of `--output ./path/to/output.file`, which will skip the analysis step.

#### Step 3.c: Analyze just your local repositories

You can skip the downloading of files, and just use your local repositories for analysis. To do this, you can run `./analyze.py --skip_download --load_existing --output /path/to/your/output.file` after moving your repositories to the repositories folder specified in your configuration file (`repository_folder`).

### Step 4: Inspect output

After running the analysis, the output will be written to your defined output file.
The results will follow this JSON-Schema, meaning of the different parts is given in the "title" fields: 148 | 149 | ```json 150 | { 151 | "type":"array", 152 | "title":"Array that contains all repository-objects.", 153 | "items":{ 154 | "type":"object", 155 | "title":"The repository class.", 156 | "required":[ 157 | "url", 158 | "continuous_integration", 159 | "rosinstall", 160 | "contributors", 161 | "branch_count", 162 | "changelog", 163 | "packages", 164 | "cpplint_errors", 165 | "last_update", 166 | "readme" 167 | ], 168 | "properties":{ 169 | "open_issues":{ 170 | "type":"integer", 171 | "title":"Number of open issues." 172 | }, 173 | "url":{ 174 | "type":"string", 175 | "title":"The origin-url of the repository" 176 | }, 177 | "continuous_integration":{ 178 | "type":"boolean", 179 | "title":"Is there a file present that suggests continuous integration is set up?" 180 | }, 181 | "rosinstall":{ 182 | "type":"boolean", 183 | "title":"Is there a rosinstall file present?" 184 | }, 185 | "open_pull_requests":{ 186 | "type":"integer", 187 | "title":"Number of open pull-requests" 188 | }, 189 | "contributors":{ 190 | "type":"integer", 191 | "title":"Number of contributors" 192 | }, 193 | "branch_count":{ 194 | "type":"integer", 195 | "title":"Number of branches" 196 | }, 197 | "changelog":{ 198 | "type":"boolean", 199 | "title":"Is there a CHANGELOG file present?" 
200 | }, 201 | "issue_durations":{ 202 | "type":"array", 203 | "title":"Issue durations in seconds.", 204 | "items":{ 205 | "type":"number" 206 | } 207 | }, 208 | "packages":{ 209 | "type":"array", 210 | "title":"Packages contained in this repository.", 211 | "items":{ 212 | "type":"object", 213 | "title":"The package-class.", 214 | "required":[ 215 | "name", 216 | "dependencies" 217 | ], 218 | "properties":{ 219 | "name":{ 220 | "type":"string", 221 | "title":"The package's name" 222 | }, 223 | "dependencies":{ 224 | "type":"array", 225 | "title":"The package's dependencies (package names of dependencies)", 226 | "items":{ 227 | "type":"string" 228 | } 229 | } 230 | } 231 | } 232 | }, 233 | "closed_pull_requests":{ 234 | "type":"integer", 235 | "title":"Number of closed pull-requests" 236 | }, 237 | "stars":{ 238 | "type":"integer", 239 | "title":"Number of github-stars/bitbucket-watchers" 240 | }, 241 | "cpplint_errors":{ 242 | "type":"integer", 243 | "title":"Number of cpplint errors." 244 | }, 245 | "last_update":{ 246 | "type":"number", 247 | "title":"Time of last repository commit as UNIX-Timestamp" 248 | }, 249 | "readme":{ 250 | "type":"boolean", 251 | "title":"Is there a readme-file present?" 252 | } 253 | } 254 | } 255 | } 256 | ``` 257 | 258 | ## System Prerequisites 259 | - Python 3.5 260 | - pip 261 | - git 262 | - subversion 263 | - mercurial 264 | 265 | ## Python Prerequisites 266 | - GitPython (2.1.8) 267 | - pyyaml (4.2b1) 268 | - pyquery (1.4.0) 269 | - urllib3 270 | - python-hglib (2.6.1) 271 | - svn (0.3.46) 272 | - python-dateutil (2.7.5) 273 | - cpplint 274 | --------------------------------------------------------------------------------