├── .dockerignore ├── src ├── test_data │ ├── lei-test.csv │ └── rr-test.csv ├── app_test.py ├── algorithms │ ├── graph_cocacola_test.py │ ├── graph_a2_test.py │ ├── graph_builder.py │ ├── graph_samsung_test.py │ ├── graph_builder_test.py │ ├── graph.py │ └── graph_test.py └── app.py ├── .gitignore ├── test.sh ├── Dockerfile ├── docker-compose.yml ├── LICENSE ├── data ├── download.sh └── download_mac.sh ├── requirements.txt └── README.md /.dockerignore: -------------------------------------------------------------------------------- 1 | ./data -------------------------------------------------------------------------------- /src/test_data/lei-test.csv: -------------------------------------------------------------------------------- 1 | LEI,Entity.LegalName 2 | LEI_1,company1 3 | LEI_2,company2 4 | LEI_3,company3 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.zip 3 | \.pytest_cache/ 4 | data/gleif_rr\.csv 5 | data/gleif_lei\.csv 6 | __pycache__/ 7 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | echo "testing inside docker" 2 | docker run --rm -it \ 3 | -v "$(pwd)/data":/data \ 4 | --entrypoint "pytest" \ 5 | gleif-backend:latest --rootdir=/ 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tiangolo/uvicorn-gunicorn-fastapi:python3.6 2 | 3 | ADD requirements.txt /requirements.txt 4 | 5 | RUN pip install -r /requirements.txt 6 | 7 | ADD src /src 8 | 9 | WORKDIR /src 10 | 11 | CMD ["uvicorn", "--host", "0.0.0.0", "app:api"] -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | backend: 4 | image: gleif-backend:latest 5 | build: . 6 | restart: unless-stopped 7 | networks: 8 | - gleif 9 | volumes: 10 | - ./data:/data:ro 11 | ports: 12 | - "8000:8000" 13 | networks: 14 | gleif: -------------------------------------------------------------------------------- /src/app_test.py: -------------------------------------------------------------------------------- 1 | from starlette.testclient import TestClient 2 | # from app import api 3 | 4 | # client = TestClient(api) 5 | 6 | # def test_read_main(): 7 | # response = client.get("/") 8 | # assert response.status_code == 200 9 | # assert response.json() == {"msg": "Hello World"} 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GLEIF Level-2 Server 2 | Written in 2019 by CorrelAid & Global Legal Entity Identifier Foundation (GLEIF) - correlaid.org, gleif.org 3 | 4 | To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. 5 | You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see . 6 | -------------------------------------------------------------------------------- /data/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -u 4 | set -e 5 | 6 | wget https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211553/20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip 7 | wget https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211598/20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip 8 | unzip 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip 9 | unzip 20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip 10 | 11 | cat 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv | cut -d',' -f1,2 | sed 's/"//g' > gleif_lei.csv 12 | mv 20190719-0000-gleif-goldencopy-rr-golden-copy.csv gleif_rr.csv 13 | 14 | rm *.zip 15 | -------------------------------------------------------------------------------- /data/download_mac.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -u 4 | set -e 5 | 6 | curl -O https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211553/20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip 7 | curl -O https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211598/20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip 8 | unzip 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip 9 | unzip 20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip 10 | 11 | cat 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv | cut -d',' -f1,2 | sed 's/"//g' > gleif_lei.csv 12 | mv 20190719-0000-gleif-goldencopy-rr-golden-copy.csv gleif_rr.csv 13 | 14 | rm *.zip 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.4.0 2 | aniso8601==6.0.0 3 | certifi==2019.6.16 4 | chardet==3.0.4 5 | Click==7.0 6 | decorator==4.4.0 7 | dnspython==1.16.0 8 | email-validator==1.0.4 9 | fastapi==0.33.0 10 | graphene==2.1.7 11 | graphql-core==2.2 12 | graphql-relay==2.0.0 13 | h11==0.8.1 14 | httptools==0.0.13 15 | idna==2.8 16 | itsdangerous==1.1.0 17 | Jinja2==2.10.1 18 | MarkupSafe==1.1.1 19 | numpy==1.16.4 20 | pandas==0.24.2 21 | promise==2.2.1 22 | pydantic==0.30 23 | python-dateutil==2.8.0 24 | python-multipart==0.0.5 25 | pytz==2019.1 26 | PyYAML==5.1.1 27 | requests==2.22.0 28 | Rx==3.0.0 29 | six==1.12.0 30 | starlette==0.12.0 31 | ujson==1.35 32 | urllib3==1.25.3 33 | uvicorn==0.8.4 34 | uvloop==0.12.2 35 | websockets==7.0 36 | networkx==2.3 37 | pytest==5.0.1 38 | pytest-watch==4.2.0 39 | -------------------------------------------------------------------------------- /src/algorithms/graph_cocacola_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder 4 | from graph import Graph 5 | 6 | 7 | @pytest.fixture(scope="class") 8 | def setup(request): 9 | rr_csv = os.path.join(request.config.rootdir, "data", "gleif_rr.csv") 10 | lookup_csv = os.path.join(request.config.rootdir, "data", "gleif_lei.csv") 11 | lei = "UWJKFUJFZ02DKWI3RY53" 12 | builder = DirectNodeGraphWithParentNetworkBuilder() 13 | 14 | glei_network = Graph.from_csv(f=rr_csv, limit=None) 15 | Graph.set_lookup_table(f=lookup_csv) 16 | 17 | parent_graph, _ = builder.build(glei_network, lei) 18 | 19 | structure = parent_graph.set_levels(lei).to_array() 20 | return structure, lei 21 | 22 | 23 | def test_nodes_edges_more_than_0(setup): 24 | structure, _ = setup 25 | 26 | assert len(structure["nodes"]) > 0 27 | assert len(structure["edges"]) > 0 28 | 29 | 30 | def test_lei_in_nodes(setup): 31 | structure, lei = setup 32 | cocacolacompany_node = [n for n in structure["nodes"] if n["id"] == lei][0] 33 | 34 | assert cocacolacompany_node["level"] == 0 35 | 36 | 37 | def test_direct_children(setup): 38 | structure, _ = setup 39 | # direct children 40 | direct_children = [n for n in structure["nodes"] if n["level"] == 1] 41 | assert len(direct_children) == 8 42 | 43 | -------------------------------------------------------------------------------- /src/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fastapi import FastAPI 3 | from starlette.middleware.cors import CORSMiddleware 4 | 5 | from algorithms.graph import Graph 6 | from algorithms.graph_builder import DirectNodeGraphWithParentNetworkBuilder as Builder 7 | 8 | origins = ["*"] 9 | 10 | api = FastAPI() 11 | api.add_middleware( 12 | CORSMiddleware, allow_origins=origins, allow_methods=["*"], allow_headers=["*"] 13 | ) 14 | ROOT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") 15 | DATA_PATH = os.path.join(ROOT_DIR, "data") 16 | 17 | relationship_data_path = os.path.join(DATA_PATH, "gleif_rr.csv") 18 | lei_lookup_data_path = os.path.join(DATA_PATH, "gleif_lei.csv") 19 | 20 | glei_network = Graph.from_csv(f=relationship_data_path, limit=None) 21 | Graph.set_lookup_table(f=lei_lookup_data_path) 22 | 23 | 24 | @api.get("/company/{node_id}/structure") 25 | def get_company_structure(node_id: str): 26 | """ 27 | This endpoint returns the complete holding structure based on a single node id. 28 | :param node_id: 29 | :return: 30 | """ 31 | builder = Builder() 32 | parent_graph, parent_node = builder.build(glei_network, node_id) 33 | 34 | if parent_node is None: 35 | # no ultimate parent 36 | return parent_graph.set_levels(node_id).to_array() 37 | else: 38 | return parent_graph.set_levels(parent_node).to_array() 39 | -------------------------------------------------------------------------------- /src/algorithms/graph_a2_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder 4 | from graph import Graph 5 | 6 | 7 | @pytest.fixture(scope="class") 8 | def setup(request): 9 | rr_csv = os.path.join(request.config.rootdir, "data", "gleif_rr.csv") 10 | lookup_csv = os.path.join(request.config.rootdir, "data", "gleif_lei.csv") 11 | lei = "969500WU8KVE8U3TL824" 12 | builder = DirectNodeGraphWithParentNetworkBuilder() 13 | 14 | glei_network = Graph.from_csv(f=rr_csv, limit=None) 15 | Graph.set_lookup_table(f=lookup_csv) 16 | 17 | parent_graph, ultimate_parent = builder.build(glei_network, lei) 18 | 19 | if ultimate_parent: 20 | structure = parent_graph.set_levels(ultimate_parent).to_array() 21 | else: 22 | structure = parent_graph.set_levels(lei).to_array() 23 | 24 | return structure, lei 25 | 26 | 27 | def test_structure_exists(setup): 28 | 29 | structure, _ = setup 30 | 31 | assert len(structure["nodes"]) > 0 32 | # this LEI does not have children nor a parent 33 | assert len(structure["edges"]) == 0 34 | 35 | 36 | def test_a2_node_is_level_0(setup): 37 | # this LEI does not have an ultimate parent 38 | 39 | structure, lei = setup 40 | 41 | a2_node = [n for n in structure["nodes"] if n["id"] == lei][0] 42 | 43 | assert a2_node["level"] == 0 44 | 45 | -------------------------------------------------------------------------------- /src/algorithms/graph_builder.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Union 2 | 3 | from algorithms.graph import RR, Graph 4 | 5 | 6 | class DirectNodeGraphWithParentNetworkBuilder: 7 | def __init__(self): 8 | pass 9 | 10 | def build(self, g: Graph, node: str) -> Tuple[Graph, str]: 11 | """ 12 | For given node: 13 | - build the "direct" graph of node 14 | - merge with "direct" graph of ultimate parent, if exists 15 | 16 | The result might be a network with disjunct graphs. 17 | 18 | 19 | "Direct" graph is the graph that connects nodes only via direct parent relationships (in all directions) 20 | """ 21 | g = g.deepcopy() 22 | 23 | parent_graph, parent_node = self.ultimate_parent_direct_graph(g, node) 24 | node_graph = self.node_direct_graph(g, node) 25 | 26 | return parent_graph.merge(node_graph), parent_node 27 | 28 | def node_direct_graph(self, g: Graph, node: str) -> Graph: 29 | g = g.deepcopy() 30 | return g.remove_edge_type(RR.ULTIMATE).sub(node) 31 | 32 | def ultimate_parent_direct_graph(self, g: Graph, node: str) -> Tuple[Graph, Union[str, None]]: 33 | """for given node and its full graph, get the sub graph of the ultimate parent 34 | 35 | Arguments: 36 | g {Graph} -- graph of node 37 | node {str} -- lei of node 38 | 39 | Returns: 40 | [tuple] -- sub graph of ultimate parent and its lei 41 | """ 42 | g = g.deepcopy() 43 | parent = g.get_ultimate_parent(node) 44 | 45 | # if there is no ultimate parent, we return an empty graph 46 | if parent is None: 47 | return Graph([]), parent 48 | 49 | # first remove ultimate edge 50 | g_without_ultimate_edge = g.remove_edge_type(RR.ULTIMATE) 51 | 52 | # get graph for parent 53 | parent_sub = g_without_ultimate_edge.sub(parent) 54 | 55 | # then subgraph for parent 56 | return parent_sub, parent 57 | -------------------------------------------------------------------------------- /src/algorithms/graph_samsung_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder 4 | from graph import Graph 5 | 6 | 7 | @pytest.fixture 8 | def builder(): 9 | return DirectNodeGraphWithParentNetworkBuilder() 10 | 11 | 12 | @pytest.fixture 13 | def rr_csv(request): 14 | return os.path.join(request.config.rootdir, "data", "gleif_rr.csv") 15 | 16 | 17 | @pytest.fixture 18 | def lookup_csv(request): 19 | return os.path.join(request.config.rootdir, "data", "gleif_lei.csv") 20 | 21 | 22 | def test_samsung_ultimate_parent(builder, lookup_csv, rr_csv): 23 | samsung_lei = "549300KYVNLA5XR0HT53" 24 | ultimate_parent_lei = "9884007ER46L6N7EI764" 25 | 26 | glei_network = Graph.from_csv(f=rr_csv, limit=None) 27 | Graph.set_lookup_table(f=lookup_csv) 28 | 29 | parent_graph, _ = builder.build(glei_network, samsung_lei) 30 | structure = parent_graph.set_levels(ultimate_parent_lei).to_array() 31 | 32 | samsung_node = [n for n in structure["nodes"] if n["id"] == samsung_lei][0] 33 | ultimate_parent_node = [ 34 | n for n in structure["nodes"] if n["id"] == ultimate_parent_lei 35 | ][0] 36 | 37 | # nodes between ultimate parent and samsung 38 | intermediate_nodes = [n for n in structure["nodes"] if n["level"] == 1] 39 | 40 | assert ultimate_parent_node["level"] == 0 41 | assert len(intermediate_nodes) > 0 42 | assert samsung_node["level"] == 2 43 | 44 | # assert that the correct edges exist 45 | # samsung gmbh -> samsung holding gmbh 46 | assert ( 47 | find_edge(structure["edges"], samsung_lei, "549300CWESV5NI78YL42") is not None 48 | ) 49 | # samsung holding gmbh -> korean samsung / ultimate parent 50 | assert ( 51 | find_edge(structure["edges"], "549300CWESV5NI78YL42", ultimate_parent_lei) 52 | is not None 53 | ) 54 | 55 | 56 | def find_edge(edges, from_lei, to_lei): 57 | for e in edges: 58 | if e["from"] == from_lei and e["to"] == to_lei: 59 | return e 60 | return None 61 | 62 | -------------------------------------------------------------------------------- /src/test_data/rr-test.csv: -------------------------------------------------------------------------------- 1 | Relationship.StartNode.NodeID,Relationship.StartNode.NodeIDType,Relationship.EndNode.NodeID,Relationship.EndNode.NodeIDType,Relationship.RelationshipType,Relationship.RelationshipStatus,Registration.InitialRegistrationDate,Registration.LastUpdateDate,Registration.RegistrationStatus,Registration.NextRenewalDate,Registration.ManagingLOU,Registration.ValidationSources,Registration.ValidationDocuments,Registration.ValidationReference,Relationship.Period.1.startDate,Relationship.Period.1.endDate,Relationship.Period.1.periodType,Relationship.Period.2.startDate,Relationship.Period.2.endDate,Relationship.Period.2.periodType,Relationship.Period.3.startDate,Relationship.Period.3.endDate,Relationship.Period.3.periodType,Relationship.Period.4.startDate,Relationship.Period.4.endDate,Relationship.Period.4.periodType,Relationship.Period.5.startDate,Relationship.Period.5.endDate,Relationship.Period.5.periodType,Relationship.Qualifiers.1.QualifierDimension,Relationship.Qualifiers.1.QualifierCategory,Relationship.Qualifiers.2.QualifierDimension,Relationship.Qualifiers.2.QualifierCategory,Relationship.Qualifiers.3.QualifierDimension,Relationship.Qualifiers.3.QualifierCategory,Relationship.Qualifiers.4.QualifierDimension,Relationship.Qualifiers.4.QualifierCategory,Relationship.Qualifiers.5.QualifierDimension,Relationship.Qualifiers.5.QualifierCategory,Relationship.Qualifiers.1.MeasurementMethod,Relationship.Qualifiers.1.QuantifierAmount,Relationship.Qualifiers.1.QuantifierUnits,Relationship.Qualifiers.2.MeasurementMethod,Relationship.Qualifiers.2.QuantifierAmount,Relationship.Qualifiers.2.QuantifierUnits,Relationship.Qualifiers.3.MeasurementMethod,Relationship.Qualifiers.3.QuantifierAmount,Relationship.Qualifiers.3.QuantifierUnits,Relationship.Qualifiers.4.MeasurementMethod,Relationship.Qualifiers.4.QuantifierAmount,Relationship.Qualifiers.4.QuantifierUnits,Relationship.Qualifiers.5.MeasurementMethod,Relationship.Qualifiers.5.QuantifierAmount,Relationship.Qualifiers.5.QuantifierUnits 2 | LEI_1,LEI,DIRECT_PARENT_LEI,LEI,IS_DIRECTLY_CONSOLIDATED_BY,ACTIVE,2012-11-29T16:33:00.000Z,2019-06-18T14:32:00.000Z,PUBLISHED,2020-06-14T10:17:00.000Z,EVK05KS7XY1DEII3R011,ENTITY_SUPPLIED_ONLY,SUPPORTING_DOCUMENTS,,2017-01-01T00:00:00.000Z,2017-12-31T00:00:00.000Z,ACCOUNTING_PERIOD,2018-06-15T00:00:00.000Z,,RELATIONSHIP_PERIOD,,,,,,,,,,ACCOUNTING_STANDARD,,,,,,,,,,,,,,,,,,,,,,,, 3 | LEI_1,LEI,ULTIMATE_PARENT_LEI,LEI,IS_ULTIMATELY_CONSOLIDATED_BY,ACTIVE,2012-11-29T16:33:00.000Z,2019-06-18T14:32:00.000Z,PUBLISHED,2020-06-14T10:17:00.000Z,EVK05KS7XY1DEII3R011,ENTITY_SUPPLIED_ONLY,SUPPORTING_DOCUMENTS,,2017-01-01T00:00:00.000Z,2017-12-31T00:00:00.000Z,ACCOUNTING_PERIOD,2018-06-15T00:00:00.000Z,,RELATIONSHIP_PERIOD,,,,,,,,,,ACCOUNTING_STANDARD,,,,,,,,,,,,,,,,,,,,,,,, -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GLEIF Level-2 Visualization (Server) 2 | 3 | The code was written during a hackathon between [GLEIF](https://www.gleif.org/) and [CorrelAid](https://www.correlaid.org). :rocket: The goal of the project was to visualize relational data between legal entitles that are registered at GLEIF. More information about the open data available at GLEIF can be found here: [https://www.gleif.org/en/lei-data/access-and-use-lei-data](https://www.gleif.org/en/lei-data/access-and-use-lei-data). Furthermore, an introductory blog post can be found here: [https://correlaid.org/en/blog/gleif-hackathon/](https://correlaid.org/en/blog/gleif-hackathon/). 4 | 5 | This is the server of the project. The client can be found here: [https://github.com/CorrelAid/gleif-level2-client](https://github.com/CorrelAid/gleif-level2-client). 6 | 7 | ## Limitations 8 | 9 | Due to the nature of hackathons the current version of the tool includes the following limitations: 10 | 11 | - The documentation of the code might be partially incomplete. If you come across an issue, please file an issue within this repository. 12 | - The backend reacts quite slowly due to the focus on functionality during the hackathon. 13 | - There might be some edge-cases that are not yet handled. 14 | 15 | ## License 16 | 17 | The tool is licensed under CC0. 18 | 19 | ## Prerequisites 20 | 21 | You need a `data` directory with the following files: 22 | 23 | - `gleif_lei.csv` (*LEI-CDF*) 24 | - `gleif_rr.csv` (*RR-CDF*) 25 | 26 | If you're on Linux, you can use the `data/download.sh` script, for Mac users there is the `data/download_mac.sh` script. They are to be executed in the `data` directory. Both scripts will download the current files from the [GLEIF website](https://www.gleif.org/en/lei-data/gleif-golden-copy/download-the-golden-copy/#/) and remove most of the columns from the `lei` dataset in order to make it small enough for most local RAMs. 27 | If you're on Windows operating system, you'll need to [download the files manually](https://www.gleif.org/en/lei-data/gleif-golden-copy/download-the-golden-copy/#/) and find a way to reduce the file size of the `lei` dataset. 28 | 29 | 30 | ## API docs 31 | 32 | You can find the Swagger API docs under [http://localhost:8000/docs](http://localhost:8000/docs) after you have started the app either directly on your machine or with docker (see below). 33 | 34 | 35 | ## Local development 36 | 37 | ### without docker 38 | 39 | #### run 40 | 41 | e.g. in conda / venv: 42 | -> with reload enabled 43 | 44 | ``` 45 | uvicorn app:api --reload --root-path src 46 | ``` 47 | 48 | This makes the API available under [http://localhost:8000/](http://localhost:8000/). 49 | 50 | #### test 51 | 52 | ``` 53 | pytest 54 | ``` 55 | 56 | ### with docker 57 | 58 | #### build 59 | 60 | ``` 61 | docker-compose build 62 | ``` 63 | 64 | #### test 65 | 66 | run the tests within a docker container: 67 | 68 | ``` 69 | ./test.sh 70 | ``` 71 | 72 | #### run 73 | 74 | to run: 75 | 76 | ``` 77 | docker-compose up 78 | ``` 79 | 80 | or demonized: 81 | 82 | ``` 83 | docker-compose up -d 84 | ``` 85 | 86 | This makes the API available under [http://localhost:8000/](http://localhost:8000/). 87 | 88 | ### logs 89 | 90 | ``` 91 | docker-compose logs -f 92 | ``` 93 | 94 | ## server 95 | 96 | For development purposes, we had a server / virtual machine in the Azure cloud. Things should work similarly on your server. 97 | 98 | ### clone repository to your server 99 | 100 | ``` 101 | git clone git@github.com:CorrelAid/gleif-level2-server.git 102 | ``` 103 | 104 | or 105 | 106 | ``` 107 | git clone https://github.com/CorrelAid/gleif-level2-server.git 108 | ``` 109 | 110 | depending on your GitHub authentication preferences. 111 | 112 | 113 | ### build docker images 114 | 115 | ``` 116 | docker-compose build 117 | ``` 118 | 119 | ### start stack 120 | 121 | ``` 122 | docker-compose up -d 123 | ``` 124 | 125 | This makes the API available under [http://localhost:8000/](http://localhost:8000/) on your server. Configure a reverse proxy (e.g. [Nginx](https://www.nginx.com/)) to make the API available to other machines. 126 | 127 | 128 | ### logs 129 | 130 | ``` 131 | docker-compose logs -f 132 | ``` 133 | 134 | 135 | ### stop stack 136 | 137 | ``` 138 | docker-compose down 139 | ``` 140 | -------------------------------------------------------------------------------- /src/algorithms/graph_builder_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from graph import RR, Graph 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder 4 | 5 | @pytest.fixture 6 | def builder(): 7 | return DirectNodeGraphWithParentNetworkBuilder() 8 | 9 | def test_parent_subgraphs_are_copies(builder): 10 | 11 | # UP <** 12 | # | * 13 | # UP:C1 * 14 | # * 15 | # * 16 | # P1 * 17 | # | * 18 | # ROI*** 19 | 20 | g = Graph([ 21 | RR('ROI', 'P1', RR.DIRECT), 22 | RR('ROI', 'UP', RR.ULTIMATE), 23 | 24 | RR('UP:C1', 'UP', RR.DIRECT), 25 | ]) 26 | 27 | parent_graph, _ = builder.ultimate_parent_direct_graph(g, 'ROI') 28 | roi_graph = builder.node_direct_graph(g, 'ROI') 29 | 30 | assert sorted(list(parent_graph.nodes)) == ['UP', 'UP:C1'] 31 | assert sorted(list(roi_graph.nodes)) == ['P1', 'ROI'] 32 | assert sorted(list(g.nodes)) == ['P1', 'ROI', 'UP', 'UP:C1'] 33 | 34 | 35 | def test_parent_graph_connected_with_ROI(builder): 36 | 37 | # UP:P1 <-- should not happen! 38 | # / | 39 | # UP:P1:C1 | 40 | # --UP <** 41 | # / | * 42 | # UP:C1 P1 * 43 | # | * 44 | # ROI*** 45 | # / \ 46 | # C1 C2 47 | 48 | g = Graph([ 49 | RR('ROI', 'UP', RR.ULTIMATE), 50 | 51 | RR('C1', 'ROI', RR.DIRECT), 52 | RR('C2', 'ROI', RR.DIRECT), 53 | RR('ROI', 'P1', RR.DIRECT), 54 | RR('P1', 'UP', RR.DIRECT), 55 | RR('UP:C1', 'UP', RR.DIRECT), 56 | RR('UP', 'UP:P1', RR.DIRECT), 57 | RR('UP:P1:C1', 'UP:P1', RR.DIRECT), 58 | ]) 59 | 60 | parent_graph, _ = builder.ultimate_parent_direct_graph(g, 'ROI') 61 | assert sorted(list(parent_graph.nodes)) == ['C1', 'C2', 'P1', 'ROI', 'UP', 'UP:C1', 'UP:P1', 'UP:P1:C1'] 62 | 63 | 64 | def test_graph_with_ROI_and_ultimate_parent_not_connected_via_direct_relationships(builder): 65 | 66 | # UP:P1 <-- should not happen! 67 | # / | 68 | # UP:P1:C1 | 69 | # --UP <** 70 | # / ^ * 71 | # UP:C1 * * 72 | # * * 73 | # * * 74 | # P1 * 75 | # | * 76 | # ROI*** 77 | # / \ 78 | # C1 C2 79 | 80 | g = Graph([ 81 | RR('ROI', 'UP', RR.ULTIMATE), 82 | RR('ROI', 'P1', RR.DIRECT), 83 | RR('P1', 'UP', RR.ULTIMATE), # <-- decoy; should also not make the graphs connected 84 | 85 | RR('C1', 'ROI', RR.DIRECT), 86 | RR('C2', 'ROI', RR.DIRECT), 87 | RR('UP:C1', 'UP', RR.DIRECT), 88 | RR('UP', 'UP:P1', RR.DIRECT), 89 | RR('UP:P1:C1', 'UP:P1', RR.DIRECT), 90 | ]) 91 | 92 | # Sanity check; parent graph is only: 93 | parent_graph, _ = builder.ultimate_parent_direct_graph(g, 'ROI') 94 | assert sorted(list(parent_graph.nodes)) == ['UP', 'UP:C1', 'UP:P1', 'UP:P1:C1'] 95 | assert sorted(list(parent_graph.edges)) == [ 96 | ('UP', 'UP:P1', 0), 97 | ('UP:C1', 'UP', 0), 98 | ('UP:P1:C1', 'UP:P1', 0), 99 | ] 100 | 101 | roi_graph = builder.node_direct_graph(g, 'ROI') 102 | # assert sorted(list(roi_graph.nodes)) == ['UP', 'UP:C1', 'UP:P1', 'UP:P1:C1'] 103 | assert sorted(list(roi_graph.nodes)) == ['C1', 'C2', 'P1', 'ROI'] 104 | assert sorted(list(roi_graph.edges(data='type'))) == [ 105 | # ROI edges 106 | ('C1', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'), 107 | ('C2', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'), 108 | ('ROI', 'P1', 'IS_DIRECTLY_CONSOLIDATED_BY'), 109 | ] 110 | 111 | merged_graph, _ = builder.build(g, 'ROI') 112 | assert sorted(list(merged_graph.nodes)) == ['C1', 'C2', 'P1', 'ROI', 'UP', 'UP:C1', 'UP:P1', 'UP:P1:C1'] 113 | # assert sorted(list(merged_graph.edges(data='type'))) == [ 114 | # # ROI edges 115 | # ('C1', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'), 116 | # ('C2', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'), 117 | # ('ROI', 'P1', 'IS_DIRECTLY_CONSOLIDATED_BY'), 118 | 119 | # # TODO: Return direct graph except for ROI -> ultimate parent edge? 120 | # # ('ROI', 'UP', 'IS_ULTIMATELY_CONSOLIDATED_BY'), 121 | 122 | # # Ultimate Parent edges 123 | # ('UP', 'UP:P1', 'IS_DIRECTLY_CONSOLIDATED_BY'), 124 | # ('UP:C1', 'UP', 'IS_DIRECTLY_CONSOLIDATED_BY'), 125 | # ('UP:P1:C1', 'UP:P1', 'IS_DIRECTLY_CONSOLIDATED_BY'), 126 | # ] 127 | 128 | 129 | 130 | def test_ROI_without_ultimate_parent(builder): 131 | 132 | # CASE: No Ultimate Parent 133 | # P2 134 | # | 135 | # -----P1 136 | # / | 137 | # P1:C1 ROI C2:P1 C2:UP1 138 | # / \ / / 139 | # C1 C2------ 140 | 141 | g = Graph([ 142 | RR('C1', 'ROI', RR.DIRECT), 143 | RR('C2', 'ROI', RR.DIRECT), 144 | RR('C2', 'C2:P1', RR.DIRECT), 145 | RR('C2', 'C2:UP1', RR.ULTIMATE), 146 | RR('P1:C1', 'P1', RR.DIRECT), 147 | RR('ROI', 'P1', RR.DIRECT), 148 | RR('P1', 'P2', RR.DIRECT), 149 | ]) 150 | 151 | sub, _ = builder.build(g, 'ROI') 152 | 153 | # assert sub.nodes == ['P1'] 154 | # assert sub.('type') == 'x' 155 | 156 | 157 | 158 | 159 | # TODO CASE: Multiuple Ultimate Parent? 160 | 161 | # CASE: 162 | # UP 163 | # \ 164 | # | 165 | # | 166 | # | 167 | # | 168 | # | 169 | 170 | # RR('A', 'B', RR.DIRECT), 171 | # RR('C', 'B', RR.DIRECT), 172 | # RR('C', 'D', RR.ULTIMATE), 173 | # RR('E', 'C', RR.ULTIMATE), 174 | # RR('X', 'C', RR.DIRECT), 175 | # RR('X', 'F', RR.DIRECT), 176 | # RR('G', 'X', RR.BRANCH), 177 | # RR('H', 'X', RR.DIRECT), 178 | # RR('H', 'K', RR.DIRECT), 179 | # RR('I', 'G', RR.DIRECT), 180 | # RR('I', 'J', RR.DIRECT), 181 | 182 | # print('hey') 183 | assert True -------------------------------------------------------------------------------- /src/algorithms/graph.py: -------------------------------------------------------------------------------- 1 | import json 2 | import csv 3 | import copy 4 | from typing import Iterator, KeysView, Union 5 | 6 | import networkx as nx 7 | import pandas as pd 8 | 9 | 10 | def iter_csv(f: str, limit: int = None): 11 | """ 12 | Convenience function to retrieve rows from a csv file. 13 | """ 14 | with open(f) as csvfile: 15 | reader = csv.DictReader(csvfile, delimiter=',', quotechar='"') 16 | for i, row in enumerate(reader): 17 | if limit is not None and limit < i + 1: 18 | return 19 | yield row 20 | 21 | 22 | class RR: 23 | DIRECT = 'IS_DIRECTLY_CONSOLIDATED_BY' 24 | # DIRECT_CHILD = 'direct_child' 25 | 26 | ULTIMATE = 'IS_ULTIMATELY_CONSOLIDATED_BY' 27 | # ULTIMATE_CHILD = 'ultimate_child' 28 | 29 | BRANCH = 'IS_INTERNATIONAL_BRANCH_OF' 30 | 31 | # HEADQUARTERS = 'headquarters' 32 | 33 | def __init__(self, start: str, end: str, rel_type: str): 34 | self.start = start 35 | self.end = end 36 | self.rel_type = rel_type 37 | 38 | @staticmethod 39 | def from_csv_row(row: dict) -> 'RR': 40 | return RR( 41 | row['Relationship.StartNode.NodeID'], 42 | row['Relationship.EndNode.NodeID'], 43 | row['Relationship.RelationshipType'] 44 | ) 45 | 46 | 47 | class Graph: 48 | lookup_table = pd.DataFrame() 49 | 50 | def __init__(self, rr: Iterator[RR]): 51 | self.g = nx.MultiDiGraph() 52 | self.__load_rr(rr) 53 | 54 | def __str__(self): 55 | return self.to_json() 56 | 57 | @property 58 | def nodes(self): 59 | return self.g.nodes 60 | 61 | @property 62 | def edges(self): 63 | return self.g.edges 64 | 65 | @property 66 | def out_edges(self): 67 | return self.g.out_edges 68 | 69 | @property 70 | def in_edges(self): 71 | return self.g.in_edges 72 | 73 | def __load_rr(self, rr: Iterator[RR]): 74 | """ 75 | This helper function is used to build the graph from csv files. 76 | It reads the individual rows from a tuple generator representing lines 77 | in the csv file. 78 | """ 79 | 80 | def mk_edge(rr: RR): 81 | """ 82 | Edge transformation function to bring the edge format from custom 83 | class RR to networkx tuple form (start, end, data). 84 | """ 85 | return rr.start, rr.end, {'type': rr.rel_type} 86 | 87 | self.g.add_edges_from(map(mk_edge, list(rr))) 88 | 89 | def deepcopy(self) -> 'Graph': 90 | return copy.deepcopy(self) 91 | 92 | def merge(self, other_graph: 'Graph') -> 'Graph': 93 | """ 94 | Wrapper function to merge the Networkx graph attributes 95 | of the custom Graph class. 96 | """ 97 | return Graph.from_graph(nx.compose(self.g, other_graph.g)) 98 | 99 | def get_edge_data(self, u: str, v: str, key: str = None, default: dict = None): 100 | """ 101 | Wrapper function to retrieve data associated with the specified edge. 102 | """ 103 | default = {} if not default else default 104 | return self.g.get_edge_data(u, v, key, default) 105 | 106 | def get_edge_types(self, u: str, v: str) -> list: 107 | """ 108 | Wrapper function to retrieve edge type. 109 | """ 110 | return [e['type'] for e in self.get_edge_data(u, v).values()] 111 | 112 | def get_direct_parent(self, node: str) -> str: 113 | """ 114 | This function retrieves the direct parent of a given edge 115 | based on the edge relation type. 116 | """ 117 | for e in self.out_edges(node): 118 | if 'IS_DIRECTLY_CONSOLIDATED_BY' in self.get_edge_types(e[0], e[1]): 119 | return e[1] 120 | 121 | def get_ultimate_parent(self, node: str) -> str: 122 | """ 123 | This function retrieves the ultimate parent of a given edge 124 | based on the edge relation type. 125 | """ 126 | for e in self.out_edges(node): 127 | if 'IS_ULTIMATELY_CONSOLIDATED_BY' in self.get_edge_types(e[0], e[1]): 128 | return e[1] 129 | 130 | def remove_edge_type(self, rel_type: str): 131 | """ 132 | This function removes all edges of a given type from the graph. 133 | It updates the graph object inplace. 134 | """ 135 | remove = [(u, v, key) for (u, v, key) in self.edges if self.get_edge_data(u, v, key=key)['type'] == rel_type] 136 | self.g.remove_edges_from(remove) 137 | return self 138 | 139 | def has_direct_parent(self, node: str) -> bool: 140 | """ 141 | Convenience function to check if a node has a direct parent. 142 | """ 143 | return self.get_direct_parent(node) is not None 144 | 145 | def has_ultimate_parent(self, node: str) -> bool: 146 | """ 147 | Convenience function to check if a node has a ultimate parent. 148 | """ 149 | return self.get_ultimate_parent(node) is not None 150 | 151 | def connected_nodes(self, lei: str) -> KeysView: 152 | """ 153 | This function transforms the graph to a undirected version of itself 154 | and finds all connected nodes for a given LEI identifier, based on all 155 | computable paths from the LEI node. 156 | """ 157 | # NOTE: Convertinv graph to undirected, in order to easily get all connected nodes regardless of edge direction 158 | # (i.e. including inbound connections) 159 | return nx.single_source_shortest_path(self.g.to_undirected(), lei).keys() 160 | 161 | def get_shortest_direct_parent_path_lengths(self, reference_node: str) -> dict: 162 | """ 163 | This function computes the path lengths from a given reference to all 164 | other via direct parent edges reachable nodes. It does NOT convert the graph to a undirected 165 | version before and respects directions. Dict form is {node_id: distance}. 166 | """ 167 | g = self.deepcopy().remove_edge_type(RR.ULTIMATE) # TODO: What about BRANCH? 168 | return dict(nx.single_target_shortest_path_length(g.g, reference_node)) 169 | 170 | def sub(self, lei: str) -> 'Graph': 171 | """ 172 | This function subsets the graph based on the nodes connected with the 173 | given LEI node. 174 | """ 175 | self.g.add_node(lei) # Add dummy node 176 | nodes = self.connected_nodes(lei) 177 | return Graph.from_graph(self.g.subgraph(nodes)) 178 | 179 | def get_node_label(self, lei: str) -> str: 180 | """ 181 | Wrapper function to retrieve the legal name of an entity based 182 | on its LEI from the lookup table attached to the graph. 183 | """ 184 | try: 185 | return self.lookup_table.loc[lei]['Entity.LegalName'] 186 | except KeyError: 187 | return 'id not found' 188 | 189 | def transform_node(self, node: dict) -> dict: 190 | """ 191 | Convenience function to rename node dictionary keys for final return array. 192 | """ 193 | return { 194 | 'id': node['id'], 195 | 'title': node['id'], 196 | 'label': self.get_node_label(node['id']), 197 | 'level': node.get('level'), 198 | 'no_parent': node.get('no_parent'), 199 | } 200 | 201 | def transform_link(self, link: dict) -> dict: 202 | """ 203 | Convenience function to rename edge dictionary keys for final return array. 204 | """ 205 | return { 206 | 'from': link['source'], 207 | 'to': link['target'], 208 | 'label': link['type'], 209 | } 210 | 211 | def to_array(self) -> dict: 212 | """ 213 | Convenience function for preparing the graph data to json dump. 214 | """ 215 | data = nx.node_link_data(self.g) 216 | return { 217 | 'nodes': list(map(self.transform_node, data['nodes'])), 218 | 'edges': list(map(self.transform_link, data['links'])), 219 | } 220 | 221 | def to_json(self): 222 | return json.dumps(self.to_array(), indent=2) 223 | 224 | def set_levels(self, parent: str = None) -> 'Graph': 225 | """ 226 | This function sets the levels on a graph as a node attribute. 227 | """ 228 | subgraph = self 229 | if parent: 230 | distances = self._level_computation(subgraph=subgraph, root_node=parent) 231 | distances = { 232 | node: { 233 | 'level': distances[node] if distances[node] is not 'no_parent' else 1, 234 | 'no_parent': False if distances[node] is not 'no_parent' else True 235 | } for node in distances 236 | } 237 | nx.set_node_attributes(subgraph.g, distances) 238 | else: 239 | print('No parent found. TODO') 240 | raise ValueError 241 | return subgraph 242 | 243 | @staticmethod 244 | def _level_computation(subgraph, root_node: str) -> dict: 245 | """ 246 | This function computes the levels with respect to the given root node by using 247 | single target shortest path algorithm from networkx. It returns a dictionary 248 | of node ids with the computed depth in the graph. 249 | """ 250 | compute_graph = subgraph.remove_edge_type(rel_type=RR.ULTIMATE) 251 | distances = nx.single_target_shortest_path_length(compute_graph.g, target=root_node) 252 | distances = dict(distances) 253 | distances.update({node: 'no_parent' for node in subgraph.nodes if not distances.get(node) and 254 | node is not root_node}) 255 | distances.update({root_node: 0}) 256 | return distances 257 | 258 | @staticmethod 259 | def from_graph(_g: nx.MultiDiGraph) -> 'Graph': 260 | g = Graph([]) 261 | g.g = copy.deepcopy(_g) 262 | return g 263 | 264 | @staticmethod 265 | def set_lookup_table(f): 266 | Graph.lookup_table = pd.read_csv(f, index_col=["LEI"], usecols=["LEI", "Entity.LegalName"]) 267 | 268 | @staticmethod 269 | def from_csv(f: str, limit: int = None) -> 'Graph': 270 | return Graph(RR.from_csv_row(row) for row in iter_csv(f, limit)) 271 | -------------------------------------------------------------------------------- /src/algorithms/graph_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from os import path 3 | from graph import RR, Graph 4 | import pandas as pd 5 | 6 | @pytest.fixture 7 | def rr_test_csv(request): 8 | return path.join(request.config.rootdir, 'src/test_data', 'rr-test.csv') 9 | 10 | @pytest.fixture 11 | def lookup_test_csv(request): 12 | return path.join(request.config.rootdir, 'src/test_data', 'lei-test.csv') 13 | 14 | def test_RR(): 15 | rr = RR('LEI_1', 'LEI_2', RR.DIRECT) 16 | 17 | assert rr.start == 'LEI_1' 18 | assert rr.end == 'LEI_2' 19 | assert rr.rel_type == 'IS_DIRECTLY_CONSOLIDATED_BY' 20 | 21 | direct = RR('LEI_1', 'LEI_2', RR.DIRECT) 22 | assert direct.rel_type == 'IS_DIRECTLY_CONSOLIDATED_BY' 23 | 24 | ultimate = RR('LEI_1', 'LEI_2', RR.ULTIMATE) 25 | assert ultimate.rel_type == 'IS_ULTIMATELY_CONSOLIDATED_BY' 26 | 27 | branch = RR('LEI_1', 'LEI_2', RR.BRANCH) 28 | assert branch.rel_type == 'IS_INTERNATIONAL_BRANCH_OF' 29 | 30 | def test_Graph_from_file(rr_test_csv): 31 | g = Graph.from_csv(rr_test_csv) 32 | 33 | assert list(g.nodes) == ['LEI_1', 'DIRECT_PARENT_LEI', 'ULTIMATE_PARENT_LEI'] 34 | assert list(g.edges) == [ 35 | ('LEI_1', 'DIRECT_PARENT_LEI', 0), 36 | ('LEI_1', 'ULTIMATE_PARENT_LEI', 0), 37 | ] 38 | 39 | def test_node_get_direct_and_ultimate_parent(): 40 | g = Graph([]) 41 | 42 | assert g.get_direct_parent('ROI') is None 43 | assert g.get_ultimate_parent('ROI') is None 44 | 45 | g = Graph([ 46 | RR('ROI', 'P1', RR.DIRECT), 47 | ]) 48 | assert g.get_direct_parent('ROI') == 'P1' 49 | assert g.get_ultimate_parent('ROI') is None 50 | 51 | g = Graph([ 52 | RR('ROI', 'UP1', RR.ULTIMATE), 53 | ]) 54 | 55 | assert g.get_direct_parent('ROI') is None 56 | assert g.get_ultimate_parent('ROI') == 'UP1' 57 | 58 | g = Graph([ 59 | RR('ROI', 'P1', RR.DIRECT), 60 | RR('ROI', 'UP1', RR.ULTIMATE), 61 | ]) 62 | 63 | assert g.get_direct_parent('ROI') == 'P1' 64 | assert g.get_ultimate_parent('ROI') == 'UP1' 65 | 66 | # Catches inconsistency (more than one direct/ultimate parent) 67 | # TODO: Do while initializing graph? 68 | 69 | # g = Graph([ 70 | # RR('ROI', 'P1', RR.DIRECT), 71 | # RR('ROI', 'P1', RR.DIRECT), 72 | # ]) 73 | # try: 74 | # g.get_direct_parent('ROI') 75 | # except Exception as e: 76 | # assert str(e) == 'Found more than one Direct Parent for node ROI' 77 | # return 78 | # assert False, 'Expected Exception due to multiple direct parents' 79 | 80 | def test_node_has_direct_and_ultimate_parent(): 81 | g = Graph([]) 82 | 83 | assert not g.has_direct_parent('ROI') 84 | assert not g.has_ultimate_parent('ROI') 85 | 86 | g = Graph([ 87 | RR('ROI', 'P1', RR.DIRECT), 88 | RR('ROI', 'UP1', RR.ULTIMATE), 89 | ]) 90 | 91 | assert g.has_direct_parent('ROI') 92 | assert g.has_ultimate_parent('ROI') 93 | 94 | def test_remove_edge_type(): 95 | g = Graph([ 96 | RR('ROI', 'P1', RR.DIRECT), 97 | RR('ROI', 'UP1', RR.ULTIMATE), 98 | RR('A', 'B', RR.ULTIMATE), 99 | RR('C', 'A', RR.ULTIMATE), 100 | RR('C', 'A', RR.DIRECT), 101 | RR('A', 'C', RR.ULTIMATE), 102 | ]) 103 | 104 | g.remove_edge_type(RR.ULTIMATE) 105 | assert sorted(list(g.edges)) == [ 106 | ('C', 'A', 1), 107 | ('ROI', 'P1', 0), 108 | ] 109 | 110 | def test_lookup_read_in(lookup_test_csv): 111 | g = Graph([]) 112 | Graph.set_lookup_table(lookup_test_csv) 113 | 114 | assert isinstance(g.lookup_table, pd.DataFrame) 115 | 116 | def test_lookup(rr_test_csv, lookup_test_csv): 117 | g = Graph.from_csv(rr_test_csv) 118 | Graph.set_lookup_table(lookup_test_csv) 119 | assert g.lookup_table.shape[0] == 3 120 | assert g.get_node_label("LEI_1") == "company1" 121 | 122 | def test_node_not_found_in_G(rr_test_csv, lookup_test_csv): 123 | g = Graph.from_csv(rr_test_csv) 124 | Graph.set_lookup_table(lookup_test_csv) 125 | 126 | a = g.sub('LEI_2').to_array() 127 | nodes = a['nodes'] 128 | edges = a['edges'] 129 | 130 | assert g.lookup_table.shape[0] == 3 131 | assert edges == [] 132 | assert nodes == [{ 133 | 'label': 'company2', 134 | 'title': 'LEI_2', 135 | 'id': 'LEI_2', 136 | 'level': None, 137 | 'no_parent': None, 138 | }] 139 | 140 | def test_node_not_found_in_G_and_lookup(rr_test_csv, lookup_test_csv): 141 | g = Graph.from_csv(rr_test_csv) 142 | g.set_lookup_table(lookup_test_csv) 143 | 144 | a = g.sub('LEI_NOT_FOUND').to_array() 145 | nodes = a['nodes'] 146 | edges = a['edges'] 147 | 148 | assert edges == [] 149 | assert nodes == [{ 150 | 'label': 'id not found', 151 | 'title': 'LEI_NOT_FOUND', 152 | 'id': 'LEI_NOT_FOUND', 153 | 'level': None, 154 | 'no_parent': None, 155 | }] 156 | 157 | def test_node_found_in_G(rr_test_csv, lookup_test_csv): 158 | g = Graph.from_csv(rr_test_csv) 159 | g.set_lookup_table(lookup_test_csv) 160 | a = g.sub('LEI_1').to_array() 161 | nodes = a['nodes'] 162 | edges = a['edges'] 163 | 164 | labels = [n['label'] for n in nodes] 165 | assert len(edges) == 2 166 | assert len(nodes) == 3 167 | assert 'company1' in labels 168 | 169 | def test_Graph_to_array(lookup_test_csv): 170 | g = Graph([ 171 | RR('LEI_1', 'LEI_2', RR.DIRECT), 172 | # RR('LEI_1', 'LEI_3', RR.ULTIMATE), 173 | 174 | # TODO: More cases 175 | 176 | # RR('LEI_2', 'LEI_X', RR.DIRECT), 177 | # RR('LEI_X', 'LEI_1', RR.ULTIMATE), 178 | 179 | # RR('LEI_A', 'LEI_X', RR.DIRECT), 180 | # RR('LEI_A', 'LEI_X', RR.ULTIMATE), 181 | 182 | # RR('LEI_B', 'LEI_Z', RR.DIRECT), 183 | # RR('LEI_B', 'LEI_I', RR.ULTIMATE), 184 | ]) 185 | print(lookup_test_csv) 186 | Graph.set_lookup_table(lookup_test_csv) 187 | 188 | a = g.to_array() 189 | nodes = a['nodes'] 190 | edges = a['edges'] 191 | 192 | assert type(nodes) is list and len(nodes) > 0 193 | assert type(edges) is list and len(edges) > 0 194 | 195 | assert nodes == [ 196 | { 197 | 'id': 'LEI_1', 198 | 'title': 'LEI_1', 199 | 'label': 'company1', 200 | 'level': None, 201 | 'no_parent': None, 202 | }, 203 | { 204 | 'id': 'LEI_2', 205 | 'title': 'LEI_2', 206 | 'label': 'company2', 207 | 'level': None, 208 | 'no_parent': None, 209 | } 210 | ] 211 | 212 | assert edges == [ 213 | { 214 | 'from': 'LEI_1', 215 | 'to': 'LEI_2', 216 | 'label': 'IS_DIRECTLY_CONSOLIDATED_BY', 217 | }, 218 | ] 219 | 220 | def test_Graph_subgraphs(): 221 | g = Graph([ 222 | 223 | # Parent chain 224 | RR('LEI_1', 'LEI_2', RR.DIRECT), 225 | RR('LEI_2', 'LEI_3', RR.DIRECT), 226 | RR('LEI_3', 'LEI_4', RR.DIRECT), 227 | 228 | # Isolated 2-node-graph 229 | RR('LEI_SOLO_A', 'LEI_SOLO_B', RR.DIRECT), 230 | 231 | # Multiple/duplicate edges 232 | RR('LEI_A', 'LEI_B', RR.DIRECT), 233 | RR('LEI_A', 'LEI_B', RR.DIRECT), # duplicates will end up unique 234 | RR('LEI_A', 'LEI_B', RR.DIRECT), # duplicates will end up unique 235 | RR('LEI_A', 'LEI_B', RR.DIRECT), # duplicates will end up unique 236 | 237 | RR('LEI_B', 'LEI_A', RR.DIRECT), # opposite direction 238 | 239 | RR('LEI_A', 'LEI_B', RR.ULTIMATE), # ultimate same as direct 240 | 241 | # multiple (some same) ultimate/direct 242 | RR('LEI_A', 'LEI_B', RR.DIRECT), 243 | RR('LEI_A', 'LEI_C', RR.DIRECT), 244 | RR('LEI_A', 'LEI_D', RR.ULTIMATE), 245 | 246 | RR('LEI_D', 'LEI_E', RR.BRANCH), 247 | 248 | 249 | # Complex... 250 | # B D 251 | # / \ / 252 | # A C F 253 | # / \ / 254 | # E (X) K 255 | # / \ / 256 | # J G H 257 | # \ / 258 | # I 259 | RR('A', 'B', RR.DIRECT), 260 | RR('C', 'B', RR.DIRECT), 261 | RR('C', 'D', RR.ULTIMATE), 262 | RR('E', 'C', RR.ULTIMATE), 263 | RR('X', 'C', RR.DIRECT), 264 | RR('X', 'F', RR.DIRECT), 265 | RR('G', 'X', RR.BRANCH), 266 | RR('H', 'X', RR.DIRECT), 267 | RR('H', 'K', RR.DIRECT), 268 | RR('I', 'G', RR.DIRECT), 269 | RR('I', 'J', RR.DIRECT), 270 | 271 | ]) 272 | 273 | # CASE: Grab start of chain 274 | a = g.sub('LEI_1').to_array() 275 | assert sorted([node['id'] for node in a['nodes']]) == ['LEI_1', 'LEI_2', 'LEI_3', 'LEI_4'] 276 | assert sorted([(node['from'], node['to']) for node in a['edges']]) == [ 277 | 278 | # NOTE: Always two edges (e.g. direct parent / direct child) 279 | 280 | ('LEI_1', 'LEI_2'), 281 | # ('LEI_2', 'LEI_1'), 282 | 283 | ('LEI_2', 'LEI_3'), 284 | # ('LEI_3', 'LEI_2'), 285 | 286 | ('LEI_3', 'LEI_4'), 287 | # ('LEI_4', 'LEI_3'), 288 | ] 289 | 290 | # CASE: Grab middle of chain 291 | a = g.sub('LEI_2').to_array() 292 | assert sorted([node['id'] for node in a['nodes']]) == ['LEI_1', 'LEI_2', 'LEI_3', 'LEI_4'] 293 | 294 | # CASE: Grab end of chain 295 | a = g.sub('LEI_4').to_array() 296 | assert sorted([node['id'] for node in a['nodes']]) == ['LEI_1', 'LEI_2', 'LEI_3', 'LEI_4'] 297 | 298 | # CASE: Grab different isolated subgraph 299 | a = g.sub('LEI_SOLO_A').to_array() 300 | # assert sorted([node['id'] for node in a['nodes']]) == ['LEI_SOLO_A', 'LEI_SOLO_B'] 301 | 302 | # CASE: Mixed 303 | a = g.sub('LEI_A').to_array() 304 | assert sorted([node['id'] for node in a['nodes']]) == ['LEI_A', 'LEI_B', 'LEI_C', 'LEI_D', 'LEI_E'] 305 | 306 | # CASE: Complex 307 | # B D 308 | # / \ / 309 | # A C F 310 | # / \ / 311 | # E (X) K 312 | # / \ / 313 | # J G H 314 | # \ / 315 | # I 316 | a = g.sub('X').to_array() 317 | assert sorted([node['id'] for node in a['nodes']]) == ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'X'] 318 | assert sorted([(node['from'], node['to']) for node in a['edges']]) == [ 319 | 320 | ('A', 'B'), 321 | ('C', 'B'), 322 | ('C', 'D'), 323 | ('E', 'C'), 324 | ('G', 'X'), 325 | ('H', 'K'), 326 | ('H', 'X'), 327 | ('I', 'G'), 328 | ('I', 'J'), 329 | ('X', 'C'), 330 | ('X', 'F'), 331 | ] 332 | 333 | # TODO: Test edges 334 | 335 | @pytest.mark.skip("Direction not implemented") 336 | def test_Graph_direction(): 337 | assert False, "TODO: Implement MultiDiGraph" 338 | --------------------------------------------------------------------------------