├── .dockerignore
├── src
    ├── test_data
    │   ├── lei-test.csv
    │   └── rr-test.csv
    ├── app_test.py
    ├── algorithms
    │   ├── graph_cocacola_test.py
    │   ├── graph_a2_test.py
    │   ├── graph_builder.py
    │   ├── graph_samsung_test.py
    │   ├── graph_builder_test.py
    │   ├── graph.py
    │   └── graph_test.py
    └── app.py
├── .gitignore
├── test.sh
├── Dockerfile
├── docker-compose.yml
├── LICENSE
├── data
    ├── download.sh
    └── download_mac.sh
├── requirements.txt
└── README.md


/.dockerignore:
--------------------------------------------------------------------------------
1 | ./data


--------------------------------------------------------------------------------
/src/test_data/lei-test.csv:
--------------------------------------------------------------------------------
1 | LEI,Entity.LegalName
2 | LEI_1,company1
3 | LEI_2,company2
4 | LEI_3,company3


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.zip
3 | \.pytest_cache/
4 | data/gleif_rr\.csv
5 | data/gleif_lei\.csv
6 | __pycache__/
7 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | echo "testing inside docker"
2 | docker run --rm -it \
3 |     -v "$(pwd)/data":/data \
4 |     --entrypoint "pytest" \
5 |     gleif-backend:latest --rootdir=/
6 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM tiangolo/uvicorn-gunicorn-fastapi:python3.6
 2 | 
 3 | ADD requirements.txt /requirements.txt
 4 | 
 5 | RUN pip install -r /requirements.txt
 6 | 
 7 | ADD src /src
 8 | 
 9 | WORKDIR /src
10 | 
11 | CMD ["uvicorn", "--host",  "0.0.0.0", "app:api"]


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | services:
 3 |   backend:
 4 |     image: gleif-backend:latest
 5 |     build: . 
 6 |     restart: unless-stopped
 7 |     networks: 
 8 |       - gleif
 9 |     volumes:
10 |       - ./data:/data:ro
11 |     ports:
12 |       - "8000:8000"
13 | networks:
14 |   gleif:


--------------------------------------------------------------------------------
/src/app_test.py:
--------------------------------------------------------------------------------
 1 | from starlette.testclient import TestClient
 2 | #  from app import api
 3 | 
 4 | #  client = TestClient(api)
 5 | 
 6 | #  def test_read_main():
 7 | #      response = client.get("/")
 8 | #      assert response.status_code == 200
 9 | #      assert response.json() == {"msg": "Hello World"}
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GLEIF Level-2 Server
2 | Written in 2019 by CorrelAid & Global Legal Entity Identifier Foundation (GLEIF) - correlaid.org, gleif.org
3 | 
4 | To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
5 | You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
6 | 


--------------------------------------------------------------------------------
/data/download.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Fetch the GLEIF golden copy files of 2019-07-19 with wget and write gleif_lei.csv and gleif_rr.csv into the current directory.
 3 | set -u
 4 | set -e
 5 | 
 6 | wget https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211553/20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip
 7 | wget https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211598/20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip
 8 | unzip 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip
 9 | unzip 20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip
10 | # Keep only the first two columns of the lei file and strip every double quote. NOTE(review): cut is not CSV-aware, so a quoted value containing a comma would be cut short - confirm against the data.
11 | cat 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv | cut -d',' -f1,2 | sed 's/"//g' > gleif_lei.csv
12 | mv 20190719-0000-gleif-goldencopy-rr-golden-copy.csv gleif_rr.csv
13 | 
14 | rm *.zip  # removes every zip file in the current directory, not only the two downloaded above
15 | 


--------------------------------------------------------------------------------
/data/download_mac.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Fetch the GLEIF golden copy files of 2019-07-19 with curl and write gleif_lei.csv and gleif_rr.csv into the current directory.
 3 | set -u
 4 | set -e
 5 | 
 6 | curl -O https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211553/20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip
 7 | curl -O https://leidata-preview.gleif.org/storage/golden-copy-files/2019/07/19/211598/20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip
 8 | unzip 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv.zip
 9 | unzip 20190719-0000-gleif-goldencopy-rr-golden-copy.csv.zip
10 | # Keep only the first two columns of the lei file and strip every double quote. NOTE(review): cut is not CSV-aware, so a quoted value containing a comma would be cut short - confirm against the data.
11 | cat 20190719-0000-gleif-goldencopy-lei2-golden-copy.csv | cut -d',' -f1,2 | sed 's/"//g' > gleif_lei.csv
12 | mv 20190719-0000-gleif-goldencopy-rr-golden-copy.csv gleif_rr.csv
13 | 
14 | rm *.zip  # removes every zip file in the current directory, not only the two downloaded above
15 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiofiles==0.4.0
 2 | aniso8601==6.0.0
 3 | certifi==2019.6.16
 4 | chardet==3.0.4
 5 | Click==7.0
 6 | decorator==4.4.0
 7 | dnspython==1.16.0
 8 | email-validator==1.0.4
 9 | fastapi==0.33.0
10 | graphene==2.1.7
11 | graphql-core==2.2
12 | graphql-relay==2.0.0
13 | h11==0.8.1
14 | httptools==0.0.13
15 | idna==2.8
16 | itsdangerous==1.1.0
17 | Jinja2==2.10.1
18 | MarkupSafe==1.1.1
19 | numpy==1.16.4
20 | pandas==0.24.2
21 | promise==2.2.1
22 | pydantic==0.30
23 | python-dateutil==2.8.0
24 | python-multipart==0.0.5
25 | pytz==2019.1
26 | PyYAML==5.1.1
27 | requests==2.22.0
28 | Rx==3.0.0
29 | six==1.12.0
30 | starlette==0.12.0
31 | ujson==1.35
32 | urllib3==1.25.3
33 | uvicorn==0.8.4
34 | uvloop==0.12.2
35 | websockets==7.0
36 | networkx==2.3
37 | pytest==5.0.1
38 | pytest-watch==4.2.0
39 | 


--------------------------------------------------------------------------------
/src/algorithms/graph_cocacola_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder
 4 | from graph import Graph
 5 | 
 6 | 
 7 | @pytest.fixture(scope="class")
 8 | def setup(request):
 9 |     rr_csv = os.path.join(request.config.rootdir, "data", "gleif_rr.csv")
10 |     lookup_csv = os.path.join(request.config.rootdir, "data", "gleif_lei.csv")
11 |     lei = "UWJKFUJFZ02DKWI3RY53"
12 |     builder = DirectNodeGraphWithParentNetworkBuilder()
13 | 
14 |     glei_network = Graph.from_csv(f=rr_csv, limit=None)
15 |     Graph.set_lookup_table(f=lookup_csv)
16 | 
17 |     parent_graph, _ = builder.build(glei_network, lei)
18 | 
19 |     structure = parent_graph.set_levels(lei).to_array()
20 |     return structure, lei
21 | 
22 | 
23 | def test_nodes_edges_more_than_0(setup):
24 |     structure, _ = setup
25 | 
26 |     assert len(structure["nodes"]) > 0
27 |     assert len(structure["edges"]) > 0
28 | 
29 | 
30 | def test_lei_in_nodes(setup):
31 |     structure, lei = setup
32 |     cocacolacompany_node = [n for n in structure["nodes"] if n["id"] == lei][0]
33 | 
34 |     assert cocacolacompany_node["level"] == 0
35 | 
36 | 
37 | def test_direct_children(setup):
38 |     structure, _ = setup
39 |     # direct children
40 |     direct_children = [n for n in structure["nodes"] if n["level"] == 1]
41 |     assert len(direct_children) == 8
42 | 
43 | 


--------------------------------------------------------------------------------
/src/app.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from fastapi import FastAPI
 3 | from starlette.middleware.cors import CORSMiddleware
 4 | 
 5 | from algorithms.graph import Graph
 6 | from algorithms.graph_builder import DirectNodeGraphWithParentNetworkBuilder as Builder
 7 | 
 8 | origins = ["*"]
 9 | 
10 | api = FastAPI()
11 | api.add_middleware(
12 |     CORSMiddleware, allow_origins=origins, allow_methods=["*"], allow_headers=["*"]
13 | )
14 | ROOT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")
15 | DATA_PATH = os.path.join(ROOT_DIR, "data")
16 | 
17 | relationship_data_path = os.path.join(DATA_PATH, "gleif_rr.csv")
18 | lei_lookup_data_path = os.path.join(DATA_PATH, "gleif_lei.csv")
19 | 
20 | glei_network = Graph.from_csv(f=relationship_data_path, limit=None)
21 | Graph.set_lookup_table(f=lei_lookup_data_path)
22 | 
23 | 
24 | @api.get("/company/{node_id}/structure")
25 | def get_company_structure(node_id: str):
26 |     """
27 |     This endpoint returns the complete holding structure based on a single node id.
28 |     :param node_id: LEI of the company whose structure is requested
29 |     :return: the merged graph's to_array() result, with levels set from the ultimate parent if there is one, else from node_id
30 |     """
31 |     builder = Builder()
32 |     parent_graph, parent_node = builder.build(glei_network, node_id)
33 | 
34 |     if parent_node is None:
35 |         # no ultimate parent: levels are set starting from the requested node itself
36 |         return parent_graph.set_levels(node_id).to_array()
37 |     else:  # an ultimate parent was found: levels are set starting from it
38 |         return parent_graph.set_levels(parent_node).to_array()
39 | 


--------------------------------------------------------------------------------
/src/algorithms/graph_a2_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder
 4 | from graph import Graph
 5 | 
 6 | 
 7 | @pytest.fixture(scope="class")
 8 | def setup(request):
 9 |     rr_csv = os.path.join(request.config.rootdir, "data", "gleif_rr.csv")
10 |     lookup_csv = os.path.join(request.config.rootdir, "data", "gleif_lei.csv")
11 |     lei = "969500WU8KVE8U3TL824"
12 |     builder = DirectNodeGraphWithParentNetworkBuilder()
13 | 
14 |     glei_network = Graph.from_csv(f=rr_csv, limit=None)
15 |     Graph.set_lookup_table(f=lookup_csv)
16 | 
17 |     parent_graph, ultimate_parent = builder.build(glei_network, lei)
18 | 
19 |     if ultimate_parent:
20 |         structure = parent_graph.set_levels(ultimate_parent).to_array()
21 |     else:
22 |         structure = parent_graph.set_levels(lei).to_array()
23 | 
24 |     return structure, lei
25 | 
26 | 
27 | def test_structure_exists(setup):
28 | 
29 |     structure, _ = setup
30 | 
31 |     assert len(structure["nodes"]) > 0
32 |     # this LEI does not have children nor a parent
33 |     assert len(structure["edges"]) == 0
34 | 
35 | 
36 | def test_a2_node_is_level_0(setup):
37 |     # this LEI does not have an ultimate parent
38 | 
39 |     structure, lei = setup
40 | 
41 |     a2_node = [n for n in structure["nodes"] if n["id"] == lei][0]
42 | 
43 |     assert a2_node["level"] == 0
44 | 
45 | 


--------------------------------------------------------------------------------
/src/algorithms/graph_builder.py:
--------------------------------------------------------------------------------
 1 | from typing import Tuple, Union
 2 | 
 3 | from algorithms.graph import RR, Graph
 4 | 
 5 | 
 6 | class DirectNodeGraphWithParentNetworkBuilder:
 7 |     def __init__(self):
 8 |         pass  # the builder keeps no state
 9 | 
10 |     def build(self, g: Graph, node: str) -> Tuple[Graph, Union[str, None]]:
11 |         """
12 |         For the given node:
13 |             - build the "direct" graph of the node
14 |             - merge it with the "direct" graph of its ultimate parent, if one exists
15 | 
16 |         The result might be a network of disjoint graphs.
17 | 
18 |         A "direct" graph is the graph that connects nodes only via direct parent relationships (in all directions).
19 |         Returns the merged graph and the ultimate parent of the node (None if it has no ultimate parent).
20 |         """
21 |         g = g.deepcopy()  # work on a copy of the graph that was passed in
22 | 
23 |         parent_graph, parent_node = self.ultimate_parent_direct_graph(g, node)
24 |         node_graph = self.node_direct_graph(g, node)
25 | 
26 |         return parent_graph.merge(node_graph), parent_node
27 | 
28 |     def node_direct_graph(self, g: Graph, node: str) -> Graph:
29 |         g = g.deepcopy()  # work on a copy of the graph that was passed in
30 |         return g.remove_edge_type(RR.ULTIMATE).sub(node)  # drop the ultimate-parent edges, then take the sub graph for node
31 | 
32 |     def ultimate_parent_direct_graph(self, g: Graph, node: str) -> Tuple[Graph, Union[str, None]]:
33 |         """For the given node and its full graph, get the sub graph of the node's ultimate parent.
34 | 
35 |         Arguments:
36 |             g {Graph} -- graph of node
37 |             node {str} -- lei of node
38 | 
39 |         Returns:
40 |             [tuple] -- (sub graph of the ultimate parent, its lei), or (empty Graph, None) if there is no ultimate parent
41 |         """
42 |         g = g.deepcopy()
43 |         parent = g.get_ultimate_parent(node)
44 | 
45 |         # if there is no ultimate parent, we return an empty graph
46 |         if parent is None:
47 |             return Graph([]), parent
48 | 
49 |         # first remove the ultimate-parent edges
50 |         g_without_ultimate_edge = g.remove_edge_type(RR.ULTIMATE)
51 | 
52 |         # then take the sub graph for the parent from what is left
53 |         parent_sub = g_without_ultimate_edge.sub(parent)
54 | 
55 |         # hand back the parent's sub graph together with the parent's lei
56 |         return parent_sub, parent
57 | 


--------------------------------------------------------------------------------
/src/algorithms/graph_samsung_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pytest
 3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder
 4 | from graph import Graph
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def builder():
 9 |     return DirectNodeGraphWithParentNetworkBuilder()
10 | 
11 | 
12 | @pytest.fixture
13 | def rr_csv(request):
14 |     return os.path.join(request.config.rootdir, "data", "gleif_rr.csv")
15 | 
16 | 
17 | @pytest.fixture
18 | def lookup_csv(request):
19 |     return os.path.join(request.config.rootdir, "data", "gleif_lei.csv")
20 | 
21 | 
22 | def test_samsung_ultimate_parent(builder, lookup_csv, rr_csv):
23 |     samsung_lei = "549300KYVNLA5XR0HT53"
24 |     ultimate_parent_lei = "9884007ER46L6N7EI764"
25 | 
26 |     glei_network = Graph.from_csv(f=rr_csv, limit=None)
27 |     Graph.set_lookup_table(f=lookup_csv)
28 | 
29 |     parent_graph, _ = builder.build(glei_network, samsung_lei)
30 |     structure = parent_graph.set_levels(ultimate_parent_lei).to_array()
31 | 
32 |     samsung_node = [n for n in structure["nodes"] if n["id"] == samsung_lei][0]
33 |     ultimate_parent_node = [
34 |         n for n in structure["nodes"] if n["id"] == ultimate_parent_lei
35 |     ][0]
36 | 
37 |     # nodes between ultimate parent and samsung
38 |     intermediate_nodes = [n for n in structure["nodes"] if n["level"] == 1]
39 | 
40 |     assert ultimate_parent_node["level"] == 0
41 |     assert len(intermediate_nodes) > 0
42 |     assert samsung_node["level"] == 2
43 | 
44 |     # assert that the correct edges exist
45 |     # samsung gmbh -> samsung holding gmbh
46 |     assert (
47 |         find_edge(structure["edges"], samsung_lei, "549300CWESV5NI78YL42") is not None
48 |     )
49 |     # samsung holding gmbh -> korean samsung / ultimate parent
50 |     assert (
51 |         find_edge(structure["edges"], "549300CWESV5NI78YL42", ultimate_parent_lei)
52 |         is not None
53 |     )
54 | 
55 | 
56 | def find_edge(edges, from_lei, to_lei):
57 |     for e in edges:
58 |         if e["from"] == from_lei and e["to"] == to_lei:
59 |             return e
60 |     return None
61 | 
62 | 


--------------------------------------------------------------------------------
/src/test_data/rr-test.csv:
--------------------------------------------------------------------------------
1 | Relationship.StartNode.NodeID,Relationship.StartNode.NodeIDType,Relationship.EndNode.NodeID,Relationship.EndNode.NodeIDType,Relationship.RelationshipType,Relationship.RelationshipStatus,Registration.InitialRegistrationDate,Registration.LastUpdateDate,Registration.RegistrationStatus,Registration.NextRenewalDate,Registration.ManagingLOU,Registration.ValidationSources,Registration.ValidationDocuments,Registration.ValidationReference,Relationship.Period.1.startDate,Relationship.Period.1.endDate,Relationship.Period.1.periodType,Relationship.Period.2.startDate,Relationship.Period.2.endDate,Relationship.Period.2.periodType,Relationship.Period.3.startDate,Relationship.Period.3.endDate,Relationship.Period.3.periodType,Relationship.Period.4.startDate,Relationship.Period.4.endDate,Relationship.Period.4.periodType,Relationship.Period.5.startDate,Relationship.Period.5.endDate,Relationship.Period.5.periodType,Relationship.Qualifiers.1.QualifierDimension,Relationship.Qualifiers.1.QualifierCategory,Relationship.Qualifiers.2.QualifierDimension,Relationship.Qualifiers.2.QualifierCategory,Relationship.Qualifiers.3.QualifierDimension,Relationship.Qualifiers.3.QualifierCategory,Relationship.Qualifiers.4.QualifierDimension,Relationship.Qualifiers.4.QualifierCategory,Relationship.Qualifiers.5.QualifierDimension,Relationship.Qualifiers.5.QualifierCategory,Relationship.Qualifiers.1.MeasurementMethod,Relationship.Qualifiers.1.QuantifierAmount,Relationship.Qualifiers.1.QuantifierUnits,Relationship.Qualifiers.2.MeasurementMethod,Relationship.Qualifiers.2.QuantifierAmount,Relationship.Qualifiers.2.QuantifierUnits,Relationship.Qualifiers.3.MeasurementMethod,Relationship.Qualifiers.3.QuantifierAmount,Relationship.Qualifiers.3.QuantifierUnits,Relationship.Qualifiers.4.MeasurementMethod,Relationship.Qualifiers.4.QuantifierAmount,Relationship.Qualifiers.4.QuantifierUnits,Relationship.Qualifiers.5.MeasurementMethod,Relationship.Qualifiers.5.QuantifierAmount,Relationship.Qualifiers.5.QuantifierUni
ts
2 | LEI_1,LEI,DIRECT_PARENT_LEI,LEI,IS_DIRECTLY_CONSOLIDATED_BY,ACTIVE,2012-11-29T16:33:00.000Z,2019-06-18T14:32:00.000Z,PUBLISHED,2020-06-14T10:17:00.000Z,EVK05KS7XY1DEII3R011,ENTITY_SUPPLIED_ONLY,SUPPORTING_DOCUMENTS,,2017-01-01T00:00:00.000Z,2017-12-31T00:00:00.000Z,ACCOUNTING_PERIOD,2018-06-15T00:00:00.000Z,,RELATIONSHIP_PERIOD,,,,,,,,,,ACCOUNTING_STANDARD,,,,,,,,,,,,,,,,,,,,,,,,
3 | LEI_1,LEI,ULTIMATE_PARENT_LEI,LEI,IS_ULTIMATELY_CONSOLIDATED_BY,ACTIVE,2012-11-29T16:33:00.000Z,2019-06-18T14:32:00.000Z,PUBLISHED,2020-06-14T10:17:00.000Z,EVK05KS7XY1DEII3R011,ENTITY_SUPPLIED_ONLY,SUPPORTING_DOCUMENTS,,2017-01-01T00:00:00.000Z,2017-12-31T00:00:00.000Z,ACCOUNTING_PERIOD,2018-06-15T00:00:00.000Z,,RELATIONSHIP_PERIOD,,,,,,,,,,ACCOUNTING_STANDARD,,,,,,,,,,,,,,,,,,,,,,,,


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # GLEIF Level-2 Visualization (Server)
  2 | 
  3 | The code was written during a hackathon between [GLEIF](https://www.gleif.org/) and [CorrelAid](https://www.correlaid.org). :rocket: The goal of the project was to visualize relational data between legal entities that are registered at GLEIF. More information about the open data available at GLEIF can be found here: [https://www.gleif.org/en/lei-data/access-and-use-lei-data](https://www.gleif.org/en/lei-data/access-and-use-lei-data). Furthermore, an introductory blog post can be found here: [https://correlaid.org/en/blog/gleif-hackathon/](https://correlaid.org/en/blog/gleif-hackathon/).
  4 | 
  5 | This is the server of the project. The client can be found here: [https://github.com/CorrelAid/gleif-level2-client](https://github.com/CorrelAid/gleif-level2-client).
  6 | 
  7 | ## Limitations
  8 | 
  9 | Due to the nature of hackathons the current version of the tool includes the following limitations:
 10 | 
 11 | - The documentation of the code might be partially incomplete. If you come across an issue, please file an issue within this repository.
 12 | - The backend reacts quite slowly due to the focus on functionality during the hackathon.
 13 | - There might be some edge-cases that are not yet handled.
 14 | 
 15 | ## License
 16 | 
 17 | The tool is licensed under CC0.
 18 | 
 19 | ## Prerequisites 
 20 | 
 21 | You need a `data` directory with the following files:
 22 | 
 23 | - `gleif_lei.csv` (*LEI-CDF*)
 24 | - `gleif_rr.csv` (*RR-CDF*)
 25 | 
 26 | If you're on Linux, you can use the `data/download.sh` script; for Mac users there is the `data/download_mac.sh` script. Both are to be executed in the `data` directory. The scripts download a fixed snapshot (2019-07-19) of the golden copy files from the [GLEIF website](https://www.gleif.org/en/lei-data/gleif-golden-copy/download-the-golden-copy/#/) and keep only the first two columns (LEI and legal name) of the `lei` dataset in order to make it small enough for most local RAMs.  
 27 | If you're on a Windows operating system, you'll need to [download the files manually](https://www.gleif.org/en/lei-data/gleif-golden-copy/download-the-golden-copy/#/) and find a way to reduce the file size of the `lei` dataset.
 28 | 
 29 | 
 30 | ## API docs
 31 | 
 32 | You can find the Swagger API docs under [http://localhost:8000/docs](http://localhost:8000/docs) after you have started the app either directly on your machine or with docker (see below).
 33 | 
 34 | 
 35 | ## Local development
 36 | 
 37 | ### without docker
 38 | 
 39 | #### run
 40 | 
 41 | e.g. in conda / venv:
 42 | -> with reload enabled
 43 | 
 44 | ```
 45 | (cd src && uvicorn app:api --reload)
 46 | ```
 47 | 
 48 | This makes the API available under [http://localhost:8000/](http://localhost:8000/).
 49 | 
 50 | #### test
 51 | 
 52 | ```
 53 | pytest
 54 | ```
 55 | 
 56 | ### with docker
 57 | 
 58 | #### build
 59 | 
 60 | ```
 61 | docker-compose build
 62 | ```
 63 | 
 64 | #### test
 65 | 
 66 | run the tests within a docker container:
 67 | 
 68 | ```
 69 | ./test.sh
 70 | ```
 71 | 
 72 | #### run
 73 | 
 74 | to run:
 75 | 
 76 | ```
 77 | docker-compose up
 78 | ```
 79 | 
 80 | or daemonized (detached, running in the background):
 81 | 
 82 | ```
 83 | docker-compose up -d
 84 | ```
 85 | 
 86 | This makes the API available under [http://localhost:8000/](http://localhost:8000/).
 87 | 
 88 | ### logs 
 89 | 
 90 | ```
 91 | docker-compose logs -f
 92 | ```
 93 | 
 94 | ## server
 95 | 
 96 | For development purposes, we had a server / virtual machine in the Azure cloud. Things should work similarly on your server.
 97 | 
 98 | ### clone repository to your server
 99 | 
100 | ```
101 | git clone git@github.com:CorrelAid/gleif-level2-server.git
102 | ```
103 | 
104 | or
105 | 
106 | ```
107 | git clone https://github.com/CorrelAid/gleif-level2-server.git
108 | ```
109 | 
110 | depending on your GitHub authentication preferences.
111 | 
112 | 
113 | ### build docker images
114 | 
115 | ```
116 | docker-compose build 
117 | ```
118 | 
119 | ### start stack
120 | 
121 | ```
122 | docker-compose up -d
123 | ```
124 | 
125 | This makes the API available under [http://localhost:8000/](http://localhost:8000/) on your server. Configure a reverse proxy (e.g. [Nginx](https://www.nginx.com/)) to make the API available to other machines. 
126 | 
127 | 
128 | ### logs 
129 | 
130 | ```
131 | docker-compose logs -f
132 | ```
133 | 
134 | 
135 | ### stop stack
136 | 
137 | ```
138 | docker-compose down
139 | ```
140 | 


--------------------------------------------------------------------------------
/src/algorithms/graph_builder_test.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from graph import RR, Graph
  3 | from graph_builder import DirectNodeGraphWithParentNetworkBuilder
  4 | 
  5 | @pytest.fixture
  6 | def builder():
  7 |     return DirectNodeGraphWithParentNetworkBuilder()
  8 | 
  9 | def test_parent_subgraphs_are_copies(builder):
 10 | 
 11 |     #           UP <**
 12 |     #           |    *
 13 |     #         UP:C1  *
 14 |     #                *
 15 |     #                *
 16 |     #           P1   *
 17 |     #           |    *
 18 |     #          ROI***
 19 | 
 20 |     g = Graph([
 21 |         RR('ROI', 'P1', RR.DIRECT),
 22 |         RR('ROI', 'UP', RR.ULTIMATE),
 23 | 
 24 |         RR('UP:C1', 'UP', RR.DIRECT),
 25 |     ])
 26 | 
 27 |     parent_graph, _ = builder.ultimate_parent_direct_graph(g, 'ROI')
 28 |     roi_graph = builder.node_direct_graph(g, 'ROI')
 29 | 
 30 |     assert sorted(list(parent_graph.nodes)) == ['UP', 'UP:C1']
 31 |     assert sorted(list(roi_graph.nodes)) == ['P1', 'ROI']
 32 |     assert sorted(list(g.nodes)) == ['P1', 'ROI', 'UP', 'UP:C1']
 33 | 
 34 | 
 35 | def test_parent_graph_connected_with_ROI(builder):
 36 | 
 37 |     #         UP:P1   <-- should not happen!
 38 |     #        /  |
 39 |     # UP:P1:C1  |
 40 |     #         --UP <**
 41 |     #        /  |    *
 42 |     #    UP:C1  P1   *
 43 |     #           |    *
 44 |     #          ROI***
 45 |     #          / \
 46 |     #         C1  C2
 47 | 
 48 |     g = Graph([
 49 |         RR('ROI', 'UP', RR.ULTIMATE),
 50 | 
 51 |         RR('C1', 'ROI', RR.DIRECT),
 52 |         RR('C2', 'ROI', RR.DIRECT),
 53 |         RR('ROI', 'P1', RR.DIRECT),
 54 |         RR('P1', 'UP', RR.DIRECT),
 55 |         RR('UP:C1', 'UP', RR.DIRECT),
 56 |         RR('UP', 'UP:P1', RR.DIRECT),
 57 |         RR('UP:P1:C1', 'UP:P1', RR.DIRECT),
 58 |     ])
 59 | 
 60 |     parent_graph, _ = builder.ultimate_parent_direct_graph(g, 'ROI')
 61 |     assert sorted(list(parent_graph.nodes)) == ['C1', 'C2', 'P1', 'ROI', 'UP', 'UP:C1', 'UP:P1', 'UP:P1:C1']
 62 | 
 63 | 
 64 | def test_graph_with_ROI_and_ultimate_parent_not_connected_via_direct_relationships(builder):
 65 | 
 66 |     #         UP:P1   <-- should not happen!
 67 |     #        /  |
 68 |     # UP:P1:C1  |
 69 |     #         --UP <**
 70 |     #        /  ^    *
 71 |     #    UP:C1  *    *
 72 |     #           *    *
 73 |     #           *    *
 74 |     #           P1   *
 75 |     #           |    *
 76 |     #          ROI***
 77 |     #          / \
 78 |     #         C1  C2
 79 | 
 80 |     g = Graph([
 81 |         RR('ROI', 'UP', RR.ULTIMATE),
 82 |         RR('ROI', 'P1', RR.DIRECT),
 83 |         RR('P1', 'UP', RR.ULTIMATE),  # <-- decoy; should also not make the graphs connected
 84 | 
 85 |         RR('C1', 'ROI', RR.DIRECT),
 86 |         RR('C2', 'ROI', RR.DIRECT),
 87 |         RR('UP:C1', 'UP', RR.DIRECT),
 88 |         RR('UP', 'UP:P1', RR.DIRECT),
 89 |         RR('UP:P1:C1', 'UP:P1', RR.DIRECT),
 90 |     ])
 91 | 
 92 |     # Sanity check; parent graph is only:
 93 |     parent_graph, _ = builder.ultimate_parent_direct_graph(g, 'ROI')
 94 |     assert sorted(list(parent_graph.nodes)) == ['UP', 'UP:C1', 'UP:P1', 'UP:P1:C1']
 95 |     assert sorted(list(parent_graph.edges)) == [
 96 |         ('UP', 'UP:P1', 0),
 97 |         ('UP:C1', 'UP', 0),
 98 |         ('UP:P1:C1', 'UP:P1', 0),
 99 |     ]
100 | 
101 |     roi_graph = builder.node_direct_graph(g, 'ROI')
102 |     #  assert sorted(list(roi_graph.nodes)) == ['UP', 'UP:C1', 'UP:P1', 'UP:P1:C1']
103 |     assert sorted(list(roi_graph.nodes)) == ['C1', 'C2', 'P1', 'ROI']
104 |     assert sorted(list(roi_graph.edges(data='type'))) == [
105 |         # ROI edges
106 |         ('C1', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'),
107 |         ('C2', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'),
108 |         ('ROI', 'P1', 'IS_DIRECTLY_CONSOLIDATED_BY'),
109 |     ]
110 | 
111 |     merged_graph, _ = builder.build(g, 'ROI')
112 |     assert sorted(list(merged_graph.nodes)) == ['C1', 'C2', 'P1', 'ROI', 'UP', 'UP:C1', 'UP:P1', 'UP:P1:C1']
113 |     #  assert sorted(list(merged_graph.edges(data='type'))) == [
114 |     #      # ROI edges
115 |     #      ('C1', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'),
116 |     #      ('C2', 'ROI', 'IS_DIRECTLY_CONSOLIDATED_BY'),
117 |     #      ('ROI', 'P1', 'IS_DIRECTLY_CONSOLIDATED_BY'),
118 | 
119 |     #      # TODO: Return direct graph except for ROI -> ultimate parent edge?
120 |     #      #  ('ROI', 'UP', 'IS_ULTIMATELY_CONSOLIDATED_BY'),
121 | 
122 |     #      # Ultimate Parent edges
123 |     #      ('UP', 'UP:P1', 'IS_DIRECTLY_CONSOLIDATED_BY'),
124 |     #      ('UP:C1', 'UP', 'IS_DIRECTLY_CONSOLIDATED_BY'),
125 |     #      ('UP:P1:C1', 'UP:P1', 'IS_DIRECTLY_CONSOLIDATED_BY'),
126 |     #  ]
127 | 
128 | 
129 | 
130 | def test_ROI_without_ultimate_parent(builder):
131 | 
132 |     # CASE: No Ultimate Parent
133 |     #          P2
134 |     #          |
135 |     #     -----P1
136 |     #    /     |
137 |     # P1:C1   ROI  C2:P1  C2:UP1
138 |     #         / \  /     /
139 |     #        C1  C2------
140 | 
141 |     g = Graph([
142 |         RR('C1', 'ROI', RR.DIRECT),
143 |         RR('C2', 'ROI', RR.DIRECT),
144 |         RR('C2', 'C2:P1', RR.DIRECT),
145 |         RR('C2', 'C2:UP1', RR.ULTIMATE),
146 |         RR('P1:C1', 'P1', RR.DIRECT),
147 |         RR('ROI', 'P1', RR.DIRECT),
148 |         RR('P1', 'P2', RR.DIRECT),
149 |     ])
150 | 
151 |     sub, _ = builder.build(g, 'ROI')
152 | 
153 |     #  assert sub.nodes == ['P1']
154 |     #  assert sub.('type') == 'x'
155 | 
156 | 
157 | 
158 | 
159 |     # TODO CASE: Multiple Ultimate Parents?
160 | 
161 |     # CASE:
162 |     #    UP
163 |     #      \
164 |     #       |
165 |     #       |
166 |     #       |
167 |     #       |
168 |     #       |
169 | 
170 |     #  RR('A', 'B', RR.DIRECT),
171 |     #  RR('C', 'B', RR.DIRECT),
172 |     #  RR('C', 'D', RR.ULTIMATE),
173 |     #  RR('E', 'C', RR.ULTIMATE),
174 |     #  RR('X', 'C', RR.DIRECT),
175 |     #  RR('X', 'F', RR.DIRECT),
176 |     #  RR('G', 'X', RR.BRANCH),
177 |     #  RR('H', 'X', RR.DIRECT),
178 |     #  RR('H', 'K', RR.DIRECT),
179 |     #  RR('I', 'G', RR.DIRECT),
180 |     #  RR('I', 'J', RR.DIRECT),
181 | 
182 |     #  print('hey')
183 |     assert True


--------------------------------------------------------------------------------
/src/algorithms/graph.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import csv
  3 | import copy
  4 | from typing import Iterator, KeysView, Union
  5 | 
  6 | import networkx as nx
  7 | import pandas as pd
  8 | 
  9 | 
 10 | def iter_csv(f: str, limit: Union[int, None] = None) -> Iterator[dict]:
 11 |     """
 12 |     Lazily yield the rows of the csv file at path f as csv.DictReader rows; stops after limit rows unless limit is None.
 13 |     """
 14 |     with open(f) as csvfile:  # NOTE(review): the csv module docs recommend opening the file with newline=''
 15 |         reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
 16 |         for i, row in enumerate(reader):
 17 |             if limit is not None and limit < i + 1:  # i is 0-based, so this is true once limit rows were yielded
 18 |                 return
 19 |             yield row
 20 | 
 21 | 
 22 | class RR:  # one relationship record: start node, end node and relationship type
 23 |     DIRECT = 'IS_DIRECTLY_CONSOLIDATED_BY'  # the end node is the direct parent of the start node
 24 |     #  DIRECT_CHILD = 'direct_child'
 25 | 
 26 |     ULTIMATE = 'IS_ULTIMATELY_CONSOLIDATED_BY'  # the end node is the ultimate parent of the start node
 27 |     #  ULTIMATE_CHILD = 'ultimate_child'
 28 | 
 29 |     BRANCH = 'IS_INTERNATIONAL_BRANCH_OF'  # relationship type string for international branches
 30 | 
 31 |     #  HEADQUARTERS = 'headquarters'
 32 | 
 33 |     def __init__(self, start: str, end: str, rel_type: str) -> None:
 34 |         self.start = start  # id of the node the relationship starts at
 35 |         self.end = end  # id of the node the relationship points to
 36 |         self.rel_type = rel_type  # relationship type string, e.g. RR.DIRECT
 37 | 
 38 |     @staticmethod
 39 |     def from_csv_row(row: dict) -> 'RR':  # builds an RR from one row (column name -> value) of the relationship csv
 40 |         return RR(
 41 |             row['Relationship.StartNode.NodeID'],
 42 |             row['Relationship.EndNode.NodeID'],
 43 |             row['Relationship.RelationshipType']
 44 |         )
 45 | 
 46 | 
 47 | class Graph:
 48 |     lookup_table = pd.DataFrame()
 49 | 
 50 |     def __init__(self, rr: Iterator[RR]):
 51 |         self.g = nx.MultiDiGraph()
 52 |         self.__load_rr(rr)
 53 | 
 54 |     def __str__(self):
 55 |         return self.to_json()
 56 | 
 57 |     @property
 58 |     def nodes(self):
 59 |         return self.g.nodes
 60 | 
 61 |     @property
 62 |     def edges(self):
 63 |         return self.g.edges
 64 | 
 65 |     @property
 66 |     def out_edges(self):
 67 |         return self.g.out_edges
 68 | 
 69 |     @property
 70 |     def in_edges(self):
 71 |         return self.g.in_edges
 72 | 
 73 |     def __load_rr(self, rr: Iterator[RR]):
 74 |         """
 75 |         This helper function is used to build the graph from csv files.
 76 |         It reads the individual rows from a tuple generator representing lines
 77 |         in the csv file.
 78 |         """
 79 | 
 80 |         def mk_edge(rr: RR):
 81 |             """
 82 |             Edge transformation function to bring the edge format from custom
 83 |             class RR to networkx tuple form (start, end, data).
 84 |             """
 85 |             return rr.start, rr.end, {'type': rr.rel_type}
 86 | 
 87 |         self.g.add_edges_from(map(mk_edge, list(rr)))
 88 | 
 89 |     def deepcopy(self) -> 'Graph':
 90 |         return copy.deepcopy(self)
 91 | 
 92 |     def merge(self, other_graph: 'Graph') -> 'Graph':
 93 |         """
 94 |         Wrapper function to merge the Networkx graph attributes
 95 |         of the custom Graph class.
 96 |         """
 97 |         return Graph.from_graph(nx.compose(self.g, other_graph.g))
 98 | 
 99 |     def get_edge_data(self, u: str, v: str, key: str = None, default: dict = None):
100 |         """
101 |         Wrapper function to retrieve data associated with the specified edge.
102 |         """
103 |         default = {} if not default else default
104 |         return self.g.get_edge_data(u, v, key, default)
105 | 
106 |     def get_edge_types(self, u: str, v: str) -> list:
107 |         """
108 |         Wrapper function to retrieve edge type.
109 |         """
110 |         return [e['type'] for e in self.get_edge_data(u, v).values()]
111 | 
112 |     def get_direct_parent(self, node: str) -> str:
113 |         """
114 |         This function retrieves the direct parent of a given edge
115 |         based on the edge relation type.
116 |         """
117 |         for e in self.out_edges(node):
118 |             if 'IS_DIRECTLY_CONSOLIDATED_BY' in self.get_edge_types(e[0], e[1]):
119 |                 return e[1]
120 | 
121 |     def get_ultimate_parent(self, node: str) -> str:
122 |         """
123 |         This function retrieves the ultimate parent of a given edge
124 |         based on the edge relation type.
125 |         """
126 |         for e in self.out_edges(node):
127 |             if 'IS_ULTIMATELY_CONSOLIDATED_BY' in self.get_edge_types(e[0], e[1]):
128 |                 return e[1]
129 | 
130 |     def remove_edge_type(self, rel_type: str):
131 |         """
132 |         This function removes all edges of a given type from the graph.
133 |         It updates the graph object inplace.
134 |         """
135 |         remove = [(u, v, key) for (u, v, key) in self.edges if self.get_edge_data(u, v, key=key)['type'] == rel_type]
136 |         self.g.remove_edges_from(remove)
137 |         return self
138 | 
139 |     def has_direct_parent(self, node: str) -> bool:
140 |         """
141 |         Convenience function to check if a node has a direct parent.
142 |         """
143 |         return self.get_direct_parent(node) is not None
144 | 
145 |     def has_ultimate_parent(self, node: str) -> bool:
146 |         """
147 |         Convenience function to check if a node has a ultimate parent.
148 |         """
149 |         return self.get_ultimate_parent(node) is not None
150 | 
151 |     def connected_nodes(self, lei: str) -> KeysView:
152 |         """
153 |         This function transforms the graph to a undirected version of itself
154 |         and finds all connected nodes for a given LEI identifier, based on all
155 |         computable paths from the LEI node.
156 |         """
157 |         # NOTE: Convertinv graph to undirected, in order to easily get all connected nodes regardless of edge direction
158 |         #       (i.e. including inbound connections)
159 |         return nx.single_source_shortest_path(self.g.to_undirected(), lei).keys()
160 | 
161 |     def get_shortest_direct_parent_path_lengths(self, reference_node: str) -> dict:
162 |         """
163 |         This function computes the path lengths from a given reference to all
164 |         other via direct parent edges reachable nodes. It does NOT convert the graph to a undirected
165 |         version before and respects directions. Dict form is {node_id: distance}.
166 |         """
167 |         g = self.deepcopy().remove_edge_type(RR.ULTIMATE)  # TODO: What about BRANCH?
168 |         return dict(nx.single_target_shortest_path_length(g.g, reference_node))
169 | 
170 |     def sub(self, lei: str) -> 'Graph':
171 |         """
172 |         This function subsets the graph based on the nodes connected with the
173 |         given LEI node.
174 |         """
175 |         self.g.add_node(lei)  # Add dummy node
176 |         nodes = self.connected_nodes(lei)
177 |         return Graph.from_graph(self.g.subgraph(nodes))
178 | 
179 |     def get_node_label(self, lei: str) -> str:
180 |         """
181 |         Wrapper function to retrieve the legal name of an entity based
182 |         on its LEI from the lookup table attached to the graph.
183 |         """
184 |         try:
185 |             return self.lookup_table.loc[lei]['Entity.LegalName']
186 |         except KeyError:
187 |             return 'id not found'
188 | 
189 |     def transform_node(self, node: dict) -> dict:
190 |         """
191 |         Convenience function to rename node dictionary keys for final return array.
192 |         """
193 |         return {
194 |             'id': node['id'],
195 |             'title': node['id'],
196 |             'label': self.get_node_label(node['id']),
197 |             'level': node.get('level'),
198 |             'no_parent': node.get('no_parent'),
199 |         }
200 | 
201 |     def transform_link(self, link: dict) -> dict:
202 |         """
203 |         Convenience function to rename edge dictionary keys for final return array.
204 |         """
205 |         return {
206 |             'from': link['source'],
207 |             'to': link['target'],
208 |             'label': link['type'],
209 |         }
210 | 
211 |     def to_array(self) -> dict:
212 |         """
213 |         Convenience function for preparing the graph data to json dump.
214 |         """
215 |         data = nx.node_link_data(self.g)
216 |         return {
217 |             'nodes': list(map(self.transform_node, data['nodes'])),
218 |             'edges': list(map(self.transform_link, data['links'])),
219 |         }
220 | 
221 |     def to_json(self):
222 |         return json.dumps(self.to_array(), indent=2)
223 | 
224 |     def set_levels(self, parent: str = None) -> 'Graph':
225 |         """
226 |         This function sets the levels on a graph as a node attribute.
227 |         """
228 |         subgraph = self
229 |         if parent:
230 |             distances = self._level_computation(subgraph=subgraph, root_node=parent)
231 |             distances = {
232 |                 node: {
233 |                     'level': distances[node] if distances[node] is not 'no_parent' else 1,
234 |                     'no_parent': False if distances[node] is not 'no_parent' else True
235 |                 } for node in distances
236 |             }
237 |             nx.set_node_attributes(subgraph.g, distances)
238 |         else:
239 |             print('No parent found. TODO')
240 |             raise ValueError
241 |         return subgraph
242 | 
243 |     @staticmethod
244 |     def _level_computation(subgraph, root_node: str) -> dict:
245 |         """
246 |         This function computes the levels with respect to the given root node by using
247 |         single target shortest path algorithm from networkx. It returns a dictionary
248 |         of node ids with the computed depth in the graph.
249 |         """
250 |         compute_graph = subgraph.remove_edge_type(rel_type=RR.ULTIMATE)
251 |         distances = nx.single_target_shortest_path_length(compute_graph.g, target=root_node)
252 |         distances = dict(distances)
253 |         distances.update({node: 'no_parent' for node in subgraph.nodes if not distances.get(node) and
254 |                           node is not root_node})
255 |         distances.update({root_node: 0})
256 |         return distances
257 | 
258 |     @staticmethod
259 |     def from_graph(_g: nx.MultiDiGraph) -> 'Graph':
260 |         g = Graph([])
261 |         g.g = copy.deepcopy(_g)
262 |         return g
263 | 
264 |     @staticmethod
265 |     def set_lookup_table(f):
266 |         Graph.lookup_table = pd.read_csv(f, index_col=["LEI"], usecols=["LEI", "Entity.LegalName"])
267 | 
268 |     @staticmethod
269 |     def from_csv(f: str, limit: int = None) -> 'Graph':
270 |         return Graph(RR.from_csv_row(row) for row in iter_csv(f, limit))
271 | 


--------------------------------------------------------------------------------
/src/algorithms/graph_test.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from os import path
  3 | from graph import RR, Graph
  4 | import pandas as pd 
  5 | 
  6 | @pytest.fixture
  7 | def rr_test_csv(request):
  8 |     return path.join(request.config.rootdir, 'src/test_data', 'rr-test.csv')
  9 | 
 10 | @pytest.fixture
 11 | def lookup_test_csv(request):
 12 |     return path.join(request.config.rootdir, 'src/test_data', 'lei-test.csv')
 13 | 
 14 | def test_RR():
 15 |     rr = RR('LEI_1', 'LEI_2', RR.DIRECT)
 16 | 
 17 |     assert rr.start == 'LEI_1'
 18 |     assert rr.end == 'LEI_2'
 19 |     assert rr.rel_type == 'IS_DIRECTLY_CONSOLIDATED_BY'
 20 | 
 21 |     direct = RR('LEI_1', 'LEI_2', RR.DIRECT)
 22 |     assert direct.rel_type == 'IS_DIRECTLY_CONSOLIDATED_BY'
 23 | 
 24 |     ultimate = RR('LEI_1', 'LEI_2', RR.ULTIMATE)
 25 |     assert ultimate.rel_type == 'IS_ULTIMATELY_CONSOLIDATED_BY'
 26 | 
 27 |     branch = RR('LEI_1', 'LEI_2', RR.BRANCH)
 28 |     assert branch.rel_type == 'IS_INTERNATIONAL_BRANCH_OF'
 29 | 
 30 | def test_Graph_from_file(rr_test_csv):
 31 |     g = Graph.from_csv(rr_test_csv)
 32 | 
 33 |     assert list(g.nodes) == ['LEI_1', 'DIRECT_PARENT_LEI', 'ULTIMATE_PARENT_LEI']
 34 |     assert list(g.edges) == [
 35 |             ('LEI_1', 'DIRECT_PARENT_LEI', 0),
 36 |             ('LEI_1', 'ULTIMATE_PARENT_LEI', 0),
 37 |     ]
 38 | 
 39 | def test_node_get_direct_and_ultimate_parent():
 40 |     g = Graph([])
 41 | 
 42 |     assert g.get_direct_parent('ROI') is None
 43 |     assert g.get_ultimate_parent('ROI') is None
 44 | 
 45 |     g = Graph([
 46 |         RR('ROI', 'P1', RR.DIRECT),
 47 |     ])
 48 |     assert g.get_direct_parent('ROI') == 'P1'
 49 |     assert g.get_ultimate_parent('ROI') is None
 50 | 
 51 |     g = Graph([
 52 |         RR('ROI', 'UP1', RR.ULTIMATE),
 53 |     ])
 54 | 
 55 |     assert g.get_direct_parent('ROI') is None
 56 |     assert g.get_ultimate_parent('ROI') == 'UP1'
 57 | 
 58 |     g = Graph([
 59 |         RR('ROI', 'P1', RR.DIRECT),
 60 |         RR('ROI', 'UP1', RR.ULTIMATE),
 61 |     ])
 62 | 
 63 |     assert g.get_direct_parent('ROI') == 'P1'
 64 |     assert g.get_ultimate_parent('ROI') == 'UP1'
 65 | 
 66 |     # Catches inconsistency (more than one direct/ultimate parent)
 67 |     # TODO: Do while initializing graph?
 68 | 
 69 |     #  g = Graph([
 70 |     #      RR('ROI', 'P1', RR.DIRECT),
 71 |     #      RR('ROI', 'P1', RR.DIRECT),
 72 |     #  ])
 73 |     #  try:
 74 |     #      g.get_direct_parent('ROI')
 75 |     #  except Exception as e:
 76 |     #      assert str(e) == 'Found more than one Direct Parent for node ROI'
 77 |     #      return
 78 |     #  assert False, 'Expected Exception due to multiple direct parents'
 79 | 
 80 | def test_node_has_direct_and_ultimate_parent():
 81 |     g = Graph([])
 82 | 
 83 |     assert not g.has_direct_parent('ROI')
 84 |     assert not g.has_ultimate_parent('ROI')
 85 | 
 86 |     g = Graph([
 87 |         RR('ROI', 'P1', RR.DIRECT),
 88 |         RR('ROI', 'UP1', RR.ULTIMATE),
 89 |     ])
 90 | 
 91 |     assert g.has_direct_parent('ROI')
 92 |     assert g.has_ultimate_parent('ROI')
 93 | 
 94 | def test_remove_edge_type():
 95 |     g = Graph([
 96 |         RR('ROI', 'P1', RR.DIRECT),
 97 |         RR('ROI', 'UP1', RR.ULTIMATE),
 98 |         RR('A', 'B', RR.ULTIMATE),
 99 |         RR('C', 'A', RR.ULTIMATE),
100 |         RR('C', 'A', RR.DIRECT),
101 |         RR('A', 'C', RR.ULTIMATE),
102 |     ])
103 | 
104 |     g.remove_edge_type(RR.ULTIMATE)
105 |     assert sorted(list(g.edges)) == [
106 |         ('C', 'A', 1),
107 |         ('ROI', 'P1', 0),
108 |     ]
109 | 
def test_lookup_read_in(lookup_test_csv):
    """After set_lookup_table the lookup table seen through an instance is a DataFrame."""
    graph = Graph([])
    Graph.set_lookup_table(lookup_test_csv)

    table = graph.lookup_table
    assert isinstance(table, pd.DataFrame)
115 | 
def test_lookup(rr_test_csv, lookup_test_csv):
    """The lookup fixture has three rows and resolves LEI_1 to its legal name."""
    graph = Graph.from_csv(rr_test_csv)
    Graph.set_lookup_table(lookup_test_csv)

    row_count = graph.lookup_table.shape[0]
    assert row_count == 3
    assert graph.get_node_label("LEI_1") == "company1"
121 | 
def test_node_not_found_in_G(rr_test_csv, lookup_test_csv):
    """An LEI absent from the graph but present in the lookup table yields one labelled node."""
    graph = Graph.from_csv(rr_test_csv)
    Graph.set_lookup_table(lookup_test_csv)

    result = graph.sub('LEI_2').to_array()
    nodes, edges = result['nodes'], result['edges']

    lonely_node = {
        'id': 'LEI_2',
        'title': 'LEI_2',
        'label': 'company2',
        'level': None,
        'no_parent': None,
    }

    assert graph.lookup_table.shape[0] == 3
    assert edges == []
    assert nodes == [lonely_node]
139 | 
def test_node_not_found_in_G_and_lookup(rr_test_csv, lookup_test_csv):
    """An LEI absent from both graph and lookup table yields one node labelled 'id not found'."""
    graph = Graph.from_csv(rr_test_csv)
    graph.set_lookup_table(lookup_test_csv)

    result = graph.sub('LEI_NOT_FOUND').to_array()
    nodes, edges = result['nodes'], result['edges']

    unknown_node = {
        'id': 'LEI_NOT_FOUND',
        'title': 'LEI_NOT_FOUND',
        'label': 'id not found',
        'level': None,
        'no_parent': None,
    }

    assert edges == []
    assert nodes == [unknown_node]
156 | 
def test_node_found_in_G(rr_test_csv, lookup_test_csv):
    """An LEI present in the graph returns its three nodes and two edges."""
    graph = Graph.from_csv(rr_test_csv)
    graph.set_lookup_table(lookup_test_csv)

    result = graph.sub('LEI_1').to_array()
    nodes, edges = result['nodes'], result['edges']

    assert len(edges) == 2
    assert len(nodes) == 3
    # The requested node must be labelled with its legal name.
    assert any(node['label'] == 'company1' for node in nodes)
168 | 
def test_Graph_to_array(lookup_test_csv):
    """to_array() renders a one-edge graph as labelled node dicts and a from/to/label edge dict."""

    def node(lei, name):
        # Node shape produced for a graph on which no levels have been set.
        return {'id': lei, 'title': lei, 'label': name, 'level': None, 'no_parent': None}

    graph = Graph([
        RR('LEI_1', 'LEI_2', RR.DIRECT),

        # TODO: More cases, e.g.
        #  RR('LEI_1', 'LEI_3', RR.ULTIMATE),
        #  RR('LEI_2', 'LEI_X', RR.DIRECT),
        #  RR('LEI_X', 'LEI_1', RR.ULTIMATE),
        #  RR('LEI_A', 'LEI_X', RR.DIRECT),
        #  RR('LEI_A', 'LEI_X', RR.ULTIMATE),
        #  RR('LEI_B', 'LEI_Z', RR.DIRECT),
        #  RR('LEI_B', 'LEI_I', RR.ULTIMATE),
    ])
    print(lookup_test_csv)
    Graph.set_lookup_table(lookup_test_csv)

    result = graph.to_array()
    nodes = result['nodes']
    edges = result['edges']

    for section in (nodes, edges):
        assert type(section) is list and len(section) > 0

    assert nodes == [
        node('LEI_1', 'company1'),
        node('LEI_2', 'company2'),
    ]

    assert edges == [
        {
            'from': 'LEI_1',
            'to': 'LEI_2',
            'label': 'IS_DIRECTLY_CONSOLIDATED_BY',
        },
    ]
219 | 
def test_Graph_subgraphs():
    """sub() returns every node connected to the requested one, whatever the edge direction or type."""

    def node_ids(array):
        return sorted(n['id'] for n in array['nodes'])

    def edge_pairs(array):
        return sorted((e['from'], e['to']) for e in array['edges'])

    records = [

        # Parent chain
        RR('LEI_1', 'LEI_2', RR.DIRECT),
        RR('LEI_2', 'LEI_3', RR.DIRECT),
        RR('LEI_3', 'LEI_4', RR.DIRECT),

        # Isolated 2-node-graph
        RR('LEI_SOLO_A', 'LEI_SOLO_B', RR.DIRECT),

        # Multiple/duplicate edges (the node set is unaffected by repeats)
        RR('LEI_A', 'LEI_B', RR.DIRECT),
        RR('LEI_A', 'LEI_B', RR.DIRECT),  # repeated record
        RR('LEI_A', 'LEI_B', RR.DIRECT),  # repeated record
        RR('LEI_A', 'LEI_B', RR.DIRECT),  # repeated record

        RR('LEI_B', 'LEI_A', RR.DIRECT),  # opposite direction

        RR('LEI_A', 'LEI_B', RR.ULTIMATE),  # ultimate same as direct

        # multiple (some same) ultimate/direct
        RR('LEI_A', 'LEI_B', RR.DIRECT),
        RR('LEI_A', 'LEI_C', RR.DIRECT),
        RR('LEI_A', 'LEI_D', RR.ULTIMATE),

        RR('LEI_D', 'LEI_E', RR.BRANCH),

        # Complex...
        #     B   D
        #    / \ /
        #   A   C   F
        #      / \ /
        #     E  (X)  K
        #        / \ /
        #   J   G   H
        #    \ /
        #     I
        RR('A', 'B', RR.DIRECT),
        RR('C', 'B', RR.DIRECT),
        RR('C', 'D', RR.ULTIMATE),
        RR('E', 'C', RR.ULTIMATE),
        RR('X', 'C', RR.DIRECT),
        RR('X', 'F', RR.DIRECT),
        RR('G', 'X', RR.BRANCH),
        RR('H', 'X', RR.DIRECT),
        RR('H', 'K', RR.DIRECT),
        RR('I', 'G', RR.DIRECT),
        RR('I', 'J', RR.DIRECT),

    ]
    graph = Graph(records)

    chain = ['LEI_1', 'LEI_2', 'LEI_3', 'LEI_4']

    # CASE: Grab start of chain
    from_start = graph.sub('LEI_1').to_array()
    assert node_ids(from_start) == chain
    # Edges keep their stored direction only; no reverse (child) edges appear.
    assert edge_pairs(from_start) == [
        ('LEI_1', 'LEI_2'),
        ('LEI_2', 'LEI_3'),
        ('LEI_3', 'LEI_4'),
    ]

    # CASE: Grab middle of chain, then end of chain
    for entry_point in ('LEI_2', 'LEI_4'):
        assert node_ids(graph.sub(entry_point).to_array()) == chain

    # CASE: Grab different isolated subgraph
    solo = graph.sub('LEI_SOLO_A').to_array()
    #  assert node_ids(solo) == ['LEI_SOLO_A', 'LEI_SOLO_B']

    # CASE: Mixed
    mixed = graph.sub('LEI_A').to_array()
    assert node_ids(mixed) == ['LEI_A', 'LEI_B', 'LEI_C', 'LEI_D', 'LEI_E']

    # CASE: Complex (see the diagram in the record list above)
    around_x = graph.sub('X').to_array()
    assert node_ids(around_x) == ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'X']
    assert edge_pairs(around_x) == [

        ('A', 'B'),
        ('C', 'B'),
        ('C', 'D'),
        ('E', 'C'),
        ('G', 'X'),
        ('H', 'K'),
        ('H', 'X'),
        ('I', 'G'),
        ('I', 'J'),
        ('X', 'C'),
        ('X', 'F'),
    ]

    # TODO: Test edges
334 | 
@pytest.mark.skip(reason="Direction not implemented")
def test_Graph_direction():
    """Placeholder for a direction test; skipped, and fails unconditionally if ever run."""
    assert False, "TODO: Implement MultiDiGraph"
338 | 


--------------------------------------------------------------------------------