├── .gitattributes
├── datasheets
    ├── DAWN
    │   ├── stats.png
    │   └── README.md
    ├── InVS13
    │   └── stats.png
    ├── InVS15
    │   └── stats.png
    ├── diseasome
    │   ├── stats.png
    │   └── README_diseasome.md
    ├── email-eu
    │   ├── stats.png
    │   └── README_email-eu.md
    ├── NDC-classes
    │   ├── stats.png
    │   └── README.md
    ├── arxiv-kaggle
    │   └── stats.png
    ├── coauth-dblp
    │   ├── stats.png
    │   └── README_coauth-DBLP.md
    ├── disgenenet
    │   ├── stats.png
    │   └── README_disgenenet.md
    ├── email-enron
    │   ├── stats.png
    │   └── README_email-enron.md
    ├── house-bills
    │   └── stats.png
    ├── recipe-rec
    │   └── stats.png
    ├── senate-bills
    │   └── stats.png
    ├── tags-math-sx
    │   ├── stats.png
    │   └── README_tags-math-sx.md
    ├── NDC-substances
    │   ├── stats.png
    │   └── README-NDC-substances.md
    ├── congress-bills
    │   ├── stats.png
    │   └── README_congress-bills.md
    ├── hospital-lyon
    │   ├── stats.png
    │   └── README_hospital-lyon.md
    ├── malawi-village
    │   └── stats.png
    ├── house-committees
    │   └── stats.png
    ├── science-gallery
    │   └── stats.png
    ├── senate-committees
    │   └── stats.png
    ├── sfhh-conference
    │   └── stats.png
    ├── tags-ask-ubuntu
    │   ├── stats.png
    │   └── README_tags-ask-ubuntu.md
    ├── threads-math-sx
    │   └── stats.png
    ├── coauth-mag-geology
    │   ├── stats.png
    │   └── README_coauth-MAG-Geology.md
    ├── coauth-mag-history
    │   ├── stats.png
    │   └── README_coauth-MAG-History.md
    ├── contact-high-school
    │   ├── stats.png
    │   └── README_contact-high-school.md
    ├── eventernote-events
    │   └── stats.png
    ├── eventernote-places
    │   └── stats.png
    ├── tags-stack-overflow
    │   ├── stats.png
    │   └── README_tags-stack-overflow.md
    ├── threads-ask-ubuntu
    │   └── stats.png
    ├── contact-primary-school
    │   ├── stats.png
    │   └── README_contact-primary-school.md
    ├── hypertext-conference
    │   └── stats.png
    ├── kaggle-whats-cooking
    │   └── stats.png
    ├── threads-stack-overflow
    │   └── stats.png
    ├── plant-pollinator-mpl-014
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-014.md
    ├── plant-pollinator-mpl-015
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-015.md
    ├── plant-pollinator-mpl-016
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-016.md
    ├── plant-pollinator-mpl-021
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-021.md
    ├── plant-pollinator-mpl-034
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-034.md
    ├── plant-pollinator-mpl-044
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-044.md
    ├── plant-pollinator-mpl-046
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-046.md
    ├── plant-pollinator-mpl-049
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-049.md
    ├── plant-pollinator-mpl-057
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-057.md
    └── plant-pollinator-mpl-062
    │   ├── stats.png
    │   └── README_plant-pollinator-mpl-062md
├── code
    ├── import_recipe-rec.py
    ├── import_arxiv-kaggle.py
    ├── import_plant-pollinator-mpl-014.py
    ├── import_plant-pollinator-mpl-015.py
    ├── import_plant-pollinator-mpl-016.py
    ├── import_plant-pollinator-mpl-021.py
    ├── import_plant-pollinator-mpl-034.py
    ├── import_plant-pollinator-mpl-044.py
    ├── import_plant-pollinator-mpl-046.py
    ├── import_plant-pollinator-mpl-049.py
    ├── import_plant-pollinator-mpl-057.py
    ├── import_plant-pollinator-mpl-062.py
    ├── import_InVS15.py
    ├── import_SFHH-conference.py
    ├── import_hypertext-conference.py
    ├── import_email-eu.py
    ├── import_contact-high-school.py
    ├── import_contact-primary-school.py
    ├── import_InVS13.py
    ├── import_hospital-lyon.py
    ├── import_malawi-village.py
    ├── import_NDC-substances.py
    ├── import_threads-ask-ubuntu.py
    ├── import_threads-math-sx.py
    ├── import_science-gallery.py
    ├── import_hyperbard.py
    ├── import_threads-stack-overflow.py
    ├── import_email-enron.py
    ├── import_DAWN.py
    ├── import_tags-ask-ubuntu.py
    ├── import_coauth-DBLP.py
    ├── import_congress-bills.py
    ├── import_kaggle-whats-cooking.py
    ├── import_tags-math-sx.py
    ├── import_tags-stack-overflow.py
    ├── import_coauth-MAG-Geology.py
    ├── import_coauth-MAG-History.py
    ├── inspect_json.py
    ├── import_house-bills.py
    ├── import_senate-bills.py
    ├── import_house-committees.py
    ├── import_senate-committees.py
    ├── import_NDC-classes.py
    ├── import_diseasome.py
    ├── utilities.py
    ├── import_eventernote-events.py
    └── import_eventernote-places.py
├── LICENSE
├── CITATION.cff
├── HOW_TO_CONTRIBUTE.md
├── .gitignore
├── README.md
├── get_stats.ipynb
└── index.json


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/datasheets/DAWN/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/DAWN/stats.png


--------------------------------------------------------------------------------
/datasheets/InVS13/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/InVS13/stats.png


--------------------------------------------------------------------------------
/datasheets/InVS15/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/InVS15/stats.png


--------------------------------------------------------------------------------
/datasheets/diseasome/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/diseasome/stats.png


--------------------------------------------------------------------------------
/datasheets/email-eu/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/email-eu/stats.png


--------------------------------------------------------------------------------
/datasheets/NDC-classes/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/NDC-classes/stats.png


--------------------------------------------------------------------------------
/datasheets/arxiv-kaggle/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/arxiv-kaggle/stats.png


--------------------------------------------------------------------------------
/datasheets/coauth-dblp/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/coauth-dblp/stats.png


--------------------------------------------------------------------------------
/datasheets/disgenenet/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/disgenenet/stats.png


--------------------------------------------------------------------------------
/datasheets/email-enron/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/email-enron/stats.png


--------------------------------------------------------------------------------
/datasheets/house-bills/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/house-bills/stats.png


--------------------------------------------------------------------------------
/datasheets/recipe-rec/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/recipe-rec/stats.png


--------------------------------------------------------------------------------
/datasheets/senate-bills/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/senate-bills/stats.png


--------------------------------------------------------------------------------
/datasheets/tags-math-sx/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/tags-math-sx/stats.png


--------------------------------------------------------------------------------
/datasheets/NDC-substances/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/NDC-substances/stats.png


--------------------------------------------------------------------------------
/datasheets/congress-bills/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/congress-bills/stats.png


--------------------------------------------------------------------------------
/datasheets/hospital-lyon/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/hospital-lyon/stats.png


--------------------------------------------------------------------------------
/datasheets/malawi-village/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/malawi-village/stats.png


--------------------------------------------------------------------------------
/datasheets/house-committees/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/house-committees/stats.png


--------------------------------------------------------------------------------
/datasheets/science-gallery/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/science-gallery/stats.png


--------------------------------------------------------------------------------
/datasheets/senate-committees/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/senate-committees/stats.png


--------------------------------------------------------------------------------
/datasheets/sfhh-conference/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/sfhh-conference/stats.png


--------------------------------------------------------------------------------
/datasheets/tags-ask-ubuntu/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/tags-ask-ubuntu/stats.png


--------------------------------------------------------------------------------
/datasheets/threads-math-sx/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/threads-math-sx/stats.png


--------------------------------------------------------------------------------
/datasheets/coauth-mag-geology/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/coauth-mag-geology/stats.png


--------------------------------------------------------------------------------
/datasheets/coauth-mag-history/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/coauth-mag-history/stats.png


--------------------------------------------------------------------------------
/datasheets/contact-high-school/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/contact-high-school/stats.png


--------------------------------------------------------------------------------
/datasheets/eventernote-events/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/eventernote-events/stats.png


--------------------------------------------------------------------------------
/datasheets/eventernote-places/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/eventernote-places/stats.png


--------------------------------------------------------------------------------
/datasheets/tags-stack-overflow/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/tags-stack-overflow/stats.png


--------------------------------------------------------------------------------
/datasheets/threads-ask-ubuntu/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/threads-ask-ubuntu/stats.png


--------------------------------------------------------------------------------
/datasheets/contact-primary-school/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/contact-primary-school/stats.png


--------------------------------------------------------------------------------
/datasheets/hypertext-conference/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/hypertext-conference/stats.png


--------------------------------------------------------------------------------
/datasheets/kaggle-whats-cooking/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/kaggle-whats-cooking/stats.png


--------------------------------------------------------------------------------
/datasheets/threads-stack-overflow/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/threads-stack-overflow/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-014/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-014/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-015/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-015/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-016/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-016/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-021/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-021/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-034/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-034/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-044/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-044/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-046/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-046/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-049/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-049/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-057/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-057/stats.png


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-062/stats.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xgi-org/xgi-data/HEAD/datasheets/plant-pollinator-mpl-062/stats.png


--------------------------------------------------------------------------------
/code/import_recipe-rec.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import xgi
 4 | 
 5 | data_folder = "data"
 6 | dataset_name = "recipe-rec"
 7 | filename = "recipes_weighted_and_USDAmapped.json"
 8 | new_dataset_name = "recipe-rec"
 9 | 
10 | with open(f"{data_folder}/{dataset_name}/{filename}") as file:
11 |     jsondata = json.loads(file.read())
12 | 
13 | H = xgi.Hypergraph()
14 | for entry in jsondata:
15 |     e = set()
16 |     for item in entry["ingredients"]:
17 |         e.add(item["text"])
18 |     H.add_edge(e, id=entry["id"], name=entry["title"], url=entry["url"])
19 | 
20 | H["name"] = "recipe-rec"
21 | 
22 | xgi.write_hif(H, f"{data_folder}/{dataset_name}/{new_dataset_name}.json")
23 | 


--------------------------------------------------------------------------------
/code/import_arxiv-kaggle.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | import xgi
 4 | 
 5 | H = xgi.Hypergraph()
 6 | with open("arxiv-metadata-oai-snapshot.json") as f:
 7 |     for line in f:
 8 |         try:
 9 |             entry = json.loads(line)
10 |         except:
11 |             break
12 |         edge = []
13 |         for author in entry["authors_parsed"]:
14 |             edge.append(
15 |                 f"{author[1]} {author[2] + " " if author[2] else ""}{author[0]}"
16 |             )
17 |         H.add_edge(
18 |             edge,
19 |             idx=entry["id"],
20 |             submitter=entry["submitter"],
21 |             title=entry["title"],
22 |             comments=entry["comments"],
23 |             categories=entry["categories"].split(" "),
24 |             abstract=entry["abstract"],
25 |             date=entry["update_date"],
26 |         )
27 | 
28 | xgi.write_hif(H, "arxiv.json")
29 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-014.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-014"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_014.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-015.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-015"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_015.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-016.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-016"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_016.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-021.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-021"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_021.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-034.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-034"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_034.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-044.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-044"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_044.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-046.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-046"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_046.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-049.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-049"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_049.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-057.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-057"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_057.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | H = xgi.Hypergraph(incidence_matrix_df.values)
20 | 
21 | plant_dict = {i: species for i, species in enumerate(plants)}
22 | 
23 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
24 | 
25 | H.set_node_attributes(plant_dict, name="plant")
26 | H.set_edge_attributes(pollinator_dict, name="pollinator")
27 | 
28 | H["name"] = dataset_name
29 | 
30 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
31 | 


--------------------------------------------------------------------------------
/code/import_plant-pollinator-mpl-062.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | data_folder = "data/"
 9 | dataset_name = "plant-pollinator-mpl-062"
10 | dataset_folder = f"{data_folder}{dataset_name}/"
11 | 
12 | incidence_file = f"{dataset_folder}/M_PL_062.csv"
13 | 
14 | incidence_matrix_df = pd.read_csv(incidence_file, index_col=0)
15 | 
16 | plants = list(incidence_matrix_df.index)
17 | pollinators = list(incidence_matrix_df.columns)
18 | 
19 | 
20 | H = xgi.Hypergraph(incidence_matrix_df.values)
21 | 
22 | plant_dict = {i: species for i, species in enumerate(plants)}
23 | 
24 | pollinator_dict = {i: species for i, species in enumerate(pollinators)}
25 | 
26 | H.set_node_attributes(plant_dict, name="plant")
27 | H.set_edge_attributes(pollinator_dict, name="pollinator")
28 | 
29 | H["name"] = dataset_name
30 | 
31 | xgi.write_json(H, f"{dataset_folder}{dataset_name}.json")
32 | 


--------------------------------------------------------------------------------
/code/import_InVS15.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | import networkx as nx
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | dataset_name = "InVS15"
 9 | data = pd.read_csv(
10 |     "data/InVS15/tij_InVS15.dat",
11 |     sep=" ",
12 |     header=0,
13 |     names=["time", "node1", "node2"],
14 | )
15 | 
16 | H = xgi.Hypergraph()
17 | H["name"] = "InVS15"
18 | 
19 | node_labels = utilities.readScHoLPLabels("data/InVS15/metadata_InVS15.txt", "\t")
20 | 
21 | H.add_nodes_from(node_labels)
22 | 
23 | H.set_node_attributes(node_labels, name="department")
24 | start_time = datetime(2013, 6, 24)
25 | 
26 | for t in data["time"].unique():
27 |     time = timedelta(seconds=int(t))
28 |     d = data[data.time == t]
29 |     links = d[["node1", "node2"]].astype(str).values.tolist()
30 |     G = nx.Graph(links)
31 |     for e in nx.find_cliques(G):
32 |         H.add_edge(e, timestamp=(start_time + time).isoformat())
33 | 
34 | xgi.write_json(H, "data/InVS15/InVS15.json")
35 | 


--------------------------------------------------------------------------------
/code/import_SFHH-conference.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | import networkx as nx
 4 | import pandas as pd
 5 | import xgi
 6 | 
 7 | dataset_name = "SFHH-conference"
 8 | data = pd.read_csv(
 9 |     "data/SFHH/tij_SFHH.dat",
10 |     sep=" ",
11 |     header=0,
12 |     names=["time", "node1", "node2"],
13 | )
14 | 
15 | H = xgi.Hypergraph()
16 | H["name"] = "SFHH-conference"
17 | 
18 | nodes1 = data["node1"].values.tolist()
19 | nodes2 = data["node2"].values.tolist()
20 | nodes = set()
21 | nodes.update(set(nodes1))
22 | nodes.update(set(nodes2))
23 | 
24 | H.add_nodes_from(nodes)
25 | 
26 | start_time = datetime(2009, 6, 4)
27 | 
28 | for t in data["time"].unique():
29 |     time = timedelta(seconds=int(t))
30 |     d = data[data.time == t]
31 |     links = d[["node1", "node2"]].values.tolist()
32 |     G = nx.Graph(links)
33 |     for e in nx.find_cliques(G):
34 |         H.add_edge(e, timestamp=(start_time + time).isoformat())
35 | 
36 | 
37 | xgi.write_json(H, "data/SFHH/sfhh-conference.json")
38 | 


--------------------------------------------------------------------------------
/code/import_hypertext-conference.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | import networkx as nx
 4 | import pandas as pd
 5 | import xgi
 6 | 
 7 | dataset_name = "Hypertext-conference"
 8 | data = pd.read_csv(
 9 |     "data/Hypertext2009/ht09_contact_list.dat",
10 |     sep="\t",
11 |     header=0,
12 |     names=["time", "node1", "node2"],
13 | )
14 | 
15 | H = xgi.Hypergraph()
16 | H["name"] = dataset_name
17 | 
18 | nodes1 = data["node1"].values.tolist()
19 | nodes2 = data["node2"].values.tolist()
20 | nodes = set()
21 | nodes.update(set(nodes1))
22 | nodes.update(set(nodes2))
23 | 
24 | H.add_nodes_from(nodes)
25 | 
26 | start_time = datetime(2009, 6, 29, 8, 0, 0)
27 | 
28 | for t in data["time"].unique():
29 |     time = timedelta(seconds=int(t))
30 |     d = data[data.time == t]
31 |     links = d[["node1", "node2"]].values.tolist()
32 |     G = nx.Graph(links)
33 |     for e in nx.find_cliques(G):
34 |         H.add_edge(e, timestamp=(start_time + time).isoformat())
35 | 
36 | 
37 | xgi.write_json(H, f"data/Hypertext2009/hypertext-conference.json")
38 | 


--------------------------------------------------------------------------------
/code/import_email-eu.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime
 3 | 
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "email-Eu"
10 | 
11 | dataset_folder = "email-Eu"
12 | size_file = "email-Eu-full-nverts.txt"
13 | member_file = "email-Eu-full-simplices.txt"
14 | times_file = "email-Eu-full-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
19 | 
20 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
21 | 
22 | H = xgi.Hypergraph(edgelist)
23 | H["name"] = dataset_name
24 | 
25 | delimiter = "\t"
26 | 
27 | edge_times = utilities.read_SCHOLP_dates(
28 |     edge_times_file, reference_time=datetime(1970, 1, 1), time_unit="seconds"
29 | )
30 | 
31 | for label, date in edge_times.items():
32 |     H.edges[label].update({"timestamp": date})
33 | 
34 | 
35 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
36 | 


--------------------------------------------------------------------------------
/datasheets/coauth-dblp/README_coauth-DBLP.md:
--------------------------------------------------------------------------------
 1 | # coauth-DBLP
 2 | 
 3 | ## Summary 
 4 | 
 5 | This is a temporal higher-order network dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. In this dataset, nodes are authors, and a hyperedge is a publication recorded on DBLP. Timestamps are the year of publication. 
 6 | 
 7 | ## Statistics
 8 | 
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 1,924,991
11 | * number of timestamped hyperedges: 3,700,067
12 | * number of unique hyperedges: 2,599,087
13 | 
14 | ## Changelog
15 | 
16 | * v0.1: fixed year format with PR  
17 | * v0: initial version
18 | 
19 | ## Source of original data
20 | 
21 | Source: [coauth-DBLP dataset](https://www.cs.cornell.edu/~arb/data/coauth-DBLP/)
22 | 
23 | ## References
24 | 
25 | If you use this data, please cite the following paper:
26 | 
27 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). 
28 | Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. 
29 | Proceedings of the National Academy of Sciences (PNAS), 2018.


--------------------------------------------------------------------------------
/code/import_contact-high-school.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime
 3 | 
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "contact-high-school"
10 | 
11 | dataset_folder = "contact-high-school"
12 | size_file = "contact-high-school-nverts.txt"
13 | member_file = "contact-high-school-simplices.txt"
14 | times_file = "contact-high-school-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
19 | 
20 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
21 | 
22 | H = xgi.Hypergraph(edgelist)
23 | H["name"] = "contact-high-school"
24 | 
25 | edge_times = utilities.read_SCHOLP_dates(
26 |     edge_times_file, reference_time=datetime(1970, 1, 1), time_unit="seconds"
27 | )
28 | 
29 | for label, date in edge_times.items():
30 |     H.edges[label].update({"timestamp": date})
31 | 
32 | 
33 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
34 | 


--------------------------------------------------------------------------------
/code/import_contact-primary-school.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime
 3 | 
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "contact-primary-school"
10 | 
11 | dataset_folder = "contact-primary-school"
12 | size_file = "contact-primary-school-nverts.txt"
13 | member_file = "contact-primary-school-simplices.txt"
14 | times_file = "contact-primary-school-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
19 | 
20 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
21 | 
22 | H = xgi.Hypergraph(edgelist)
23 | H["name"] = dataset_name
24 | 
25 | edge_times = utilities.read_SCHOLP_dates(
26 |     edge_times_file, reference_time=datetime(1970, 1, 1), time_unit="seconds"
27 | )
28 | 
29 | for label, date in edge_times.items():
30 |     H.edges[label].update({"timestamp": date})
31 | 
32 | 
33 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
34 | 


--------------------------------------------------------------------------------
/datasheets/disgenenet/README_disgenenet.md:
--------------------------------------------------------------------------------
 1 | # disgenenet
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a dataset of genes and the diseases associated with them. In this dataset, a gene is a node and a disease is a hyperedge. The "symbol" attribute of the nodes gives the gene name and the "name" attribute of the edges gives the disease name.
 6 | 
 7 | ## Statistics
 8 | Some basic statistics of this dataset are:
 9 | * number of nodes: 12368
10 | * number of hyperedges: 2261
11 | * The dataset is connected
12 | * degree and edge size distributions:
13 | <center>
14 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
15 | </center>
16 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
17 | 
18 | ## Source of original data
19 | Source: [DisGeNET](https://www.disgenet.org/)
20 | 
21 | ## References
22 | If you use this dataset, please cite these references:
23 | * [The DisGeNET knowledge platform for disease genomics: 2019 update](https://doi.org/10.1093/nar/gkz1021). Janet Piñero, Juan Manuel Ramírez-Anguita, Josep Saüch-Pitarch, Francesco Ronzano, Emilio Centeno, Ferran Sanz, Laura I Furlong. Nucleic Acids Research, 2019.


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-049/README_plant-pollinator-mpl-049.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-049
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Denmark (latitude: 56.066667, longitude: 10.216667).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 37
11 | * number of hyperedges: 225
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 37 | 1 |
18 | </center>
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_049.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Bek S (2006). A pollination network from a Danish forest meadow. MSc thesis (Univ of Aarhus, Aarhus, Denmark).
32 | 


--------------------------------------------------------------------------------
/code/import_InVS13.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | import networkx as nx
 4 | import pandas as pd
 5 | import utilities
 6 | import xgi
 7 | 
 8 | dataset_name = "InVS13"
 9 | data = pd.read_csv(
10 |     "data/InVS13/tij_InVS.dat",
11 |     sep=" ",
12 |     header=0,
13 |     names=["time", "node1", "node2"],
14 | )
15 | 
16 | H = xgi.Hypergraph()
17 | H["name"] = "InVS13"
18 | 
19 | nodes1 = data["node1"].astype(str).values.tolist()
20 | nodes2 = data["node2"].astype(str).values.tolist()
21 | nodes = set()
22 | nodes.update(set(nodes1))
23 | nodes.update(set(nodes2))
24 | 
25 | H.add_nodes_from(nodes)
26 | node_labels = utilities.readScHoLPLabels("data/InVS13/metadata_InVS13.txt", "\t")
27 | H.set_node_attributes(node_labels, name="department")
28 | 
29 | start_time = datetime(2013, 6, 24)
30 | 
31 | for t in data["time"].unique():
32 |     time = timedelta(seconds=int(t))
33 |     d = data[data.time == t]
34 |     links = d[["node1", "node2"]].astype(str).values.tolist()
35 |     G = nx.Graph(links)
36 |     for e in nx.find_cliques(G):
37 |         H.add_edge(e, timestamp=(start_time + time).isoformat())
38 | 
39 | xgi.write_json(H, "data/InVS13/InVS13.json")
40 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-046/README_plant-pollinator-mpl-046.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-046
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Denmark (latitude: 56.066667 longitude: 10.233333).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 16
11 | * number of hyperedges: 44
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 16 | 1 |
18 | |||
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_046.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Bundgaard, M. (2003). Tidslig og rumlig variation i et plante-bestøvernetværk. Msc thesis. University of Aarhus. Aarhus, Denmark.
32 | 


--------------------------------------------------------------------------------
/datasheets/tags-stack-overflow/README_tags-stack-overflow.md:
--------------------------------------------------------------------------------
 1 | # tags-stack-overflow
 2 | 
 3 | ## Summary
 4 | 
 5 | This dataset is derived from tags on Stack Overflow posts. The raw data was
 6 | downloaded from
 7 | https://archive.org/details/stackexchange
 8 | 
 9 | Each simplex corresponds to all of the tags used in a post, and each node in a
10 | simplex corresponds to a tag. The times are the time of the post in millisecond
11 | but normalized so that the earliest tag starts at 0.
12 | 
13 | ## Statistics
14 | 
15 | Some basic statistics of this dataset are:
16 | * number of nodes: 49,998
17 | * number of timestamped simplices: 14,458,875
18 | * number of unique simplices: 5,675,497
19 | * number of edges in projected graph: 4,147,302
20 | 
21 | ## Data:
22 | 
23 | Source: [tags-stack-overflow dataset](https://www.cs.cornell.edu/~arb/data/tags-stack-overflow/)
24 | 
25 | ## References
26 | 
27 | If you use this data, please cite the following paper:
28 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-016/README_plant-pollinator-mpl-016.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-016
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Doñana Nat. Park, Spain (latitude: 37.016667, longitude: -6.55).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 26
11 | * number of hyperedges: 179
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 26 | 1 |
18 | </center>
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_016.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Herrera, J. (1988) [Pollination relatioships in southern spanish mediterranean shrublands.](https://www.jstor.org/stable/2260469) Journal of Ecology 76: 274-287.
32 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-014/README_plant-pollinator-mpl-014.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-014
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Hazen Camp, Ellesmere Island, Canada (latitude: 81.816667 , longitude: -71.3).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 29
11 | * number of hyperedges: 81
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 29 | 1 |
18 | |||
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_014.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Hocking, B. (1968). [Insect-flower associations in the high Arctic with special reference to nectar](https://doi.org/10.2307/3565022). *Oikos*, 359-387.
32 | 
33 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-062/README_plant-pollinator-mpl-062md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-062
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | 
 7 | 
 8 | Locality of study: Carlinville, Illinois, USA (latitude: 39.278958, longitude: -89.8968771).
 9 | 
10 | ## Statistics
11 | Some basic statistics of this dataset are:
12 | * number of nodes: 456
13 | * number of hyperedges: 1,044
14 | * distribution of the connected components:
15 | <center>
16 | 
17 | | Component Size  | Number |
18 | | ----- | ---- |
19 | | 456 | 1 |
20 | </center>
21 | 
22 | * degree and edge size distributions:
23 | <center>
24 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
25 | </center>
26 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
27 | 
28 | ## Source of original data
29 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_062.
30 | 
31 | ## References
32 | If you use this dataset, please cite these references:
33 | * Robertson, C. 1929. "Flowers and insects: lists of visitors to four hundred and fifty-three flowers". Carlinville, IL, USA, C. Robertson.
34 | 
35 | 


--------------------------------------------------------------------------------
/code/import_hospital-lyon.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | import networkx as nx
 4 | import pandas as pd
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "hospital-lyon"
10 | data = pd.read_csv(
11 |     "data/hospital-lyon/detailed_list_of_contacts_Hospital.dat",
12 |     sep="\t",
13 |     header=0,
14 |     names=["time", "node1", "node2", "type1", "type2"],
15 | )
16 | 
17 | H = xgi.Hypergraph()
18 | H["name"] = "hospital-lyon"
19 | 
20 | nodes1 = dict(zip(data["node1"].values.tolist(), data["type1"].values.tolist()))
21 | nodes2 = dict(zip(data["node2"].values.tolist(), data["type2"].values.tolist()))
22 | nodes = dict()
23 | nodes.update(nodes1)
24 | nodes.update(nodes2)
25 | 
26 | for node, nodetype in nodes.items():
27 |     H.add_node(node, type=nodetype)
28 | 
29 | start_time = datetime(2010, 12, 6, 13, 0, 0)
30 | 
31 | for t in data["time"].unique():
32 |     time = timedelta(seconds=int(t))
33 |     d = data[data.time == t]
34 |     links = d[["node1", "node2"]].values.tolist()
35 |     G = nx.Graph(links)
36 |     for e in nx.find_cliques(G):
37 |         H.add_edge(e, timestamp=(start_time + time).isoformat())
38 | 
39 | 
40 | xgi.write_json(H, "data/hospital-lyon/hospital-lyon.json")
41 | 


--------------------------------------------------------------------------------
/datasheets/diseasome/README_diseasome.md:
--------------------------------------------------------------------------------
 1 | # diseasome
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a dataset of diseases and the genes associated with them. In this dataset, a disease is a node and a gene is a hyperedge. The "label" attribute of the nodes gives the disease description and the "label" attribute of the edges gives the gene name. The disease-disease correlations were filtered out to enforce only disease-gene relationships.
 6 | 
 7 | ## Statistics
 8 | Some basic statistics of this dataset are:
 9 | * number of nodes: 516
10 | * number of hyperedges: 903
11 | * The dataset is connected
12 | * degree and edge size distributions:
13 | <center>
14 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
15 | </center>
16 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
17 | 
18 | ## Source of original data
19 | Source: [Gephi](https://github.com/gephi/gephi.github.io/blob/master/datasets/diseasome.gexf.zip)
20 | 
21 | ## References
22 | If you use this dataset, please cite these references:
23 | * [The human disease network](https://doi.org/10.1073/pnas.0701361104). Kwang-Il Goh, Michael E. Cusick, David Valle, and Albert-László Barabási. Proceedings of the National Academy of Sciences (PNAS), 2007.


--------------------------------------------------------------------------------
/code/import_malawi-village.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta
 2 | 
 3 | import networkx as nx
 4 | import pandas as pd
 5 | import xgi
 6 | 
 7 | dataset_name = "Malawi-village"
 8 | data = pd.read_csv(
 9 |     "data/Malawi21/tnet_malawi_pilot.csv",
10 |     sep=",",
11 | )
12 | data["time"] = list(zip(data.day, data.contact_time))
13 | 
14 | H = xgi.Hypergraph()
15 | H["name"] = dataset_name
16 | 
17 | nodes1 = data["id1"].values.tolist()
18 | nodes2 = data["id2"].values.tolist()
19 | nodes = set()
20 | nodes.update(set(nodes1))
21 | nodes.update(set(nodes2))
22 | 
23 | H.add_nodes_from(nodes)
24 | 
25 | start_time = datetime(2019, 12, 22, 11, 31, 40)
26 | # this is calculated by finding when the day switches and then subtracting
27 | # the number of seconds elapsed at the switch. The data seems to start on
28 | # Dec. 22 and end on Jan. 4
29 | 
30 | for t in data["time"].unique():
31 |     days, sec = t
32 |     time = timedelta(seconds=int(sec))
33 |     d = data[data.time == t]
34 |     links = d[["id1", "id2"]].values.tolist()
35 |     G = nx.Graph(links)
36 |     for e in nx.find_cliques(G):
37 |         H.add_edge(e, timestamp=(start_time + time).isoformat())
38 |     print(sec)
39 | 
40 | 
41 | xgi.write_json(H, f"data/Malawi21/malawi-village.json")
42 | 


--------------------------------------------------------------------------------
/code/import_NDC-substances.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "NDC-substances-full"
 9 | new_dataset_name = "NDC-substances"
10 | 
11 | dataset_folder = "NDC-substances-full"
12 | size_file = f"{dataset_name}-nverts.txt"
13 | member_file = f"{dataset_name}-simplices.txt"
14 | labels_file = f"{dataset_name}-node-labels.txt"
15 | times_file = f"{dataset_name}-times.txt"
16 | 
17 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
18 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
19 | node_labels_file = os.path.join(data_folder, dataset_folder, labels_file)
20 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
21 | 
22 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
23 | 
24 | H = xgi.Hypergraph(edgelist)
25 | H["name"] = new_dataset_name
26 | 
27 | delimiter = " "
28 | 
29 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
30 | 
31 | H.add_nodes_from(list(node_labels.keys()))
32 | 
33 | for label, name in node_labels.items():
34 |     H.nodes[label].update({"name": name})
35 | 
36 | 
37 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
38 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-015/README_plant-pollinator-mpl-015.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-015
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Daphní, Athens, Greece (latitude: 38.014466, longitude: 23.635043).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 131
11 | * number of hyperedges: 666
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 130 | 1 |
18 | |1|1|
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_015.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Petanidou, T. (1991). [Pollination ecology in a phryganic ecosystem](https://thesis.ekt.gr/thesisBookReader/id/10184#page/1/mode/2up). Unp. PhD. Thesis, Aristotelian University, Thessaloniki.
32 | 
33 | 


--------------------------------------------------------------------------------
/datasheets/tags-math-sx/README_tags-math-sx.md:
--------------------------------------------------------------------------------
 1 | # tags-ask-ubuntu
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a temporal higher-order network dataset, which here means a sequence of timestamped simplices where each simplex is a set of nodes. In this dataset, nodes are tags and simplices are the sets of tags applied to questions on math.stackexchange.com. 
 6 | 
 7 | Each simplex corresponds to all of the tags used in a post, and each node in a
 8 | simplex corresponds to a tag. The times are the time of the post in millisecond
 9 | but normalized so that the earliest tag starts at 0.
10 | 
11 | ## Statistics
12 | 
13 | Some basic statistics of this dataset are:
14 | * number of nodes: 1,629
15 | * number of timestamped simplices: 822,059
16 | * number of unique simplices: 174,933
17 | * number of edges in projected graph: 91,685
18 | 
19 | ## Source of original data
20 | 
21 | Source: [tags-math-sx dataset](https://www.cs.cornell.edu/~arb/data/tags-math-sx/)
22 | 
23 | ## References
24 | 
25 | If you use this data, please cite the following paper:
26 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.


--------------------------------------------------------------------------------
/code/import_threads-ask-ubuntu.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | delimiter = " "
 7 | 
 8 | data_folder = "data"
 9 | 
10 | dataset_folder = "threads-ask-ubuntu"
11 | size_file = "threads-ask-ubuntu-nverts.txt"
12 | member_file = "threads-ask-ubuntu-simplices.txt"
13 | e_labels_file = "threads-ask-ubuntu-simplex-labels.txt"
14 | times_file = "threads-ask-ubuntu-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | edge_label_file = os.path.join(data_folder, dataset_folder, e_labels_file)
19 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
20 | 
21 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
22 | edge_labels = utilities.readScHoLPLabels(edge_label_file, delimiter, two_column=False)
23 | 
24 | H = xgi.Hypergraph(dict(zip(edge_labels, edgelist)))
25 | H["name"] = "threads-ask-ubuntu"
26 | 
27 | 
28 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
29 | 
30 | for label, date in edge_times.items():
31 |     H.edges[label].update({"timestamp": date})
32 | 
33 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, "threads-ask-ubuntu.json"))
34 | 


--------------------------------------------------------------------------------
/code/import_threads-math-sx.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | delimiter = " "
 7 | 
 8 | data_folder = "data"
 9 | 
10 | dataset_name = "threads-math-sx"
11 | dataset_folder = f"{dataset_name}"
12 | size_file = f"{dataset_name}-nverts.txt"
13 | member_file = f"{dataset_name}-simplices.txt"
14 | e_labels_file = f"{dataset_name}-simplex-labels.txt"
15 | times_file = f"{dataset_name}-times.txt"
16 | 
17 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
18 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
19 | edge_labels_file = os.path.join(data_folder, dataset_folder, e_labels_file)
20 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
21 | 
22 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
23 | 
24 | edge_labels = utilities.readScHoLPLabels(edge_labels_file, delimiter, two_column=False)
25 | H = xgi.Hypergraph(dict(zip(edge_labels, edgelist)))
26 | H["name"] = dataset_name
27 | 
28 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
29 | 
30 | for label, date in edge_times.items():
31 |     H.edges[label].update({"timestamp": date})
32 | 
33 | 
34 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
35 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-044/README_plant-pollinator-mpl-044.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-044
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Amami-Ohsima Island, Japan (latitude: 28.377248, longitude: 129.493741).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 110
11 | * number of hyperedges: 609
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 130 | 1 |
18 | |1|1|
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_044.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Kato M (2000).[Anthophilous insect community and plant-pollinator interactions on Amami Islands in the Ryukyu Archipelago, Japan.](https://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/156116/1/cbl02902_157.pdf) Contr Biol Lab Kyoto Univ 29:157-252.
32 | 


--------------------------------------------------------------------------------
/code/import_science-gallery.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime, timedelta
 3 | 
 4 | import networkx as nx
 5 | import pandas as pd
 6 | import xgi
 7 | 
 8 | dataset_name = "Science-Gallery"
 9 | 
10 | start_time = datetime(1970, 1, 1)
11 | H = xgi.Hypergraph()
12 | H["name"] = dataset_name
13 | 
14 | for fname in sorted(os.listdir("data/ScienceGallery")):
15 |     if "listcontacts" in fname:
16 |         data = pd.read_csv(
17 |             "data/ScienceGallery/" + fname,
18 |             sep="\t",
19 |             header=0,
20 |             names=["time", "node1", "node2"],
21 |         )
22 | 
23 |         nodes1 = data["node1"].values.tolist()
24 |         nodes2 = data["node2"].values.tolist()
25 |         nodes = set()
26 |         nodes.update(set(nodes1))
27 |         nodes.update(set(nodes2))
28 | 
29 |         H.add_nodes_from(nodes)
30 | 
31 |         for t in data["time"].unique():
32 |             time = timedelta(seconds=int(t))
33 |             d = data[data.time == t]
34 |             links = d[["node1", "node2"]].values.tolist()
35 |             G = nx.Graph(links)
36 |             for e in nx.find_cliques(G):
37 |                 H.add_edge(e, timestamp=(start_time + time).isoformat())
38 |         print(f"{fname} completed!")
39 | 
40 | 
41 | xgi.write_json(H, f"data/ScienceGallery/science-gallery.json")
42 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-057/README_plant-pollinator-mpl-057.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-057
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Kibune, Kyoto, Japan (latitude: 35.166667, longitude: 135.866667).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 114
11 | * number of hyperedges: 883
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 114 | 1 |
18 | |||
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_057.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Inoue et al 1990. [Insect-flower Relationship in the Temperate Deciduous Forest of Kibune, Kyoto : An Overview of the Flowering Phenology and the Seasonal Pattern of Insect Visits.](https://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/156100/1/cbl02704_377.pdf)
32 | 


--------------------------------------------------------------------------------
/code/import_hyperbard.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import pandas as pd
 3 | import xgi
 4 | 
 5 | data_folder = "data"
 6 | collection_name = "hyperbard"
 7 | 
 8 | df = pd.read_csv(
 9 |     f"../{data_folder}/hyperbard_data/metadata/playtypes.csv", header=9, delimiter=","
10 | )
11 | 
12 | play_type = df.set_index("play_name")["play_type"].to_dict()
13 | 
14 | collection = {}
15 | for play in play_type:
16 |     df = pd.read_csv(
17 |         f"../{data_folder}/hyperbard_data/graphdata/{play}_hg-group-mw.edges.csv"
18 |     )
19 |     H = xgi.Hypergraph()
20 | 
21 |     for row in df["onstage"]:
22 |         edge = set()
23 |         for item in row.split(" "):
24 |             edge.add(item.split("#")[1].split("_")[0])
25 |         H.add_edge(edge)
26 | 
27 |     act = df["act"].to_dict()
28 |     scene = df["scene"].to_dict()
29 |     n_tokens = df["n_tokens"].to_dict()
30 |     n_lines = df["n_lines"].to_dict()
31 | 
32 |     H.set_edge_attributes(act, name="act")
33 |     H.set_edge_attributes(scene, name="scene")
34 |     H.set_edge_attributes(n_tokens, name="n_tokens")
35 |     H.set_edge_attributes(n_lines, name="n_lines")
36 |     H["play-type"] = play_type[play]
37 |     collection[play] = H
38 | 
39 | xgi.write_json_collection(
40 |     collection,
41 |     f"../{data_folder}/hyperbard_data/hyperbard",
42 |     collection_name="hyperbard",
43 | )
44 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-034/README_plant-pollinator-mpl-034.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-034
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Chiloe, Chile (latitude: -42, longitude: -73.583333).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 26
11 | * number of hyperedges: 128
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 130 | 1 |
18 | |1|1|
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_034.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Smith-Ramírez C., P. Martinez, M. Nuñez, C. González and J. J. Armesto (2005) [Diversity, flower visitation frequency and generalism of pollinators in temperate rain forests of Chiloé Island,Chile](https://doi.org/10.1111/j.1095-8339.2005.00388.x). Botanical Journal of the Linnean Society, 2005, 147, 399–416.
32 | 


--------------------------------------------------------------------------------
/code/import_threads-stack-overflow.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | delimiter = " "
 7 | 
 8 | data_folder = "data"
 9 | 
10 | new_dataset_name = "threads-stack-overflow"
11 | 
12 | dataset_name = "threads-stack-overflow-full"
13 | dataset_folder = f"{dataset_name}"
14 | size_file = f"{dataset_name}-nverts.txt"
15 | member_file = f"{dataset_name}-simplices.txt"
16 | e_labels_file = f"{dataset_name}-simplex-labels.txt"
17 | times_file = f"{dataset_name}-times.txt"
18 | 
19 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
20 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
21 | edge_labels_file = os.path.join(data_folder, dataset_folder, e_labels_file)
22 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
23 | 
24 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
25 | edge_labels = utilities.readScHoLPLabels(edge_labels_file, delimiter, two_column=False)
26 | 
27 | H = xgi.Hypergraph(dict(zip(edge_labels, edgelist)))
28 | H["name"] = new_dataset_name
29 | 
30 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
31 | 
32 | for label, date in edge_times.items():
33 |     H.edges[label].update({"timestamp": date})
34 | 
35 | 
36 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
37 | 


--------------------------------------------------------------------------------
/code/import_email-enron.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "email-Enron"
 9 | 
10 | dataset_folder = "email-Enron-full"
11 | size_file = "email-Enron-full-nverts.txt"
12 | member_file = "email-Enron-full-simplices.txt"
13 | labels_file = "email-Enron-full-node-labels.txt"
14 | times_file = "email-Enron-full-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | node_labels_file = os.path.join(data_folder, dataset_folder, labels_file)
19 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
20 | 
21 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
22 | 
23 | H = xgi.Hypergraph(edgelist)
24 | H["name"] = dataset_name
25 | 
26 | delimiter = " "
27 | 
28 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
29 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
30 | 
31 | H.add_nodes_from(list(node_labels.keys()))
32 | 
33 | for label, name in node_labels.items():
34 |     H.nodes[label].update({"name": name})
35 | 
36 | for label, date in edge_times.items():
37 |     H.edges[label].update({"timestamp": date})
38 | 
39 | 
40 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, "email-Enron.json"))
41 | 


--------------------------------------------------------------------------------
/code/import_DAWN.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime
 3 | 
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "DAWN"
10 | 
11 | dataset_folder = "DAWN"
12 | size_file = "DAWN-nverts.txt"
13 | member_file = "DAWN-simplices.txt"
14 | labels_file = "DAWN-node-labels.txt"
15 | times_file = "DAWN-times.txt"
16 | 
17 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
18 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
19 | node_labels_file = os.path.join(data_folder, dataset_folder, labels_file)
20 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
21 | 
22 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
23 | 
24 | H = xgi.Hypergraph(edgelist)
25 | H["name"] = dataset_name
26 | 
27 | delimiter = " "
28 | 
29 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
30 | edge_times = utilities.read_SCHOLP_dates(
31 |     edge_times_file, reference_time=datetime(1, 1, 1), time_unit="quarters"
32 | )
33 | 
34 | H.add_nodes_from(list(node_labels.keys()))
35 | 
36 | for label, name in node_labels.items():
37 |     H.nodes[label].update({"name": name})
38 | 
39 | for label, date in edge_times.items():
40 |     H.edges[label].update({"timestamp": date})
41 | 
42 | 
43 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
44 | 


--------------------------------------------------------------------------------
/datasheets/plant-pollinator-mpl-021/README_plant-pollinator-mpl-021.md:
--------------------------------------------------------------------------------
 1 | # plant-pollinator-mpl-021
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a hypergraph dataset where nodes are plants species, and hyperedges are pollinator species that visit a given plant. 
 6 | Locality of study: Ashu, Kyoto, Japan (latitude: 35.333333 , longitude: 135.75).
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 91
11 | * number of hyperedges: 677
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 130 | 1 |
18 | |1|1|
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [web-of-life](https://www.web-of-life.es/), dataset ID: M_PL_021.
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * Kato, M., Kakutani, T., Inoue, T. and Itino, T. (1990). [Insect-flower relationship in the primary beech forest of Ashu, Kyoto: An overview of the flowering phenology and the seasonal pattern of insect visits.](https://repository.kulib.kyoto-u.ac.jp/dspace/bitstream/2433/156101/1/cbl02704_309.pdf) Contrib. Biol. Lab., Kyoto, Univ., 27, 309-375.
32 | 


--------------------------------------------------------------------------------
/code/import_tags-ask-ubuntu.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_folder = "tags-ask-ubuntu"
 9 | size_file = "tags-ask-ubuntu-nverts.txt"
10 | member_file = "tags-ask-ubuntu-simplices.txt"
11 | n_labels_file = "tags-ask-ubuntu-node-labels.txt"
12 | e_labels_file = "tags-ask-ubuntu-simplex-labels.txt"
13 | times_file = "tags-ask-ubuntu-times.txt"
14 | 
15 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
16 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
17 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
18 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
19 | 
20 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
21 | 
22 | H = xgi.Hypergraph(edgelist)
23 | H["name"] = "tags-ask-ubuntu"
24 | 
25 | delimiter = " "
26 | 
27 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
28 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
29 | 
30 | H.add_nodes_from(list(node_labels.keys()))
31 | 
32 | for label, name in node_labels.items():
33 |     H.nodes[label].update({"name": name})
34 | 
35 | for label, date in edge_times.items():
36 |     H.edges[label].update({"timestamp": date})
37 | 
38 | 
39 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, "tags-ask-ubuntu.json"))
40 | 


--------------------------------------------------------------------------------
/code/import_coauth-DBLP.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | dataset_name = "coauth-DBLP-full"
 9 | 
10 | new_dataset_name = "coauth-DBLP"
11 | 
12 | 
13 | dataset_folder = "coauth-DBLP-full"
14 | size_file = f"{dataset_name}-nverts.txt"
15 | member_file = f"{dataset_name}-simplices.txt"
16 | n_labels_file = f"{dataset_name}-node-labels.txt"
17 | times_file = f"{dataset_name}-times.txt"
18 | 
19 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
20 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
21 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
22 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
23 | 
24 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
25 | 
26 | H = xgi.Hypergraph(edgelist)
27 | H["name"] = new_dataset_name
28 | 
29 | delimiter = " "
30 | 
31 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
32 | times = np.loadtxt(edge_times_file)
33 | 
34 | H.add_nodes_from(list(node_labels.keys()))
35 | 
36 | for label, name in node_labels.items():
37 |     H.nodes[label].update({"name": name})
38 | 
39 | for label, date in enumerate(times):
40 |     H.edges[label].update({"timestamp": int(date)})
41 | 
42 | 
43 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
44 | 


--------------------------------------------------------------------------------
/code/import_congress-bills.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "congress-bills-full"
 9 | 
10 | new_dataset_name = "congress-bills"
11 | 
12 | dataset_folder = "congress-bills-full"
13 | size_file = f"{dataset_name}-nverts.txt"
14 | member_file = f"{dataset_name}-simplices.txt"
15 | labels_file = f"{dataset_name}-node-labels.txt"
16 | times_file = f"{dataset_name}-times.txt"
17 | 
18 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
19 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
20 | node_labels_file = os.path.join(data_folder, dataset_folder, labels_file)
21 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
22 | 
23 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
24 | 
25 | H = xgi.Hypergraph(edgelist)
26 | H["name"] = new_dataset_name
27 | 
28 | delimiter = "\t"
29 | 
30 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
31 | edge_times = utilities.read_SCHOLP_dates(edge_times_file)
32 | 
33 | H.add_nodes_from(list(node_labels.keys()))
34 | 
35 | for label, name in node_labels.items():
36 |     H.nodes[label].update({"name": name})
37 | 
38 | for label, date in edge_times.items():
39 |     H.edges[label].update({"timestamp": date})
40 | 
41 | 
42 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
43 | 


--------------------------------------------------------------------------------
/code/import_kaggle-whats-cooking.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import xgi
 4 | 
 5 | data_folder = "data"
 6 | 
 7 | dataset_name = "cat-edge-Cooking"
 8 | new_dataset_name = "kaggle-whats-cooking"
 9 | 
10 | dataset_folder = f"{dataset_name}"
11 | edges_file = "hyperedges.txt"
12 | n_labels_file = "node-labels.txt"
13 | e_labels_file = "hyperedge-labels.txt"
14 | l_identities_file = "hyperedge-label-identities.txt"
15 | 
16 | edgelist_file = os.path.join(data_folder, dataset_folder, edges_file)
17 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
18 | edge_labels_file = os.path.join(data_folder, dataset_folder, e_labels_file)
19 | label_identities_file = os.path.join(data_folder, dataset_folder, l_identities_file)
20 | 
21 | H = xgi.read_edgelist(edgelist_file, delimiter="\t", nodetype=int)
22 | H["name"] = new_dataset_name
23 | 
24 | with open(node_labels_file) as f:
25 |     node_labels = f.read().splitlines()
26 | 
27 | for i, n in enumerate(H.nodes):
28 |     H.nodes[n].update({"name": node_labels[i]})
29 | 
30 | with open(edge_labels_file) as f:
31 |     edge_labels = f.read().splitlines()
32 | 
33 | with open(label_identities_file) as f:
34 |     edge_label_identities = f.read().splitlines()
35 | 
36 | for i, e in enumerate(H.edges):
37 |     H.edges[e].update({"name": edge_label_identities[int(edge_labels[i]) - 1]})
38 | 
39 | 
40 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
41 | 


--------------------------------------------------------------------------------
/code/import_tags-math-sx.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "tags-math-sx"
 9 | dataset_folder = f"{dataset_name}"
10 | size_file = f"{dataset_name}-nverts.txt"
11 | member_file = f"{dataset_name}-simplices.txt"
12 | n_labels_file = f"{dataset_name}-node-labels.txt"
13 | e_labels_file = f"{dataset_name}-simplex-labels.txt"
14 | times_file = f"{dataset_name}-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
19 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
20 | 
21 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
22 | 
23 | H = xgi.Hypergraph(edgelist)
24 | H["name"] = dataset_name
25 | 
26 | delimiter = " "
27 | 
28 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
29 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
30 | 
31 | H.add_nodes_from(list(node_labels.keys()))
32 | 
33 | for label, name in node_labels.items():
34 |     H.nodes[label].update({"name": name})
35 | 
36 | for label, date in edge_times.items():
37 |     H.edges[label].update({"timestamp": date})
38 | 
39 | 
40 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
41 | 


--------------------------------------------------------------------------------
/code/import_tags-stack-overflow.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "tags-stack-overflow"
 9 | dataset_folder = f"{dataset_name}"
10 | size_file = f"{dataset_name}-nverts.txt"
11 | member_file = f"{dataset_name}-simplices.txt"
12 | n_labels_file = f"{dataset_name}-node-labels.txt"
13 | e_labels_file = f"{dataset_name}-simplex-labels.txt"
14 | times_file = f"{dataset_name}-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
19 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
20 | 
21 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
22 | 
23 | H = xgi.Hypergraph(edgelist)
24 | H["name"] = dataset_name
25 | 
26 | delimiter = " "
27 | 
28 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
29 | edge_times = utilities.read_SCHOLP_dates(edge_times_file, time_unit="milliseconds")
30 | 
31 | H.add_nodes_from(list(node_labels.keys()))
32 | 
33 | for label, name in node_labels.items():
34 |     H.nodes[label].update({"name": name})
35 | 
36 | for label, date in edge_times.items():
37 |     H.edges[label].update({"timestamp": date})
38 | 
39 | 
40 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{dataset_name}.json"))
41 | 


--------------------------------------------------------------------------------
/code/import_coauth-MAG-Geology.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "coauth-MAG-Geology-full"
10 | 
11 | new_dataset_name = "coauth-MAG-Geology"
12 | 
13 | dataset_folder = "coauth-MAG-Geology-full"
14 | size_file = f"{dataset_name}-nverts.txt"
15 | member_file = f"{dataset_name}-simplices.txt"
16 | n_labels_file = f"{dataset_name}-node-labels.txt"
17 | e_labels_file = f"{dataset_name}-simplex-labels.txt"
18 | times_file = f"{dataset_name}-times.txt"
19 | 
20 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
21 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
22 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
23 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
24 | 
25 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
26 | 
27 | H = xgi.Hypergraph(edgelist)
28 | H["name"] = new_dataset_name
29 | 
30 | delimiter = " "
31 | 
32 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
33 | times = np.loadtxt(edge_times_file)
34 | 
35 | H.add_nodes_from(list(node_labels.keys()))
36 | 
37 | for label, name in node_labels.items():
38 |     H.nodes[label].update({"name": name})
39 | 
40 | for label, date in enumerate(times):
41 |     H.edges[label].update({"timestamp": int(date)})
42 | 
43 | 
44 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
45 | 


--------------------------------------------------------------------------------
/code/import_coauth-MAG-History.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_name = "coauth-MAG-History-full"
10 | 
11 | new_dataset_name = "coauth-MAG-History"
12 | 
13 | dataset_folder = "coauth-MAG-History-full"
14 | size_file = f"{dataset_name}-nverts.txt"
15 | member_file = f"{dataset_name}-simplices.txt"
16 | n_labels_file = f"{dataset_name}-node-labels.txt"
17 | e_labels_file = f"{dataset_name}-simplex-labels.txt"
18 | times_file = f"{dataset_name}-times.txt"
19 | 
20 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
21 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
22 | node_labels_file = os.path.join(data_folder, dataset_folder, n_labels_file)
23 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
24 | 
25 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
26 | 
27 | H = xgi.Hypergraph(edgelist)
28 | H["name"] = new_dataset_name
29 | 
30 | delimiter = " "
31 | 
32 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
33 | times = np.loadtxt(edge_times_file)
34 | 
35 | H.add_nodes_from(list(node_labels.keys()))
36 | 
37 | for label, name in node_labels.items():
38 |     H.nodes[label].update({"name": name})
39 | 
40 | for label, date in enumerate(times):
41 |     H.edges[label].update({"timestamp": int(date)})
42 | 
43 | 
44 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
45 | 


--------------------------------------------------------------------------------
/datasheets/tags-ask-ubuntu/README_tags-ask-ubuntu.md:
--------------------------------------------------------------------------------
 1 | # tags-ask-ubuntu
 2 | 
 3 | ## Summary
 4 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedges is a set of nodes. In this dataset, nodes are tags and hyperedges are the sets of tags applied to questions on askubuntu.com. The timestamps are in ISO8601 format and are normalized to start at 0. This dataset is derived from tags on Ask Ubuntu posts. The raw data was downloaded from https://archive.org/details/stackexchange
 5 | 
 6 | ## Statistics
 7 | Some basic statistics of this dataset are:
 8 | * number of nodes: 3,029
 9 | * number of timestamped hyperedges: 271,233
10 | * distribution of the connected components:
11 | <center>
12 | 
13 | | Component Size  | Number |
14 | | ----- | ---- |
15 | | 3021 | 1 |
16 | | 1 | 8 |
17 | </center>
18 | 
19 | * degree and edge size distributions:
20 | <center>
21 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
22 | </center>
23 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
24 | 
25 | ## Source of original data
26 | Source: [tags-ask-ubuntu dataset](https://www.cs.cornell.edu/~arb/data/tags-ask-ubuntu/)
27 | 
28 | ## References
29 | If you use this dataset, please cite these references:
30 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.


--------------------------------------------------------------------------------
/datasheets/coauth-mag-geology/README_coauth-MAG-Geology.md:
--------------------------------------------------------------------------------
 1 | # coauth-MAG-Geology
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a temporal higher-order network dataset, which here means a sequence of timestamped simplices where each simplex is a set of nodes. In this dataset, nodes are authors and a simplex is a publication marked with the "Geology" tag in the Microsoft Academic Graph. Timestamps are the year of publication. 
 6 | 
 7 | ## Statistics
 8 | 
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 1,256,385
11 | * number of timestamped simplices: 1,590,335
12 | * number of unique simplices: 1,207,390
13 | * number of edges in projected graph: 512,0762
14 | 
15 | ## Changelog
16 | 
17 | - v0.2: removed restriction on edge size (was max 25 nodes) with PR #22 https://github.com/xgi-org/xgi-data/pull/22
18 | 
19 | ## Source of original data
20 | 
21 | Source: [coauth-MAG-Geology dataset](https://www.cs.cornell.edu/~arb/data/coauth-MAG-Geology/)
22 | 
23 | ## References
24 | 
25 | If you use this data, please cite the following papers:
26 | 
27 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). 
28 | Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. 
29 | Proceedings of the National Academy of Sciences (PNAS), 2018.
30 | 
31 | * [An overview of Microsoft Academic Service (MAS) and applications[(https://doi.org/10.1145/2740908.2742839).
32 | Arnab Sinha, Zhihong Shen, Yang Song, Hao Ma, Darrin Eide, Bo-June Hsu, and Kuansan Wang. 
33 | Proceedings of WWW, 2015. 


--------------------------------------------------------------------------------
/datasheets/NDC-substances/README-NDC-substances.md:
--------------------------------------------------------------------------------
 1 | # NDC substances
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a temporal higher-order network dataset, which here means a sequence of timestamped simplices where each simplex is a set of nodes. Under the Drug Listing Act of 1972, the U.S. Food and Drug Administration releases information on all commercial drugs going through the regulation of the agency, forming the National Drug Code (NDC) Directory. In this dataset, each simplex corresponds to an NDC code for a drug, and the nodes are substances that make up the drug. Timestamps are in days and represent when the drug was first marketed. We restricted to simplices that consist of at most 25 nodes. 
 6 | 
 7 | 
 8 | The file NDC-substances-node-labels.txt maps the node IDs to the substances.
 9 | 
10 | The nth line in NDC-classes-simplex-labels.txt is the name of the drug
11 | corresponding to the nth simplex.
12 | 
13 | ## Statistics
14 | 
15 | * number of nodes: 5,311
16 | * number of timestamped simplices: 112,405
17 | * number of unique simplices: 10,025
18 | * number of edges in projected graph: 88,268
19 | 
20 | ## Origin of data
21 | 
22 | Source: [NDC-substances](https://www.cs.cornell.edu/~arb/data/NDC-substances/).
23 | 
24 | ## References 
25 | 
26 | If you use this data, please cite the following paper:
27 | Simplicial closure and higher-order link prediction.
28 | Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg.
29 | [Proceedings of the National Academy of Sciences (PNAS)](https://doi.org/10.1073/pnas.1800683115), 2018.


--------------------------------------------------------------------------------
/code/inspect_json.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import sys
 3 | 
 4 | # graph parameters
 5 | filename = sys.argv[1]
 6 | 
 7 | with open(filename) as file:
 8 |     # load JSON file
 9 |     data = json.loads(file.read())
10 | 
11 |     # load hypergraph attributes
12 |     try:
13 |         hypergraph_attrs = data["hypergraph-data"]
14 | 
15 |         # Is a dataset name specified?
16 |         try:
17 |             name = hypergraph_attrs["name"]
18 |         except:
19 |             print("Dataset name not specified!")
20 |     except:
21 |         print("No hypergraph attributes!")
22 | 
23 |     # Are nodes specified?
24 |     try:
25 |         node_data = data["node-data"]
26 |     except:
27 |         print("No nodes specified!")
28 | 
29 |     # Are hyperedges specified?
30 |     try:
31 |         edge_data = data["edge-data"]
32 | 
33 |     except:
34 |         print("No hyperedges specified!")
35 | 
36 |     try:
37 |         edges = data["edge-dict"]
38 |         for e in edges:
39 |             # are the nodes in the hyperedges in the list of nodes?
40 |             if e not in edge_data:
41 |                 print(f"edge {e} not in the list of edges.")
42 |             try:
43 |                 members = edges[e]
44 |                 for node in members:
45 |                     if node not in node_data:
46 |                         print(f"Edge {e} contains non-existent node {node}!")
47 |             except:
48 |                 print(f"Edge {e} has no associated members!")
49 | 
50 |     except:
51 |         print("No hyperedges specified!")
52 | 
53 | print("Inspection complete.")
54 | 


--------------------------------------------------------------------------------
/datasheets/coauth-mag-history/README_coauth-MAG-History.md:
--------------------------------------------------------------------------------
 1 | # coauth-MAG-History
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a temporal higher-order network dataset, which here means a sequence of timestamped simplices where each simplex is a set of nodes. In this dataset, nodes are authors and a simplex is a publication marked with the "History" tag in the Microsoft Academic Graph. Timestamps are the year of publication. 
 6 | 
 7 | ## Statistics
 8 | 
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 1,014,734
11 | * number of timestamped simplices: 1,812,511
12 | * number of unique simplices: 895,668
13 | * number of edges in projected graph: 1,156,914
14 | 
15 | ## Changelog
16 | 
17 | - v1.2: fixed year format with PR #31 https://github.com/xgi-org/xgi-data/pull/31 
18 | - v1.1: removed restriction on edge size (was max 25 nodes) with PR #22 https://github.com/xgi-org/xgi-data/pull/22
19 | 
20 | ## Source of original data
21 | 
22 | Source: [coauth-MAG-History dataset](https://www.cs.cornell.edu/~arb/data/coauth-MAG-History/)
23 | 
24 | ## References
25 | 
26 | If you use this data, please cite the following papers:
27 | 
28 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). 
29 | Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. 
30 | Proceedings of the National Academy of Sciences (PNAS), 2018.
31 | 
32 | * [An overview of Microsoft Academic Service (MAS) and applications[(https://doi.org/10.1145/2740908.2742839).
33 | Arnab Sinha, Zhihong Shen, Yang Song, Hao Ma, Darrin Eide, Bo-June Hsu, and Kuansan Wang. 
34 | Proceedings of WWW, 2015. 


--------------------------------------------------------------------------------
/code/import_house-bills.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "house-bills"
 9 | 
10 | new_dataset_name = "house-bills"
11 | 
12 | dataset_folder = "house-bills"
13 | edgelist_file = f"hyperedges-{dataset_name}.txt"
14 | node_names_file = f"node-names-{dataset_name}.txt"
15 | node_affiliations_file = f"node-labels-{dataset_name}.txt"
16 | affiliation_names_file = f"label-names-{dataset_name}.txt"
17 | 
18 | edgelist_filepath = os.path.join(data_folder, dataset_folder, edgelist_file)
19 | node_names_filepath = os.path.join(data_folder, dataset_folder, node_names_file)
20 | node_affiliations_filepath = os.path.join(
21 |     data_folder, dataset_folder, node_affiliations_file
22 | )
23 | affiliation_names_filepath = os.path.join(
24 |     data_folder, dataset_folder, affiliation_names_file
25 | )
26 | 
27 | H = xgi.read_edgelist(edgelist_filepath, delimiter=",")
28 | H["name"] = new_dataset_name
29 | 
30 | node_labels = utilities.readScHoLPLabels(node_names_filepath, two_column=False)
31 | node_affiliation = utilities.readScHoLPLabels(
32 |     node_affiliations_filepath, two_column=False
33 | )
34 | 
35 | for id, name in node_labels.items():
36 |     H.nodes[str(id)].update({"name": name})
37 | 
38 | affiliation_names = []
39 | with open(affiliation_names_filepath) as label_data:
40 |     for line in label_data:
41 |         affiliation_names.append(line.strip("\n"))
42 | 
43 | for id, label in node_affiliation.items():
44 |     H.nodes[str(id)].update({"affiliation": affiliation_names[int(label) - 1]})
45 | 
46 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
47 | 


--------------------------------------------------------------------------------
/code/import_senate-bills.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "senate-bills"
 9 | 
10 | new_dataset_name = "senate-bills"
11 | 
12 | dataset_folder = "senate-bills"
13 | edgelist_file = f"hyperedges-{dataset_name}.txt"
14 | node_names_file = f"node-names-{dataset_name}.txt"
15 | node_affiliations_file = f"node-labels-{dataset_name}.txt"
16 | affiliation_names_file = f"label-names-{dataset_name}.txt"
17 | 
18 | edgelist_filepath = os.path.join(data_folder, dataset_folder, edgelist_file)
19 | node_names_filepath = os.path.join(data_folder, dataset_folder, node_names_file)
20 | node_affiliations_filepath = os.path.join(
21 |     data_folder, dataset_folder, node_affiliations_file
22 | )
23 | affiliation_names_filepath = os.path.join(
24 |     data_folder, dataset_folder, affiliation_names_file
25 | )
26 | 
27 | H = xgi.read_edgelist(edgelist_filepath, delimiter=",")
28 | H["name"] = new_dataset_name
29 | 
30 | node_labels = utilities.readScHoLPLabels(node_names_filepath, two_column=False)
31 | node_affiliation = utilities.readScHoLPLabels(
32 |     node_affiliations_filepath, two_column=False
33 | )
34 | 
35 | for id, name in node_labels.items():
36 |     H.nodes[str(id)].update({"name": name})
37 | 
38 | affiliation_names = []
39 | with open(affiliation_names_filepath) as label_data:
40 |     for line in label_data:
41 |         affiliation_names.append(line.strip("\n"))
42 | 
43 | for id, label in node_affiliation.items():
44 |     H.nodes[str(id)].update({"affiliation": affiliation_names[int(label) - 1]})
45 | 
46 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
47 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | XGI-DATA is distributed with the BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2021-2024, XGI-DATA Developers
 4 | 
 5 | All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without
 8 | modification, are permitted provided that the following conditions are met:
 9 | 
10 | 1. Redistributions of source code must retain the above copyright notice, this
11 |    list of conditions and the following disclaimer.
12 | 
13 | 2. Redistributions in binary form must reproduce the above copyright notice,
14 |    this list of conditions and the following disclaimer in the documentation
15 |    and/or other materials provided with the distribution.
16 | 
17 | 3. Neither the name of the copyright holder nor the names of its
18 |    contributors may be used to endorse or promote products derived from
19 |    this software without specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/code/import_house-committees.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "house-committees"
 9 | 
10 | new_dataset_name = "house-committees"
11 | 
12 | dataset_folder = "house-committees"
13 | edgelist_file = f"hyperedges-{dataset_name}.txt"
14 | node_names_file = f"node-names-{dataset_name}.txt"
15 | node_affiliations_file = f"node-labels-{dataset_name}.txt"
16 | affiliation_names_file = f"label-names-{dataset_name}.txt"
17 | 
18 | edgelist_filepath = os.path.join(data_folder, dataset_folder, edgelist_file)
19 | node_names_filepath = os.path.join(data_folder, dataset_folder, node_names_file)
20 | node_affiliations_filepath = os.path.join(
21 |     data_folder, dataset_folder, node_affiliations_file
22 | )
23 | affiliation_names_filepath = os.path.join(
24 |     data_folder, dataset_folder, affiliation_names_file
25 | )
26 | 
27 | H = xgi.read_edgelist(edgelist_filepath, delimiter=",")
28 | H["name"] = new_dataset_name
29 | 
30 | node_labels = utilities.readScHoLPLabels(node_names_filepath, two_column=False)
31 | node_affiliation = utilities.readScHoLPLabels(
32 |     node_affiliations_filepath, two_column=False
33 | )
34 | 
35 | for id, name in node_labels.items():
36 |     H.nodes[str(id)].update({"name": name})
37 | 
38 | affiliation_names = []
39 | with open(affiliation_names_filepath) as label_data:
40 |     for line in label_data:
41 |         affiliation_names.append(line.strip("\n"))
42 | 
43 | for id, label in node_affiliation.items():
44 |     H.nodes[str(id)].update({"affiliation": affiliation_names[int(label) - 1]})
45 | 
46 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
47 | 


--------------------------------------------------------------------------------
/code/import_senate-committees.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import utilities
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "senate-committees"
 9 | 
10 | new_dataset_name = "senate-committees"
11 | 
12 | dataset_folder = "senate-committees"
13 | edgelist_file = f"hyperedges-{dataset_name}.txt"
14 | node_names_file = f"node-names-{dataset_name}.txt"
15 | node_affiliations_file = f"node-labels-{dataset_name}.txt"
16 | affiliation_names_file = f"label-names-{dataset_name}.txt"
17 | 
18 | edgelist_filepath = os.path.join(data_folder, dataset_folder, edgelist_file)
19 | node_names_filepath = os.path.join(data_folder, dataset_folder, node_names_file)
20 | node_affiliations_filepath = os.path.join(
21 |     data_folder, dataset_folder, node_affiliations_file
22 | )
23 | affiliation_names_filepath = os.path.join(
24 |     data_folder, dataset_folder, affiliation_names_file
25 | )
26 | 
27 | H = xgi.read_edgelist(edgelist_filepath, delimiter=",")
28 | H["name"] = new_dataset_name
29 | 
30 | node_labels = utilities.readScHoLPLabels(node_names_filepath, two_column=False)
31 | node_affiliation = utilities.readScHoLPLabels(
32 |     node_affiliations_filepath, two_column=False
33 | )
34 | 
35 | for id, name in node_labels.items():
36 |     H.nodes[str(id)].update({"name": name})
37 | 
38 | affiliation_names = []
39 | with open(affiliation_names_filepath) as label_data:
40 |     for line in label_data:
41 |         affiliation_names.append(line.strip("\n"))
42 | 
43 | for id, label in node_affiliation.items():
44 |     H.nodes[str(id)].update({"affiliation": affiliation_names[int(label) - 1]})
45 | 
46 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, f"{new_dataset_name}.json"))
47 | 


--------------------------------------------------------------------------------
/datasheets/congress-bills/README_congress-bills.md:
--------------------------------------------------------------------------------
 1 | # congress-bills
 2 | 
 3 | ## Summary
 4 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. In this dataset, nodes are US Congresspersons and hyperedges are comprised of the sponsor and co-sponsors of legislative bills put forth in both the House of Representatives and the Senate. Timestamps are in ISO8601 format. The dataset was derived from James Fowler's data.
 5 | 
 6 | ## Statistics
 7 | Some basic statistics of this dataset are:
 8 | * number of nodes: 1,718
 9 | * number of timestamped hyperedges: 282,049
10 | * there is a single connected component of size 1,718
11 | * degree and edge size distributions:
12 | 
13 | <center>
14 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
15 | </center>
16 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
17 | 
18 | ## Source of original data
19 | Source: [congress-bills dataset](https://www.cs.cornell.edu/~arb/data/congress-bills/)
20 | 
21 | ## References
22 | If you use this dataset, please cite these references:
23 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.
24 | * [Connecting the Congress: A Study of Cosponsorship Networks](https://doi.org/10.1093/pan/mpl002). James H. Fowler. Political Analysis, 2006.
25 | * [Legislative Cosponsorship Networks in the U.S. House and Senate](https://doi.org/10.1016/j.socnet.2005.11.003). James H. Fowler. Social Networks, 2006.


--------------------------------------------------------------------------------
/code/import_NDC-classes.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import datetime
 3 | 
 4 | import utilities
 5 | import xgi
 6 | 
 7 | data_folder = "data"
 8 | 
 9 | dataset_folder = "NDC-classes-full"
10 | size_file = "NDC-classes-full-nverts.txt"
11 | member_file = "NDC-classes-full-simplices.txt"
12 | nlabels_file = "NDC-classes-full-node-labels.txt"
13 | elabels_file = "NDC-classes-full-simplex-labels.txt"
14 | times_file = "NDC-classes-full-times.txt"
15 | 
16 | hyperedge_size_file = os.path.join(data_folder, dataset_folder, size_file)
17 | member_ID_file = os.path.join(data_folder, dataset_folder, member_file)
18 | node_labels_file = os.path.join(data_folder, dataset_folder, nlabels_file)
19 | edge_labels_file = os.path.join(data_folder, dataset_folder, elabels_file)
20 | edge_times_file = os.path.join(data_folder, dataset_folder, times_file)
21 | 
22 | edgelist = utilities.readScHoLPData(hyperedge_size_file, member_ID_file)
23 | 
24 | H = xgi.Hypergraph(edgelist)
25 | H["name"] = "NDC-classes"
26 | 
27 | delimiter = " "
28 | 
29 | node_labels = utilities.readScHoLPLabels(node_labels_file, delimiter)
30 | edge_labels = utilities.readScHoLPLabels(edge_labels_file, delimiter, two_column=False)
31 | 
32 | edge_times = utilities.read_SCHOLP_dates(
33 |     edge_times_file, reference_time=datetime(1, 1, 1), time_unit="milliseconds"
34 | )
35 | 
36 | H.add_nodes_from(list(node_labels.keys()))
37 | 
38 | H.set_edge_attributes(edge_labels, name="name")
39 | 
40 | for label, name in node_labels.items():
41 |     H.nodes[label].update({"name": name})
42 | 
43 | for label, date in edge_times.items():
44 |     H.edges[label].update({"timestamp": date})
45 | 
46 | 
47 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, "ndc-classes.json"))
48 | 


--------------------------------------------------------------------------------
/datasheets/contact-primary-school/README_contact-primary-school.md:
--------------------------------------------------------------------------------
 1 | # contact-primary-school
 2 | 
 3 | ## Summary
 4 | 
 5 | This dataset is constructed from a contact network amongst children and teachers
 6 | at a primary school. The contact network was downloaded from
 7 | http://www.sociopatterns.org/datasets/primary-school-temporal-network-data/
 8 | 
 9 | We form simplices through cliques of simultaneous contacts. Specifically, for every unique timestamp in the dataset, we construct a simplex for every maximal clique amongst the contact edges that exist for that timestamp. Timestamps were
10 | recorded in 20 second intervals.
11 | 
12 | ## Statistics
13 | Some basic statistics of this dataset are:
14 | * number of nodes: 242
15 | * number of timestamped simplices: 106,879
16 | * number of unique simplices: 12,799
17 | * number of edges in projected graph: 8,317
18 | 
19 | * degree and edge size distributions:
20 | <center>
21 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
22 | </center>
23 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
24 | 
25 | ## Source of original data
26 | Source: [contact-primary-school dataset](hhttps://www.cs.cornell.edu/~arb/data/contact-primary-school/)
27 | 
28 | ## References
29 | If you use this dataset, please cite these references:
30 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.
31 | * [CHigh-Resolution Measurements of Face-to-Face Contact Patterns in a Primary School](https://doi.org/10.1371/journal.pone.0023176). RJuliette Stehlé, Nicolas Voirin, Alain Barrat, Ciro Cattuto, Lorenzo Isella, Jean-François Pinton, Marco Quaggiotto, Wouter Van den Broeck, Corinne Régis, Bruno Lina, and Philippe Vanhems. PLoS ONE, 2011.


--------------------------------------------------------------------------------
/datasheets/NDC-classes/README.md:
--------------------------------------------------------------------------------
 1 | # ndc-classes
 2 | 
 3 | ## Overview
 4 | This dataset consists of the pharmaceutical classes used to classify drugs in
 5 | the National Drug Code Directory maintained by the Food and Drug
 6 | Administration between 1946 and 2017. The original data was downloaded from
 7 | https://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm.
 8 | 
 9 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. Timestamps are in ISO8601 format. In the original dataset, the same drug substance can have more than one NDC code. For example, different dosages of the same drug may result in multiple NDC codes.
10 | 
11 | ## Statistics
12 | Some basic statistics of this dataset are:
13 | * number of nodes: 1,161
14 | * number of timestamped hyperedges: 49,726
15 | * distribution of the connected components:
16 | <center>
17 | 
18 | | Component Size  | Number |
19 | | ----- | ---- |
20 | | 628 | 1 |
21 | | 60 | 1 |
22 | | 26 | 1 |
23 | | 18 | 1 |
24 | | 8 | 2 |
25 | | 7 | 3 |
26 | | 6 | 2 |
27 | | 5 | 2 |
28 | | 4 | 11 |
29 | | 3 | 20 |
30 | | 2 | 127 |
31 | | 1 | 12 |
32 | </center>
33 | 
34 | * degree and edge size distributions:
35 | <center>
36 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
37 | </center>
38 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
39 | 
40 | ## Source of original data
41 | Sources:
42 | * [NDC-classes dataset](https://www.cs.cornell.edu/~arb/data/NDC-classes/)
43 | * [FDA](https://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm)
44 | 
45 | ## References
46 | If you use this dataset, please cite these references:
47 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115), Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.


--------------------------------------------------------------------------------
/datasheets/contact-high-school/README_contact-high-school.md:
--------------------------------------------------------------------------------
 1 | # contact-high-school
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. This dataset is constructed from a contact network amongst high school students
 6 | in Marseilles, France, in December 2013. The contact network was downloaded from
 7 | http://www.sociopatterns.org/datasets/high-school-contact-and-friendship-networks/
 8 | 
 9 | We form simplices through cliques of simultaneous contacts. Specifically, for
10 | every unique timestamp in the dataset, we construct a simplex for every maximal
11 | clique amongst the contact edges that exist for that timestamp. Timestamps were
12 | recorded in 20 second intervals.
13 | 
14 | ## Statistics
15 | Some basic statistics of this dataset are:
16 | * number of nodes: 327
17 | * number of timestamped hyperedges: 172,035
18 | * there is a single connected component of size 327
19 | 
20 | * degree and edge size distributions:
21 | <center>
22 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
23 | </center>
24 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
25 | 
26 | ## Source of original data
27 | Source: [contact-high-school dataset](https://www.cs.cornell.edu/~arb/data/contact-high-school/)
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.
32 | * [Contact Patterns in a High School: A Comparison between Data Collected Using Wearable Sensors, Contact Diaries and Friendship Surveys](https://doi.org/10.1371/journal.pone.0136497). Rossana Mastrandrea, Julie Fournet, and Alain Barrat. PLoS ONE, 2015.


--------------------------------------------------------------------------------
/datasheets/DAWN/README.md:
--------------------------------------------------------------------------------
 1 | # dawn
 2 | 
 3 | The Drug Abuse Warning Network (DAWN) is a national health surveillance system that records drug use contributing to hospital emergency department visits throughout the United States. Hyperedges in this dataset are the drugs used by a patient (as reported by the patient) in an emergency department visit. The drugs include illicit substances, prescription and over-the-counter medication, and dietary supplements. Timestamps of visits (under the `timestamp` attribute of the hyperedges) are recorded at the resolution of quarter-years, spanning a total duration of 8 years (2004 to 2011). The names of the drugs are encoded in the `name` attribute of the nodes.
 4 | 
 5 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. Timestamps are in ISO8601 format. In the original dataset, the same drug substance can have more than one NDC code. For example, different dosages of the same drug may result in multiple NDC codes.
 6 | 
 7 | ## Statistics
 8 | Some basic statistics of this dataset are:
 9 | * number of nodes: 2,558
10 | * number of timestamped hyperedges: 2,272,433
11 | * distribution of the connected components:
12 | <center>
13 | | Component Size  | Number |
14 | | ----- | ---- |
15 | | 2290 | 1 |
16 | | 1 | 68 |
17 | </center>
18 | 
19 | * degree and edge size distributions:
20 | <center>
21 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
22 | </center>
23 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
24 | 
25 | ## Source of original data
26 | Sources:
27 | * [DAWN dataset](https://www.cs.cornell.edu/~arb/data/DAWN/)
28 | 
29 | ## References
30 | If you use this dataset, please cite these references:
31 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115), Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.


--------------------------------------------------------------------------------
/code/import_diseasome.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import xgi
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_folder = "diseasome"
 9 | 
10 | H = xgi.Hypergraph()
11 | tree = ET.parse(os.path.join(data_folder, dataset_folder, "diseasome.gexf"))
12 | root = tree.getroot()
13 | 
14 | node_attr = dict()
15 | edge_attr = dict()
16 | for item in root:
17 |     for subelement in item:
18 |         if "nodes" in subelement.tag:
19 |             for node in subelement:
20 |                 for attrlist in node:
21 |                     for attr in attrlist:
22 |                         if (
23 |                             attr.attrib["id"] == "0"
24 |                             and attr.attrib["value"] == "disease"
25 |                         ):
26 |                             node_attr[node.attrib["id"]] = {
27 |                                 "label": node.attrib["label"]
28 |                             }
29 |                         elif (
30 |                             attr.attrib["id"] == "0" and attr.attrib["value"] == "gene"
31 |                         ):
32 |                             edge_attr[node.attrib["id"]] = {
33 |                                 "label": node.attrib["label"]
34 |                             }
35 | 
36 | for item in root:
37 |     for subelement in item:
38 |         if "edges" in subelement.tag:
39 |             for edge in subelement:
40 |                 source = edge.attrib["source"]
41 |                 target = edge.attrib["target"]
42 |                 if source in node_attr and target in edge_attr:
43 |                     H.add_node_to_edge(edge.attrib["target"], edge.attrib["source"])
44 |                 elif target in node_attr and source in edge_attr:
45 |                     H.add_node_to_edge(edge.attrib["source"], edge.attrib["target"])
46 |                 else:
47 |                     print(f"Edge ({source}, {target}) Not bipartite!")
48 | 
49 | xgi.set_edge_attributes(H, edge_attr)
50 | xgi.set_node_attributes(H, node_attr)
51 | H["name"] = "Diseasome"
52 | 
53 | 
54 | xgi.write_json(H, os.path.join(data_folder, dataset_folder, "diseasome.json"))
55 | 


--------------------------------------------------------------------------------
/datasheets/hospital-lyon/README_hospital-lyon.md:
--------------------------------------------------------------------------------
 1 | # hospital-lyon
 2 | 
 3 | ## Summary
 4 | This dataset contains the temporal network of contacts between patients, patients and health-care workers (HCWs) and among HCWs in a hospital ward in Lyon, France, from Monday, December 6, 2010 at 1:00 pm to Friday, December 10, 2010 at 2:00 pm. The study included 46 HCWs and 29 patients.
 5 | 
 6 | The active contacts are resolved to a 20-second interval in the data collection. In the original data, each line has the form "t i j Si Sj", where i and j are the anonymous IDs of the persons in contact, Si and Sj are their statuses (NUR=paramedical staff, i.e. nurses and nurses’ aides; PAT=Patient; MED=Medical doctor; ADM=administrative staff), and the interval during which this contact was active is [ t – 20s, t ]. If a node is connected to more than one other node at a given time interval, we assume that all these nodes participate in a group interactions, i.e., if nodes 1 and 2 as well as nodes 2 and 3 are active at a given time interval, we assume that 1, 2, and 3 participate in a group interaction together. All timestamps are in standard ISO8601 format.
 7 | 
 8 | ## Statistics
 9 | * number of nodes: 75 (46 HCWs and 29 patients)
10 | * number of timestamped hyperedges: 21,398
11 | * there is a single connected component of size 75
12 | * degree and edge size distributions:
13 | 
14 | <center>
15 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
16 | </center>
17 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
18 | 
19 | ## Source of original data
20 | Source: [SocioPatterns dataset: Hospital ward dynamic contact network](http://www.sociopatterns.org/datasets/hospital-ward-dynamic-contact-network/)
21 | 
22 | ## References
23 | If you use this dataset, please cite these references:
24 | * [Estimating Potential Infection Transmission Routes in Hospital Wards Using Wearable Proximity Sensors](http://dx.doi.org/10.1371%2Fjournal.pone.0073970). Philippe Vanhems, Alain Barrat, Ciro Cattuto, Jean-François Pinton, Nagham Khanafer, Corinne Régis, Byeul-a Kim, Brigitte Comte, Nicolas Voirin. PLoS ONE, 2013.


--------------------------------------------------------------------------------
/code/utilities.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from datetime import datetime, timedelta
 3 | 
 4 | 
 5 | def readScHoLPData(edge_size_file, member_ID_file):
 6 |     edgelist = list()
 7 |     with open(edge_size_file) as size_file, open(member_ID_file) as id_file:
 8 |         sizes = size_file.read().splitlines()
 9 |         members = id_file.read().splitlines()
10 |         member_index = 0
11 |         for index in range(len(sizes)):
12 |             edge = list()
13 |             edge_size = int(sizes[index])
14 |             for i in range(member_index, member_index + edge_size):
15 |                 member = members[i]
16 |                 edge.append(member)
17 |             edgelist.append(tuple(edge))
18 |             member_index += edge_size
19 |     return edgelist
20 | 
21 | 
22 | def readScHoLPLabels(labels_file, delimiter="\t", two_column=True):
23 |     label_dict = dict()
24 |     with open(labels_file) as label_data:
25 |         for i, line in enumerate(label_data):
26 |             if two_column:
27 |                 s = line.split(delimiter, 1)
28 |                 idx = s[0]
29 |                 val = s[1].rstrip("\n")
30 |             else:
31 |                 idx = i + 1
32 |                 val = line.rstrip("\n")
33 |             label_dict[idx] = val
34 |     return label_dict
35 | 
36 | 
37 | def read_SCHOLP_dates(
38 |     timestamp_file, reference_time=datetime(1, 1, 1), time_unit="days"
39 | ):
40 |     time_dict = dict()
41 |     with open(timestamp_file) as time_data:
42 |         lines = time_data.read().splitlines()
43 |         for i in range(len(lines)):
44 |             t = int(lines[i])
45 |             if time_unit == "days":
46 |                 time = reference_time + timedelta(days=t)
47 |             elif time_unit == "seconds":
48 |                 time = reference_time + timedelta(seconds=t)
49 |             elif time_unit == "milliseconds":
50 |                 time = reference_time + timedelta(seconds=t / 1000)
51 |             elif time_unit == "quarters":
52 |                 year = int((t - 1) / 4)
53 |                 quarter = (t - 1) % 4 + 1
54 |                 time = datetime(year, int(3 * quarter), 1)
55 |             elif time_unit == "years":
56 |                 year = t
57 |                 time = datetime(year, 1, 1)
58 |             time_dict[i] = time.isoformat()
59 |     return time_dict
60 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # YAML 1.2
 2 | cff-version: "1.2.0"
 3 | authors:
 4 | - email: nicholas.landry@uvm.edu
 5 |   family-names: Landry
 6 |   given-names: Nicholas W.
 7 |   orcid: "https://orcid.org/0000-0003-1270-4980"
 8 | - family-names: Lucas
 9 |   given-names: Maxime
10 |   orcid: "https://orcid.org/0000-0001-8087-2981"
11 | - family-names: Iacopini
12 |   given-names: Iacopo
13 |   orcid: "https://orcid.org/0000-0001-8794-6410"
14 | - family-names: Petri
15 |   given-names: Giovanni
16 |   orcid: "https://orcid.org/0000-0003-1847-5031"
17 | - family-names: Schwarze
18 |   given-names: Alice
19 |   orcid: "https://orcid.org/0000-0002-9146-8068"
20 | - family-names: Patania
21 |   given-names: Alice
22 |   orcid: "https://orcid.org/0000-0002-3047-4376"
23 | - family-names: Torres
24 |   given-names: Leo
25 |   orcid: "https://orcid.org/0000-0002-2675-2775"
26 | contact:
27 | - email: nicholas.landry@uvm.edu
28 |   family-names: Landry
29 |   given-names: Nicholas W.
30 |   orcid: "https://orcid.org/0000-0003-1270-4980"
31 | doi: 10.5281/zenodo.7939055
32 | message: If you use this software, please cite our article in the
33 |   Journal of Open Source Software.
34 | preferred-citation:
35 |   authors:
36 |   - email: nicholas.landry@uvm.edu
37 |     family-names: Landry
38 |     given-names: Nicholas W.
39 |     orcid: "https://orcid.org/0000-0003-1270-4980"
40 |   - family-names: Lucas
41 |     given-names: Maxime
42 |     orcid: "https://orcid.org/0000-0001-8087-2981"
43 |   - family-names: Iacopini
44 |     given-names: Iacopo
45 |     orcid: "https://orcid.org/0000-0001-8794-6410"
46 |   - family-names: Petri
47 |     given-names: Giovanni
48 |     orcid: "https://orcid.org/0000-0003-1847-5031"
49 |   - family-names: Schwarze
50 |     given-names: Alice
51 |     orcid: "https://orcid.org/0000-0002-9146-8068"
52 |   - family-names: Patania
53 |     given-names: Alice
54 |     orcid: "https://orcid.org/0000-0002-3047-4376"
55 |   - family-names: Torres
56 |     given-names: Leo
57 |     orcid: "https://orcid.org/0000-0002-2675-2775"
58 |   date-published: 2023-05-17
59 |   doi: 10.21105/joss.05162
60 |   issn: 2475-9066
61 |   issue: 85
62 |   journal: Journal of Open Source Software
63 |   publisher:
64 |     name: Open Journals
65 |   start: 5162
66 |   title: "XGI: A Python package for higher-order interaction networks"
67 |   type: article
68 |   url: "https://joss.theoj.org/papers/10.21105/joss.05162"
69 |   volume: 8
70 | title: "XGI: A Python package for higher-order interaction networks"


--------------------------------------------------------------------------------
/datasheets/email-eu/README_email-eu.md:
--------------------------------------------------------------------------------
 1 | # email-eu
 2 | 
 3 | ## Overview
 4 | This hypergraph dataset was generated using email data from a large European research institution for a period from October 2003 to May 2005 (18 months). Information about all incoming and outgoing email between members of the research institution has been anonymized. The e-mails only represent communication between institution members (the core), and the dataset does not contain incoming messages from or outgoing messages to the rest of the world.
 5 | 
 6 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. Timestamps are in ISO8601 format. In email communication, messages can be sent to multiple recipients. In this dataset, nodes are email addresses at a European research institution. The original data source only contains directed temporal edge tuples (sender, receiver, timestamp), where timestamps are recorded at 1-second resolution. The hyperedges are undirected and consist of a sender and all receivers grouped such that the email between the sender and each receiver has the same timestamp.
 7 | 
 8 | ## Statistics
 9 | Some basic statistics of this dataset are:
10 | * number of nodes: 1,005
11 | * number of timestamped hyperedges: 235,263
12 | * distribution of the connected components:
13 | <center>
14 | 
15 | | Component Size  | Number |
16 | | ----- | ---- |
17 | | 986 | 1 |
18 | | 1 | 19 |
19 | </center>
20 | 
21 | * degree and edge size distributions:
22 | <center>
23 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
24 | </center>
25 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
26 | 
27 | ## Source of original data
28 | Source: [email-Eu dataset](https://www.cs.cornell.edu/~arb/data/email-Eu/)
29 | 
30 | ## References
31 | If you use this dataset, please cite these references:
32 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115), Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.
33 | * [Local Higher-order Graph Clustering](https://doi.org/10.1145/3097983.3098069), Hao Yin, Austin R. Benson, Jure Leskovec, and David F. Gleich. Proceedings of KDD, 2017.
34 | * [Graph Evolution: Densification and Shrinking Diameters](https://doi.org/10.1145/1217299.1217301), Jure Leskovec, Jon Kleinberg, and Christos Faloutsos. ACM Transactions on Knowledge Discovery from Data, 2007.


--------------------------------------------------------------------------------
/HOW_TO_CONTRIBUTE.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | ## Adding a dataset to XGI-DATA
 4 | 
 5 | ### Creating a JSON file for a dataset
 6 | 1. Create a script titled `import_<dataset name>.py`. Choose a dataset name that is concise, yet descriptive.
 7 | 2. Convert the raw data into an `xgi` hypergraph, using the above script. Examples of importing are in the [code](/code/) folder.
 8 | 3. Save the dataset to a JSON file using the `xgi.write_json()` [method](https://xgi.readthedocs.io/en/stable/api/readwrite/xgi.readwrite.json.html#module-xgi.readwrite.json).
 9 | 
10 | ### Adding to Zenodo
11 | 1. Navigate to the [XGI page](https://zenodo.org/communities/xgi) on Zenodo.
12 | 2. Click the "New Upload" button. This should prompt you to log into Zenodo and will bring up the form to upload a new dataset.
13 | 3. Enter the information in the "new upload" form:
14 |    1. In the "Files" section, drag and drop the file or click the "Upload files" button.
15 |    2. When asked "Do you already have a DOI for this upload?", select "No".
16 |    3. Under "Resource type", select "Dataset" from the dropdown list.
17 |    4. Under "Title" enter the dataset name selected above.
18 |    5. Under "Creators", add yourself with name, ORCID, and affiliation along with your role (typically "Data collector" or "Data curator")
19 |    6. Under "Description" write the name of the dataset, where it is from, how it was collected, what nodes and edges are, and some basic statistics about the dataset.
20 |    7. Under "Version", type "v0.0" if this is the first version of the dataset.
21 | 4. Click the "Submit for review" button. This will send it to the XGI-DATA moderators for review.
22 | 
23 | Once the dataset has been added to Zenodo, do the following:
24 | 
25 | ### Updating Github
26 | 1. Fork XGI-DATA.
27 | 2. Move the import script created prior to the `code` folder.
28 | 3. Add an entry (in alphabetical order) in [`index.json`](https://github.com/xgi-org/xgi-data/blob/add-contribution-guide/index.json) with:
29 |    1. The dataset name as the key (all lowercase!)
30 |    2. The value as a dictionary `{"url": <url>}`
31 |    3. The url can be found by going to the [XGI page](https://zenodo.org/communities/xgi) on Zenodo, and clicking on the record you just made. Then the url is `https://zenodo.org/records/<number>/<dataset name>.json`.
32 | 4. In `README.md`, add the dataset name (alphabetically) as a hyperlink with the Zenodo page url.
33 | 5. Run `get_stats.json` with the dataset name as an argument in `load_xgi_data()`. If every cell in this notebook is run, it will update the `index.json` file and add a plot of the degree/edge size distribution.


--------------------------------------------------------------------------------
/datasheets/email-enron/README_email-enron.md:
--------------------------------------------------------------------------------
 1 | # email-enron
 2 | 
 3 | ## Summary
 4 | 
 5 | This is a temporal hypergraph dataset, which here means a sequence of timestamped hyperedges where each hyperedge is a set of nodes. In email communication, messages can be sent to multiple recipients. In this dataset, nodes are email addresses at Enron and a hyperedge is comprised of the sender and all recipients of the email. Only email addresses from a core set of employees are included. Timestamps are in ISO8601 format. 
 6 | 
 7 | This dataset was collected and prepared by the CALO Project (A Cognitive Assistant that Learns and Organizes). It contains data from about 150 users, mostly senior management of Enron, organized into folders. The corpus contains a total of about 0.5M messages. This data was originally made public, and posted to the web, by the Federal Energy Regulatory Commission during its investigation.
 8 | 
 9 | The email dataset was later purchased by Leslie Kaelbling at MIT, and turned out to have a number of integrity problems. A number of folks at SRI, notably Melinda Gervasio, worked hard to correct these problems, and it is thanks to them (not me) that the dataset is available. The dataset here does not include attachments, and some messages have been deleted "as part of a redaction effort due to requests from affected employees". Invalid email addresses were converted to something of the form user@enron.com whenever possible (i.e., recipient is specified in some parse-able format like "Doe, John" or "Mary K. Smith") and to no_address@enron.com when no recipient was specified.
10 | 
11 | ## Statistics
12 | Some basic statistics of this dataset are:
13 | * number of nodes: 148
14 | * number of timestamped hyperedges: 10,885
15 | * distribution of the connected components:
16 | <center>
17 | 
18 | | Component Size  | Number |
19 | | ----- | ---- |
20 | | 143 | 1 |
21 | | 1 | 5 |
22 | </center>
23 | 
24 | * degree and edge size distributions:
25 | <center>
26 | <img src="stats.png" alt="hypergraph statistics" style="width:75%">
27 | </center>
28 | <figcaption align = "center"><b>Hypergraph degree and edge size distributions</b></figcaption>
29 | 
30 | ## Source of original data
31 | Source: [email-Enron dataset](https://www.cs.cornell.edu/~arb/data/email-Enron/)
32 | 
33 | ## References
34 | If you use this dataset, please cite these references:
35 | * [Simplicial closure and higher-order link prediction](https://doi.org/10.1073/pnas.1800683115). Austin R. Benson, Rediet Abebe, Michael T. Schaub, Ali Jadbabaie, and Jon Kleinberg. Proceedings of the National Academy of Sciences (PNAS), 2018.
36 | * [Enron Email Dataset](https://www.cs.cmu.edu/~enron/), William Cohen, 2015.


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # macOS stuff
132 | .DS_Store
133 | 
134 | 
135 | *.txt
136 | *.dat
137 | 
138 | test.*
139 | *.gexf
140 | *.tsv
141 | data
142 | *.json
143 | *.json.gz


--------------------------------------------------------------------------------
/code/import_eventernote-events.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import pandas as pd
 5 | 
 6 | data_folder = "data"
 7 | 
 8 | dataset_name = "eventernote-events"
 9 | 
10 | new_dataset_name = "eventernote-events"
11 | 
12 | dataset_folder = "eventernote"
13 | events_file = f"events.csv"
14 | actors_file = f"actors.csv"
15 | places_file = f"places.csv"
16 | dest_file = f"{new_dataset_name}.json"
17 | 
18 | events_path = os.path.join(data_folder, dataset_folder, events_file)
19 | actors_path = os.path.join(data_folder, dataset_folder, actors_file)
20 | places_path = os.path.join(data_folder, dataset_folder, places_file)
21 | dest_path = os.path.join(data_folder, dataset_folder, dest_file)
22 | 
23 | events = pd.read_csv(events_path)
24 | 
25 | # process events
26 | # filter out events after 2024-04-30
27 | events = events[events["event_date"] <= "2024-04-30"]
28 | # drop events with no actors
29 | events = events.dropna(subset=["actor_id"])
30 | # drop duplicates
31 | events.drop_duplicates(subset="id", inplace=True)
32 | events.set_index("id", inplace=True)
33 | events.sort_index(inplace=True)
34 | 
35 | # process actors
36 | 
37 | actors = pd.read_csv(actors_path)
38 | actor_cols = ["id", "name", "kana", "initial", "sex"]
39 | actors = actors[actor_cols]
40 | # deduplicate actor ids
41 | duplicates = actors[actors.duplicated(subset="id", keep=False)]
42 | nan_ids = actors[actors["id"].isna()]
43 | actors.drop_duplicates(subset="id", inplace=True)
44 | actors.set_index("id", inplace=True)
45 | actors.sort_index(inplace=True)
46 | node_data = actors.to_dict(orient="index")
47 | 
48 | event_cols = ["event_date", "place_id", "event_name"]
49 | events_save = events[event_cols].copy()
50 | edge_data = events_save.to_dict(orient="index")
51 | 
52 | 
53 | # nodes are actors
54 | def process_actor_id(x):
55 |     # if float or int, return as list
56 |     if isinstance(x, (float, int)):
57 |         return [str(x)]
58 |     # if string, split and convert to list
59 |     # return list(map(int, x.split(",")))
60 |     return x.split(",")
61 | 
62 | 
63 | edge_dict = events["actor_id"].map(process_actor_id).to_dict()
64 | 
65 | # corrections
66 | # node 3065 in edge_dict[484] should be 3562
67 | edge_dict[484].remove("3065")
68 | edge_dict[484].append("3562")
69 | 
70 | # remove node 3668 from edge 116525
71 | edge_dict[116525].remove("3668")
72 | 
73 | # add info for node 30071
74 | node_data[30071] = {"name": "TOWA TEI", "kana": "ていとうわ", "initial": "て", "sex": 2}
75 | 
76 | # save
77 | 
78 | H = {"hypergraph-data": {"name": "eventernote-events"}}
79 | 
80 | H["node-data"] = node_data
81 | H["edge-data"] = edge_data
82 | H["edge-dict"] = edge_dict
83 | 
84 | with open(dest_path, "w") as f:
85 |     json.dump(H, f)
86 | 


--------------------------------------------------------------------------------
/code/import_eventernote-places.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | 
  4 | import pandas as pd
  5 | 
  6 | data_folder = "data"
  7 | 
  8 | dataset_name = "eventernote-places"
  9 | 
 10 | new_dataset_name = "eventernote-places"
 11 | 
 12 | dataset_folder = "eventernote"
 13 | events_file = f"events.csv"
 14 | actors_file = f"actors.csv"
 15 | places_file = f"places.csv"
 16 | dest_file = f"{new_dataset_name}.json"
 17 | 
 18 | events_path = os.path.join(data_folder, dataset_folder, events_file)
 19 | actors_path = os.path.join(data_folder, dataset_folder, actors_file)
 20 | places_path = os.path.join(data_folder, dataset_folder, places_file)
 21 | dest_path = os.path.join(data_folder, dataset_folder, dest_file)
 22 | 
 23 | events = pd.read_csv(events_path)
 24 | 
 25 | # process events
 26 | # filter out events after 2024-04-30
 27 | events = events[events["event_date"] <= "2024-04-30"]
 28 | # drop events with no actors
 29 | events = events.dropna(subset=["actor_id", "place_id"])
 30 | events["place_id"] = events["place_id"].astype(int)
 31 | # drop duplicates
 32 | events.drop_duplicates(subset="id", inplace=True)
 33 | # drop NAs in place ids
 34 | events.set_index("id", inplace=True)
 35 | events.sort_index(inplace=True)
 36 | 
 37 | # process places
 38 | places = pd.read_csv(places_path)
 39 | places_cols = ["id", "place_name", "prefecture"]
 40 | places = places[places_cols]
 41 | # deduplicate place ids
 42 | places.drop_duplicates(subset="id", inplace=True)
 43 | places.set_index("id", inplace=True)
 44 | places.sort_index(inplace=True)
 45 | edge_data = places.to_dict(orient="index")
 46 | 
 47 | # process actors
 48 | 
 49 | actors = pd.read_csv(actors_path)
 50 | actor_cols = ["id", "name", "kana", "initial", "sex"]
 51 | actors = actors[actor_cols]
 52 | # deduplicate actor ids
 53 | duplicates = actors[actors.duplicated(subset="id", keep=False)]
 54 | nan_ids = actors[actors["id"].isna()]
 55 | actors.drop_duplicates(subset="id", inplace=True)
 56 | actors.set_index("id", inplace=True)
 57 | actors.sort_index(inplace=True)
 58 | node_data = actors.to_dict(orient="index")
 59 | 
 60 | 
 61 | # nodes are actors
 62 | def process_actor_id(x):
 63 |     # if float or int, return as list
 64 |     if isinstance(x, (float, int)):
 65 |         return [str(x)]
 66 |     # if string, split and convert to list
 67 |     # return list(map(int, x.split(",")))
 68 |     return x.split(",")
 69 | 
 70 | 
 71 | # for every event, add actor_ids to the place id
 72 | edge_dict = {int(x): [] for x in edge_data.keys()}
 73 | edge_dict[974] = []
 74 | edge_dict[0] = []
 75 | for idx, row in events.iterrows():
 76 |     actor_ids = process_actor_id(row["actor_id"])
 77 |     for actor_id in actor_ids:
 78 |         edge_dict[int(row["place_id"])].append(actor_id)
 79 | 
 80 | # remove empty edges
 81 | edge_dict = {k: v for k, v in edge_dict.items() if v}
 82 | 
 83 | # corrections
 84 | # node 3065 in edge_dict[43] should be 3562
 85 | edge_dict[43].remove("3065")
 86 | edge_dict[43].append("3562")
 87 | 
 88 | # remove node 3668 from edge 3080
 89 | edge_dict[3080].remove("3668")
 90 | 
 91 | # add info for node 30071
 92 | node_data[30071] = {"name": "TOWA TEI", "kana": "ていとうわ", "initial": "て", "sex": 2}
 93 | 
 94 | # add info for edges 974 and 0
 95 | edge_data[974] = {"place_name": "未定", "prefecture": "0"}
 96 | 
 97 | edge_data[0] = {"place_name": "未定", "prefecture": "0"}
 98 | 
 99 | # save
100 | 
101 | H = {
102 |     "hypergraph-data": {
103 |         "name": new_dataset_name,
104 |     }
105 | }
106 | 
107 | H["node-data"] = node_data
108 | H["edge-data"] = edge_data
109 | H["edge-dict"] = edge_dict
110 | 
111 | with open(dest_path, "w") as f:
112 |     json.dump(H, f)
113 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # XGI-DATA
 2 |  
 3 | This is a repository of openly available hypergraph datasets in JSON format with documentation more extensively describing the datasets. They are hosted in the [XGI Community](https://zenodo.org/communities/xgi) on Zenodo and a table of statistics can be found on [Read The Docs](https://xgi.readthedocs.io/en/stable/xgi-data.html). There is also a rudimentary inspection script for checking that datasets are in the proper format. This is loosely inspired by [Datasheets for Datasets](https://arxiv.org/abs/1803.09010) by Gebru et al.
 4 | 
 5 | ## Overview of the xgi-data format
 6 | The xgi-data format for hypergraph data sets is a JSON data structure with the following structure:
 7 | * `hypergraph-data`: This tag accesses the attributes of the entire hypergraph dataset such as the authors or dataset name.
 8 | * `node-data`: This tag accesses the nodes of the hypergraph and their associated properties as a dictionary where the keys are node IDs and the corresponding values are dictionaries. If a node doesn't have any properties, the associated dictionary is empty.
 9 |   * `name`: This tag accesses the node's name if there is one that is different from the ID specified in the hyperedges.
10 |   * Other tags are user-specified based on the particular attributes provided by the dataset.
11 | * `edge-data`: This tag accesses the hyperedges of the hypergraph and their associated attributes.
12 |   * `name`: This tag accesses the edge's name if one is provided.
13 |   * `timestamp`: This is the tag specifying the time associated with the hyperedge if it is given. All times are stored in ISO8601 standard.
14 |   * Other tags are user-specified based on the particular attributes provided by the dataset.
15 | * `edge-dict`: This tag accesses the edge IDs and the corresponding nodes which participate in that hyperedge.
16 | 
17 | All IDs are strings but can be converted to other types if desired.
18 | 
19 | ## Data sets available on xgi-data
20 | 
21 | Currently available data sets are:
22 | * [coauth-dblp](https://zenodo.org/records/10155873)
23 | * [coauth-mag-geology](https://zenodo.org/records/10928443)
24 | * [coauth-mag-history](https://zenodo.org/records/13151009)
25 | * [congress-bills](https://zenodo.org/records/10928561)
26 | * [contact-high-school](https://zenodo.org/records/10155802)
27 | * [contact-primary-school](https://zenodo.org/records/10155810)
28 | * [dawn](https://zenodo.org/records/10155779)
29 | * [diseasome](https://zenodo.org/records/10155812)
30 | * [disgenenet](https://zenodo.org/records/10155817)
31 | * [email-enron](https://zenodo.org/records/10155819)
32 | * [email-eu](https://zenodo.org/records/10155823)
33 | * [eventernote-events](https://zenodo.org/records/11151063)
34 | * [eventernote-places](https://zenodo.org/records/11263394)
35 | * [hospital-lyon](https://zenodo.org/records/10155825)
36 | * [house-bills](https://zenodo.org/records/10957691)
37 | * [house-committees](https://zenodo.org/records/10957702)
38 | * [hypertext-conference](https://zenodo.org/records/10206136)
39 | * [hyperbard](https://zenodo.org/records/11211879)
40 | * [invs13](https://zenodo.org/records/10206151)
41 | * [invs15](https://zenodo.org/records/10206154)
42 | * [kaggle-whats-cooking](https://zenodo.org/records/10157609)
43 | * [malawi-village](https://zenodo.org/records/10206147)
44 | * [ndc-classes](https://zenodo.org/records/10155772)
45 | * [ndc-substances](https://zenodo.org/records/10929019)
46 | * [plant-pollinator-mpl-014](https://zenodo.org/records/14561179)
47 | * [plant-pollinator-mpl-015](https://zenodo.org/records/13754154)
48 | * [plant-pollinator-mpl-016](https://zenodo.org/records/13754237)
49 | * [plant-pollinator-mpl-021](https://zenodo.org/records/14561215)
50 | * [plant-pollinator-mpl-034](https://zenodo.org/records/14561227)
51 | * [plant-pollinator-mpl-044](https://zenodo.org/records/14561246)
52 | * [plant-pollinator-mpl-046](https://zenodo.org/records/14561257)
53 | * [plant-pollinator-mpl-049](https://zenodo.org/records/13754332)
54 | * [plant-pollinator-mpl-057](https://zenodo.org/records/14561268)
55 | * [plant-pollinator-mpl-062](https://zenodo.org/records/13753744)
56 | * [science-gallery](https://zenodo.org/records/10206142)
57 | * [senate-bills](https://zenodo.org/records/10957697)
58 | * [senate-committees](https://zenodo.org/records/10957699)
59 | * [sfhh-conference](https://zenodo.org/records/10198859)
60 | * [tags-ask-ubuntu](https://zenodo.org/records/10155835)
61 | * [tags-math-sx](https://zenodo.org/records/10155845)
62 | * [tags-stack-overflow](https://zenodo.org/records/10155885)
63 | * [threads-ask-ubuntu](https://zenodo.org/records/10373311)
64 | * [threads-math-sx](https://zenodo.org/records/10373324)
65 | * [threads-stack-overflow](https://zenodo.org/records/10373328)
66 | 
67 | These datasets can be loaded with `xgi` using the following lines:
68 | ```python
69 | import xgi
70 | H = xgi.load_xgi_data("<dataset_name>")
71 | ```
72 | where `<dataset_name>` is chosen from the list above.
73 | 
74 | These datasets have been taken from the following sources:
75 | * [Data! by Austin Benson](https://www.cs.cornell.edu/~arb/data/)
76 | * [DisGeneNet](https://www.disgenet.org/)
77 | * [Gephi](https://github.com/gephi/gephi.github.io/)
78 | * [SocioPatterns](http://www.sociopatterns.org/)
79 | 
80 | ## Repository Description
81 | `index.json` is a dictionary of the data sets that are currently available on xgi-data and the url where they are hosted.
82 | The `code` folder contains the scripts used to convert hypergraph datasets into a more standard format and the JSON inspection script. This code can be adapted to convert data sets that are currently not part of xgi-data into xgi-data format.
83 | 
84 | 
85 | ## Checking dataset format
86 | To check if a file has the xgi-data format, run the following command:
87 | ```
88 | python inspect_json.py filepath.json
89 | ```
90 | 
91 | ## Funding
92 | The XGI-DATA package has been supported by NSF Grant 2121905, ["HNDS-I: Using Hypergraphs to Study Spreading Processes in Complex Social Networks"](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2121905).


--------------------------------------------------------------------------------
/get_stats.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import json\n",
 10 |     "from os.path import dirname, join\n",
 11 |     "from pathlib import Path\n",
 12 |     "\n",
 13 |     "import matplotlib.pyplot as plt\n",
 14 |     "import seaborn as sns\n",
 15 |     "import xgi"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 2,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "dataset_name = \"arxiv-kaggle\""
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": null,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "with open(\"index.json\") as file:\n",
 34 |     "    jsondata = json.load(file)\n",
 35 |     "baseurl = jsondata[dataset_name][\"url\"]\n",
 36 |     "\n",
 37 |     "try:\n",
 38 |     "    H = xgi.from_hif_dict(xgi.request_json_from_url(baseurl))\n",
 39 |     "    stats = {}\n",
 40 |     "except KeyError:\n",
 41 |     "    if (\n",
 42 |     "        \"data-type\" in jsondata[dataset_name]\n",
 43 |     "        and jsondata[dataset_name][\"data-type\"] == \"collection\"\n",
 44 |     "    ):\n",
 45 |     "        H = {}\n",
 46 |     "        stats = {\"datasets\": {}}\n",
 47 |     "        for key, data in xgi.request_json_from_url(baseurl)[\"datasets\"].items():\n",
 48 |     "            relpath = data[\"relative-path\"]\n",
 49 |     "            url = join(dirname(baseurl), relpath)\n",
 50 |     "            H[key] = xgi.from_hypergraph_dict(xgi.request_json_from_url(url))\n",
 51 |     "            stats[\"datasets\"][key] = {\"url\": url}\n",
 52 |     "    else:\n",
 53 |     "        H = xgi.from_hypergraph_dict(xgi.request_json_from_url(baseurl))\n",
 54 |     "        stats = {}"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "if isinstance(H, xgi.Hypergraph):\n",
 64 |     "    stats.update(\n",
 65 |     "        {\n",
 66 |     "            \"num-nodes\": int(H.num_nodes),\n",
 67 |     "            \"num-edges\": int(H.num_edges),\n",
 68 |     "        }\n",
 69 |     "    )\n",
 70 |     "elif isinstance(H, dict):\n",
 71 |     "    for key, net in H.items():\n",
 72 |     "        stats[\"datasets\"][key].update(\n",
 73 |     "            {\n",
 74 |     "                \"num-nodes\": int(net.num_nodes),\n",
 75 |     "                \"num-edges\": int(net.num_edges),\n",
 76 |     "            }\n",
 77 |     "        )"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 6,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "if isinstance(H, xgi.Hypergraph):\n",
 87 |     "    simpleH = H.copy()\n",
 88 |     "    simpleH.merge_duplicate_edges()\n",
 89 |     "    stats.update(\n",
 90 |     "        {\n",
 91 |     "            \"num-unique-edges\": int(simpleH.num_edges),\n",
 92 |     "        }\n",
 93 |     "    )\n",
 94 |     "elif isinstance(H, dict):\n",
 95 |     "    for key, net in H.items():\n",
 96 |     "        simpleH = net.copy()\n",
 97 |     "        simpleH.merge_duplicate_edges()\n",
 98 |     "        stats[\"datasets\"][key].update(\n",
 99 |     "            {\n",
100 |     "                \"num-unique-edges\": int(simpleH.num_edges),\n",
101 |     "            }\n",
102 |     "        )"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 7,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": [
111 |     "if isinstance(H, xgi.Hypergraph):\n",
112 |     "    s = H.edges.size\n",
113 |     "    stats.update(\n",
114 |     "        {\n",
115 |     "            \"min-edge-size\": int(s.min()),\n",
116 |     "            \"max-edge-size\": int(s.max()),\n",
117 |     "            \"mean-edge-size\": round(s.mean(), 2),\n",
118 |     "        }\n",
119 |     "    )\n",
120 |     "elif isinstance(H, dict):\n",
121 |     "    for key, net in H.items():\n",
122 |     "        s = net.edges.size\n",
123 |     "        stats[\"datasets\"][key].update(\n",
124 |     "            {\n",
125 |     "                \"min-edge-size\": int(s.min()),\n",
126 |     "                \"max-edge-size\": int(s.max()),\n",
127 |     "                \"mean-edge-size\": round(s.mean(), 2),\n",
128 |     "            }\n",
129 |     "        )"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 8,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "if isinstance(H, xgi.Hypergraph):\n",
139 |     "    d = H.nodes.degree\n",
140 |     "    stats.update(\n",
141 |     "        {\n",
142 |     "            \"min-degree\": int(d.min()),\n",
143 |     "            \"max-degree\": int(d.max()),\n",
144 |     "            \"mean-degree\": round(d.mean(), 2),\n",
145 |     "        }\n",
146 |     "    )\n",
147 |     "elif isinstance(H, dict):\n",
148 |     "    for key, net in H.items():\n",
149 |     "        d = net.nodes.degree\n",
150 |     "        stats[\"datasets\"][key].update(\n",
151 |     "            {\n",
152 |     "                \"min-degree\": int(s.min()),\n",
153 |     "                \"max-degree\": int(d.max()),\n",
154 |     "                \"mean-degree\": round(d.mean(), 2),\n",
155 |     "            }\n",
156 |     "        )"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 9,
162 |    "metadata": {},
163 |    "outputs": [],
164 |    "source": [
165 |     "if isinstance(H, xgi.Hypergraph):\n",
166 |     "    cc = [len(c) for c in xgi.connected_components(H)]\n",
167 |     "    stats.update(\n",
168 |     "        {\n",
169 |     "            \"num-components\": len(cc),\n",
170 |     "            \"gc-size\": max(cc),\n",
171 |     "        }\n",
172 |     "    )\n",
173 |     "elif isinstance(H, dict):\n",
174 |     "    for key, net in H.items():\n",
175 |     "        cc = [len(c) for c in xgi.connected_components(net)]\n",
176 |     "        stats[\"datasets\"][key].update(\n",
177 |     "            {\n",
178 |     "                \"num-components\": len(cc),\n",
179 |     "                \"gc-size\": max(cc),\n",
180 |     "            }\n",
181 |     "        )"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 10,
187 |    "metadata": {},
188 |    "outputs": [],
189 |    "source": [
190 |     "with open(\"index.json\") as file:\n",
191 |     "    data = json.load(file)\n",
192 |     "\n",
193 |     "data[dataset_name].update(stats)\n",
194 |     "\n",
195 |     "datastring = json.dumps(data, indent=2)\n",
196 |     "\n",
197 |     "with open(\"index.json\", \"w\") as output_file:\n",
198 |     "    output_file.write(datastring)"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 11,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "def plot_distributions(\n",
208 |     "    H,\n",
209 |     "    node_bins=None,\n",
210 |     "    edge_bins=None,\n",
211 |     "    log_x1=True,\n",
212 |     "    log_y1=True,\n",
213 |     "    log_x2=True,\n",
214 |     "    log_y2=True,\n",
215 |     "):\n",
216 |     "\n",
217 |     "    if node_bins is None:\n",
218 |     "        node_bins = int(H.num_nodes / 20)\n",
219 |     "        node_bins = min(node_bins, 100)\n",
220 |     "\n",
221 |     "    if edge_bins is None:\n",
222 |     "        s = H.edges.size\n",
223 |     "        edge_bins = int(s.max() - s.min() + 1)\n",
224 |     "        edge_bins = min(edge_bins, 100)\n",
225 |     "\n",
226 |     "    plt.figure(figsize=(8, 4))\n",
227 |     "    plt.subplot(121)\n",
228 |     "\n",
229 |     "    h1 = H.nodes.degree.ashist(bins=node_bins, density=True, log_binning=log_x1)\n",
230 |     "    h2 = H.edges.size.ashist(bins=edge_bins, density=True, log_binning=log_x2)\n",
231 |     "    y1 = h1[h1.value > 0].value\n",
232 |     "    y2 = h2[h2.value > 0].value\n",
233 |     "    ymin = 0.95 * min(y1.min(), y2.min())\n",
234 |     "    ymax = 1.1 * max(y1.max(), y2.max())\n",
235 |     "\n",
236 |     "    if log_x1 and log_y1:\n",
237 |     "        plt.loglog(h1[\"bin_center\"], h1[\"value\"], \"ko\", markersize=2)\n",
238 |     "    elif log_x1 and not log_y1:\n",
239 |     "        plt.semilogx(h1[\"bin_center\"], h1[\"value\"], \"ko\", markersize=2)\n",
240 |     "    elif not log_x1 and log_y1:\n",
241 |     "        plt.semilogy(h1[\"bin_center\"], h1[\"value\"], \"ko\", markersize=2)\n",
242 |     "    elif not log_x1 and not log_y1:\n",
243 |     "        plt.plot(h1[\"bin_center\"], h1[\"value\"], \"ko\", markersize=2)\n",
244 |     "\n",
245 |     "    plt.title(\"Degree distribution\")\n",
246 |     "    plt.xlabel(r\"$k$\", fontsize=16)\n",
247 |     "    plt.ylabel(r\"$P(k)$\", fontsize=16)\n",
248 |     "    plt.ylim([ymin, ymax])\n",
249 |     "    sns.despine()\n",
250 |     "\n",
251 |     "    plt.subplot(122)\n",
252 |     "\n",
253 |     "    if log_x2 and log_y2:\n",
254 |     "        plt.loglog(h2[\"bin_center\"], h2[\"value\"], \"ko\", markersize=2)\n",
255 |     "    elif log_x2 and not log_y2:\n",
256 |     "        plt.semilogx(h2[\"bin_center\"], h2[\"value\"], \"ko\", markersize=2)\n",
257 |     "    elif not log_x2 and log_y2:\n",
258 |     "        plt.semilogy(h2[\"bin_center\"], h2[\"value\"], \"ko\", markersize=2)\n",
259 |     "    elif not log_x2 and not log_y2:\n",
260 |     "        plt.plot(h2[\"bin_center\"], h2[\"value\"], \"ko\", markersize=2)\n",
261 |     "\n",
262 |     "    plt.title(\"Edge size distribution\")\n",
263 |     "    plt.xlabel(r\"$s$\", fontsize=16)\n",
264 |     "    plt.ylabel(r\"$P(s)$\", fontsize=16)\n",
265 |     "    plt.ylim([ymin, ymax])\n",
266 |     "    sns.despine()\n",
267 |     "    plt.tight_layout()\n",
268 |     "    Path(f\"datasheets/{dataset_name}\").mkdir(parents=True, exist_ok=True)\n",
269 |     "    plt.savefig(f\"datasheets/{dataset_name}/stats.png\", dpi=300)\n",
270 |     "    plt.show()"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": 12,
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "data": {
280 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAGGCAYAAADmRxfNAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAU69JREFUeJzt3Ql4VOXZ//GHgAmLogIVDBDRatGIgAWCuC9YXhcQq7WtVSOKVk2hgksB2z9albTFhQJW61J3W9wA9cVqRZQqIAhCqxRTLCqKgHFBWQSB+V+/p555J5OZZCbnzMxZvp/rGoc5mcycHJPnnme576dZLBaLGQAAAABwocjNNwMAAACA0LEAAAAA4BodCwAAAACu0bEAAAAA4BodCwAAAACu0bEAAAAA4BodCwAAAACu0bEAAAAA4BodCwAAAACu0bEAfOL888833bp1q3OsWbNm5tprr835e7/00kv2vXTvOPbYY02PHj1MPrz77rv2/e+77768vB+AYPF7G6H2Ujc/SI4bumY6pmuY7zjm/H+76aabTD7o59b7oXDoWCBvnMbNubVs2dKUlpaaQYMGmcmTJ5svv/yy0KcYCo888oiZNGmS8SM/nxuA/LX/ybcFCxYU+hSRYPPmzfZDeuJgk1/4+dxgTItCnwCi59e//rXZd999zddff23Wrl1rG4fLL7/c3HLLLeapp54yPXv2LPQp+saWLVtMixYtsv7w/uabb9prmqmjjz7avldxcXETztL9ue2zzz72/XfZZZecvj8Af7T/yfbff38TZM8//7zxq3PPPdf86Ec/MiUlJVl9eL/uuuvsv7OZibnrrrvMzp07TS41dG6//OUvzZgxY3L6/mgYHQvk3UknnWT69u0bfzx27Fjz4osvmlNPPdUMGTLE/Otf/zKtWrXK2/nEYjHz1Vdf5fU9M6VZnVzSz63ORFFRUc7fqyHODBaAaLX/YZHrQRk3mjdvbm+5tGnTJtOmTZuCDw5pIC7bwTh4i6VQ8IXjjz/e/OpXvzLvvfeeeeihh+p8bcWKFebMM8807dq1sx8+FZQ0s5HsH//4hznmmGNsB6FLly7mhhtuMPfee2+9taVa/6lOzHPPPWdfS8//4x//aL/2+eef29H0rl272tEdjaL99re/rTcCo8da0nPwwQfbc+rYsaP56U9/aj777LOMft4ZM2bY/AV9r+6nT5+e0VpZLRfT+eln0Pnttdde5sQTTzRLliyJj9787//+r72OzhIDZ72rk0fxl7/8xY7qdO7c2bRu3dp88cUXKXMsHIsXLzaHH364vU4aabzjjjvqfD3d+t3k12zo3NKtn1aH86ijjrIBa4899jCnnXaa7XimWlO7cuVKu75Xz9t9993NsGHD7MgWgGBRO6y/Zf0d6++5srLSHkvlscceM+Xl5XXa0lT5am7abM2sqz1RXFG7u/fee9u2KLHNS86x0PunW/aV2M5++OGH5oILLrDno9fW+f3pT3/K6Dpt3brVjBo1ynzrW98yu+22mx2Y++CDD+o9L1Ub/frrr9tlyB06dIi37ToP0fP0mqKZAee8nVik67vrrruad955x5x88sn2vX/yk5/Ev5Z87R233nqrnZ3W+ylWa/Y6kzyVxNds7NxS5Vhs377dXH/99ebb3/62vcZ6rXHjxtnrl8j5bPDKK6+YiooK+3uy3377mQceeCCD/xtw0K2Dr6Zr9ceuKeWLLrrIHnvrrbfMEUccYT8Ea3pTHzAfffRRM3ToUPPEE0+Y008/Pd44H3fccbZB0QyInnf33Xennfp9++23zY9//GMbWPRe3bt3tx9C1djptXS8rKzMzJs3z77eRx99VCc3QF9XY61gM3LkSLNq1SozdepU88Ybb5hXX321wVEb/XxnnHGGDYbV1dXmk08+iQetxlxyySXm8ccfNz/72c/s9+t71Qjqw/Z3v/tdc80115gNGzbY4KJGXBQAEqmB1ejalVdeaRvWhkbaFHQVOM466yx7vXTtL730Uvs9ThDKVCbnluiFF16wo5tq2BUstFRqypQp9vdBHank4KVzVHDUNdXX9f9fHS91DAH4g9qA2traOsfUbrdv3z4+g6wP7WrX1N4ddNBBtrOgzkUyDVT88Ic/NIcccoj9u1d7deGFF9p4kcxNm632WrFoxIgRtt1Zv369+dvf/mbef//9tB+iFS82btxY55javaVLl8Z/1nXr1pnDDjvM/vxq0/WB+dlnn7U/gwZ8GlvOOnz4cDsQd/bZZ9vBHw3EnHLKKaYxOv/vfe979v0UV9V50wf2J5980n5dx2+//Xbb1ivGfv/737fHE5cp68O6OiZHHnmkTczWIFVD9OFcA2NVVVV2pvz3v/+9HVD85z//aTtVmcrk3FJdp/vvv98OUF5xxRXmtddes78vipvJg3oaoNLz9P9Av3Pq5Klj06dPH9vpQwZiQJ7ce++9Mf3KLVq0KO1zdt9999ihhx4af3zCCSfEDjnkkNhXX30VP7Zz587Y4YcfHjvggAPix0aMGBFr1qxZ7I033ogf++STT2Lt2rWz77lq1ar48X322cce++tf/1rnva+//vpYmzZtYjU1NXWOjxkzJta8efPY+++/bx///e9/t9//8MMP13meXi/V8WS9e/eO7b333rHPP/88fuz555+336tzS6Rj48ePr3N9qqqqGnz9U045pd7ryJw5c+zr7bfffrHNmzen/JruHcccc4w9dvPNN8ePbd261Z7/XnvtFdu2bVud/6+J1zjda6Y7N32vnqvXSrxOeh/9f3QsW7YsVlRUFDvvvPPix3R99L0XXHBBndc8/fTTY+3bt2/wWgHID6edSHUrKSmJP2/GjBn22O9+97v4se3bt8eOOuqoem2EYkOXLl1iX375ZfzYSy+9VK8tddNmf/bZZ/Y5EydObPDnU3upWzqPPvqofZ1f//rX8WMXXnihjQW1tbV1nvujH/3ItvXJ7XSipUuX2te77LLL6hw/++yz68WN5DZ6+vTpjcbijz/+uN7rOCorK+3XFBtTfS3x2jtte6tWrWIffPBB/Phrr71mj48aNarRa5j8mg2dmxMPkq/T8OHD6zzvyiuvtMdffPHFep8N5s6dGz+2fv16+/t5xRVXpLlSSMZSKPiKRrCd6lCffvqpHYHRSLSOaZRLN43Sa6Tk3//+t51dkL/+9a9mwIABpnfv3vHX0tIpZ3o2mUa29RrJU+padrPnnnvG30u3gQMHmh07dpi5c+fGn6cpei1BSnyeRjR0/nPmzEn782nmQyNWGgnRazj0WpqBaIxGljTasmbNGtNUeu9M80m0VlUjfQ7NVOixRry0RCpXnOukkSL9f3RoVErXatasWfW+R6ObifT/Ur8rGvkD4A+33XabHe1PvGmU3qG/bbU7GpF2KD9AswWJ1AZqtPu8886rM/OpWWfNYCRy02arrVS7p+VLmS51TbZ8+XI7w6uZGC1DFY0badZ98ODB9t+J56XYpJkdZ4lrKk4bqNmXRJkU7VAckWeeecYWUWmqxP9HjdEqg8SZJC016t+/f8q23EvO648ePbrOcc1cOLNeiRSHFTsSZ0i0ouE///lPTs8zTFgKBV/R1LGWrzhTkmpwlXuhWyr6gKvGSuv21bHItNJIqqok6qgoT8NZv5nqvZznqdF3zjPd81LRecoBBxxQ72tqvBoKJPK73/3OdgyUA6KgqGVKCqxaLpSpVD97OioHrGVlib7zne/Ye02daxo/F5zrpGuSTEsjlB/jJAs6tHQtkTqIog8Dbdu2zcl5AsiOPlA2lLytv33lMCQvk0xuC5w2IlUbr2OJbambNlvLabWcUh9EtWRHbZ7W4avd7dSpk2mMBja0XEdxSsuBnPX/H3/8sc0bufPOO+0t2/PSz6+iG8obSJSqzUymzpeWdylHQcuzlNegD/5aUpVp5Sh1/jJZvutIFfMUS7S8Npec65T8e6L/d+pgOb9H6eKIE0ua2qmMIjoW8A2tvVfj7zQATsK0cgGSZxfclihMNWKv99OI1tVXX53ye5wP1HqeAtTDDz+c8nnpOiZe0OyNRlO0LlS5GhMnTrRBT2tjlY+QCa+rX6XbjEizPPmUrurJf1eUAYgqt222ZgE0s6CiGxrU0ECX1uhrRv3QQw9t8Hs166rZlYULF9YZ4HDi2znnnJMyf0RyVXpdbbZy9bR3yNNPP21/Js2o3HzzzfZYQ7lvDnVA9IHd6/NK1V57EUsy3TSPOOIeHQv4xoMPPmjvnU6EMwqvpDotR2qIKk1ohiNZqmPpaORHMyaNvZeep8RiJRFn+yFd5+mMoKVKKM+ERvMuu+wye9OIlpK2b7zxxnjHwstdRxUQk2cGampq7L2TtOjMDCRXbUkeCcrm3JzrlOqaqEqYKpkkz6QACD797c+ePdu2xYkfcJPbAqeNyKTdd9NmJ76GZi10U/utZbf6IJ5cxTDRb37zG9sZ0cDPgQceWOdrTiUnfWhuLOakop9fnRNVZkqcpcg0johmX3RT/NAeQ1o6rKqBSnb2evfqVDFPsSQx+V2xJNWSo+RYks25OddJ76/ZbocS5xWznN8jeIccC/iCRn5UrUjLdJy8CI0waYpWpWC15j6ZppId6ozMnz/frst3KEcj3QhVutkAvYZGb5KpAVIVDOd5CgY632R6TrqyiE6nQAFJFSo0O+PQOmOtw22I3jPxe5xrpOVKiWXz9IE7+XlNpZ/HKcUr27Zts48VFLUUS5ypeCcHxTnXVNP7mZ5b4nVKvJ4qT6iZGi0BAxA++ttWu6PKP4ntiSrCJVK7p/KyWl6UWH3p5ZdftrkXidy02aoWqCpGidTmqVOQXK40kToyyqdQNTwtM0o1Mq7lSMqzSC67mhzfUnEGkiZPnlzneGL1wnS0rCd5BN7JT3R+JqfKU0PXJhvqYDk5kaIZHOULJs6067pq4CjxZ1+2bJmt2pUom3NzYkXyddGGvJJJFS1khxkL5J0S9dR4qEHXqIE6FfpgrZED7U+RuFGaEv1Uzk7JeCoLq1kMfY86AFo6pUZHtHxJI0dayqQkP6fcrNZLqoORyQjHVVddZd9f62ed8nIarVeQ0rSxcgo0Uq71qUpg1lS4OjIq26dZFY2IKElQZfRUri4dfZ8aM/1cmn7W+SloqpRdcnnCREpg15pWvXavXr3saJ6C16JFi+zImUPnPW3aNJus1q9fP/s8TeM3hYK3llrpZ9dSML2ufmZ1GpzyjDpvjXqpLK9+FiVba9TL6YglyubctMxLQUe5Myr955SbVRJm4t4eAILX/idTuVS172oPNLOgMqhqd5RMqxH/VAMSEyZMsAnRer7KyOoDs0rIqsOR2Ja6abM1qn7CCSfYzonORbkFWoqqOKTdrNNReW4NwCi3IHlWQ3FK+Rqa0VDiuJKYFd/0+mpDlR+itl3/TkcdAb3HH/7wB3ttdP0005PJLL0GbPR9KteqD/OKLdoxW0u1nA/imtnR+ai9Vtuvdl3XVbem0LJlxTwlfKvzog/6KrubuPRY8VAf+DVQqDZfM/LaN0kxJrEIRzbnplippWaKWeqI6HdBnRpdA3X4VKYeHqtXJwrIU7nB4uLiWKdOnWInnnhi7Pe//33siy++SPl977zzji0vqufusssusc6dO8dOPfXU2OOPP17neSo1q5KEKg2nEoTV1dWxyZMn2/dau3ZtnZJyKnuaisoWjh07Nrb//vvb8+vQoYMtbXvTTTfFy6s67rzzzlifPn1sGb3ddtvNlj68+uqrY2vWrGn0WjzxxBOxgw46yJ5reXl57Mknn6xXUk8SS+qp1OtVV10V69Wrl30/lcbVv//whz/U+Z6NGzfakoN77LFHnbKLTvnXxx57rN75pCs3e/DBB8def/312IABA2ItW7a0rzV16tSU/48GDhxof56OHTvGxo0bF/vb3/5W7zXTnVuqcrPywgsvxI444gh7jdu2bRsbPHhwbPny5SnLC6oEYaJ0ZXAB+KvcbPLfvkpMn3vuufZvXmVX9W+176naiL/85S+xAw880LY9PXr0iD311FOxM844wx5L1pQ2W6VgVeJbr6c2V+fTv39/Wz42UXKp1IZ+1sQ2cd26dfb1u3btauOb4pzKrOtcG7Nly5bYyJEjbVltnZvax9WrVzdabnbJkiWxH//4x7GysjJ73VTWWzFVbX2iefPm2eulWJj4mopVer9U0pWbVblelS7Xz6n3VKxW+fBkDz30kC2JrvdUyfHnnnsuZWxMd27J5Wbl66+/jl133XWxfffd115jnYPifGIZ+4Y+GzRWShh1NdN/vO6sAH6hpDst3dHoVbqkLABAeGg0X7MFmgkHkF/kWCA0tFQmkfYwUEK4pl/pVABAuGgPhuQll9pvQktklZ8HIP+YsUCoRqkUTFT5Qetf77nnHlvVSOtOjz766EKfHgDAQ8rBUEUllWxVPphyN7QmX3lYSojWGn4A+UXyNkJDSWdKslaSlpK1VYZVnQs6FQAQPipPqoIQKtShSkIq2qHCGEqKplMBFAYzFgAAAABcI8cCAAAAgGt0LAAAAAC4RsfCA1pNps1bWFUGAEhEfAAQJXQsPKBdK1WFQvfZ0C7Po0aNsvcAgPBpanwAgCCiY+HCbbfdZreV79evX9bfq87EaaedZqZMmWLv6VwAQHi4iQ8AEFR0LFyoqqoyy5cvN4sWLcr6e+fMmWM3bduxY4e916Y+AIBwcBMfACCo6FgUyHHHHRfvVOieXUIBAAAQZGyQ53KqWzd1DLI1ZMgQM3PmTDtToU6FHgMAwsFNfACAoGKDPA+o4oeS8zZs2GDatm1b6NMBAPgE8QFAlLAUCgAAAIBrdCxcoOoHACAV4gOAKGIplAeY6gYApEJ8ABAlzFgAAAAAcI2OBQAAAADX6Fi4wBpaAEAqxAcAUUSOhQdYQwsA8DI+PPXUU2bOnDl2M1X2OQIQFMxYAADgI+pUnHbaaWbKlCn2Xo8BIAjoWAAA4COaqWjevLndtVv3L730UqFPCQAyQscCAAAf0fInp1Oh+2OPPbbQpwQAGWmR2dOQLjlPNzX8AAB4ER+UUzFz5kw7U6FOBTkWAIKC5G0PkLwNAEiF+AAgSlgKBQBAhCgZfNSoUSSFA/AcHYtvPPPMM6Z79+7mgAMOMHfffXehTwcA4BNhig9UnAKQS3QsjDHbt283o0ePNi+++KJ54403zMSJE80nn3xS6NMCABRY2OIDFacA5BIdC2PMwoULzcEHH2w6d+5sdt11V3PSSSeZ559/vtCnBQAosLDFBypOAcilUHQs5s6dawYPHmxKS0tNs2bNzIwZM+o9R9U5unXrZlq2bGn69+9vg4VjzZo1Nmg49O8PP/wwb+cPAMgN4kPqilMjR46091ScAuClUHQsNm3aZHr16mWDQyrTpk2zU9njx483S5Yssc8dNGiQWb9+fd7PFQCQP8SH+tSZuOWWW+hUAPBcKDoWmpq+4YYbzOmnn57y62pAL7roIjNs2DBTXl5u7rjjDtO6dWvzpz/9yX5dI1mJI1D6t46ls3XrVltCMPHmJ1T8AID/Ij4AQP6EomPRkG3btpnFixebgQMHxo8VFRXZx/Pnz7ePKyoqzJtvvmkDxsaNG82zzz5rR6zSqa6utnXJnVvXrl2NX1DxAwAyE7X44BaDVgBM1DsWtbW1NkGtY8eOdY7r8dq1a+2/W7RoYW6++Wab1Na7d29zxRVXmPbt26d9zbFjx9rNjpzb6tWrjV9Q8QMAMhO1+OAGg1YAMtEio2dFgNaaZrretKSkxN60Zlc3BSa/UPCbNGkSFT8AwCNhiQ9eD1qRowEgcjMWHTp0sI3gunXr6hzX406dOrl67aqqKrN8+XKzaNEi4xdU/ACAzEQtPrhBmVoAmQh9x6K4uNj06dPHzJ49O35s586d9vGAAQNcvbZGo5Ts169fP+MnVPwAgMZFMT40FYNWACKzFEoJdStXrow/XrVqlVm6dKlp166dKSsrs6UEKysrTd++fW0inpYKqQShqoAAAMKL+FCYJWEAoqlZLBaLmYDTWk9N0yZTsLjvvvvsv6dOnWomTpxoE/KUgDd58mS7EZIXVE5Q1T+UqNe2bVtPXhMA4B7xAQDyJxQdi0JJTM6rqakhcAAALOIDgCiiY+EBRqQAAKkQHwBESeiTtwEAAADkHh0LF8JW9QMA4A3iA4AoYimUB5jqBgCkEtX4oJ25tameEuepJAVEBzMWAADA007FaaedZqZMmWLv9RhANNCxcCFsU91q/EeNGkUQAACXwhYfsqGZCmeHbt2r5C+AaGAplAfCMNXtjDA5wYCdVQHAvTDEh2wRT4DoYsYCFiNMAAAvqBOhzsTIkSPpVAAR06LQJwB/UILdpEmT4p2LY489ttCnBAAIKHUm6FAA0UPHwqOdVcMywqSZCnUqCAgA0HRhig8AkClyLDwQxTW0AIDGER8ARAk5FgAAAABco2MBAAAAwDU6FgAAIK/YNwkIJzoWLkR5AyQAQHrEh/TYmRsILzoWLlRVVZnly5ebRYsWmahhtAkA0otyfGgM+yYB4UXHAlljtAkA4GbfJKdTwb5JQLjQsUDWGG0CADQVO3MD4UXH4hunn3662XPPPc2ZZ55Z6FPxPUabAEQJ8cF76kzccsstdCqAkKFj8Y2f//zn5oEHHij0aQQCo00AooT4AACZaZHh80JPo+4s6cmcOhN0KABEAfEBAEI0YzF37lwzePBgU1paapo1a2ZmzJiRsrRft27dTMuWLU3//v3NwoULC3KuAID8IT4AgH8EomOxadMm06tXLxscUpk2bZoZPXq0GT9+vFmyZIl97qBBg8z69evjz+ndu7fp0aNHvduaNWvy+JMAALxEfAgXSpkDwdYsFovFTIBoRGr69Olm6NCh8WMagdImRFOnTrWPd+7cabp27WpGjBhhxowZk/Fra6pbr/H44483+LytW7fam+OLL76w77dhwwbTtm3bJv1cYaTAoApSSvZm2RSAXCM+hKOUuVMYhBw+IHgCMWPRkG3btpnFixebgQMHxo8VFRXZx/Pnz8/Je1ZXV5vdd989flPQQF3sdQGg0IgPwUIpcyD4At+xqK2ttY1Qx44d6xzX47Vr12b8Ogo0P/jBD8ysWbNMly5dGgw6Y8eOtaNPN910k+nevbvZf//9Xf0MYUSAAFBoxIdgoZQ5EHxUhfrGCy+8kPFzS0pK7O2KK66wN011a2QKdQPEpEmTCBAAAo/4kN9S5hqIUsxgGRQQPIHvWHTo0MF+eF23bl2d43rcqVOnnL63kgV10wdn1EWAAFBoxIfgoZQ5EGyBXwpVXFxs+vTpY2bPnh0/puQ8PR4wYEBO37uqqsosX77cLFq0KKfvE1TsrAqgkIgPAJBfgZix2Lhxo1m5cmX88apVq8zSpUtNu3btTFlZmS0lWFlZafr27WsqKirsEhyVIBw2bFhOz4sRqexRKQqAl4gPAOAfgSg3q+U0+iCaTMHivvvus/9WGcCJEyfahDzVJJ88ebItM5gPzhpaygk2jFKCALxGfIgGBqWAYAhEx8KvEkekampqCByN0KZHKj/rVP0YOXKkXSoFAGFDfPAOg1JAcAQ+x6KQWEObHUoJAogK4oN3KF8OBAcdCxc0GlVeXm53dUXmlaI0U8GIE4AwIz54h0EpIDhYCuUB1tACAFIhPni3HIry5YD/0bHwAIHDPRLzAIQR8QFAlLAUygWmur1NzFNit+71GACCjPgAIIroWLhAcp43SMwDEDbEBwBRRMcCBUdiHgAgW5rdVhlzZrkB/yDHwgOsoXWPxDwAYUR8yA32tgD8iRkLF1hD6x0FBG2WlxwYGJECEETEh9xiCS3gT8xYeIARqdxgRApA0BEfcoP4APgTMxbwLUakAACpsOEq4E90LBCYpO6VK1eyJAoA0OASWgCFQ8cCvh+ROuWUU+zjWbNmsc8FAACAT9GxgO87F/vttx9LogAAaVHoA/AHOhYuUPUjP9jnAkDQEB/yn8g9ZcoUZrWBAqMqlAeo+pF77HMBIIiID7mnmQp1KpwBKCV0K/cCQP4xY4FAYJ8LAEAqzGoD/kHHwhizevVq2xBp2rpnz57mscceK/QpIQNMfwPINeKD/1F6FvAPlkIZYz766COzbt0607t3b7N27VrTp08fU1NTY9q0aZPR9zPVXRhMfwPINeIDAGSOGQtjzN57722DhnTq1Ml06NDBfPrpp4U+LWQ5/d2qVSuWRQHwFPEheFgiCxROIDoWc+fONYMHDzalpaWmWbNmZsaMGSkrcHTr1s20bNnS9O/f3yxcuLBJ77V48WL7IbVr164enDnyNf09btw4M2HCBJZFARFDfEAilsgChRWIjsWmTZtMr169bHBIZdq0aWb06NFm/PjxZsmSJfa5gwYNMuvXr48/RyNOPXr0qHdbs2ZN/DkahTrvvPPMnXfemZefC94ldW/evJm9LoAIIj4g0Zw5c4gFQAEFLsdCI1LTp083Q4cOjR/TCJRqhU+dOtU+3rlzpx1RGjFihBkzZkxGr7t161Zz4oknmosuusice+65jT5Xt8Q1tHo/1tAWfpTKCSgk8AHRQ3wAsQAorEDMWDRk27Ztdnp64MCB8WNFRUX28fz58zN6DfWtzj//fHP88cc3GjSkurraJuM5N6bF/VcVRFhjC0Qb8SF6qBAFFFbgOxa1tbV2VKJjx451juuxKnhk4tVXX7XT5Vqbqylx3f75z3+mff7YsWPt6JNzUzlC+GdZlLDGFgDxIZrS7XsEIPda5OE9fO/II4+00+OZKikpsTet6dVNgQv+XmNLgAHQFMQHAIjQjIVK/+nDo+qMJ9JjlQZE9FCGFoAQH+CgBC2QH4HvWBQXF9sNi2bPnh0/ptElPR4wYEBO37uqqsosX77cLFq0KKfvg/yVoSX4AOFBfIBQghbIn0B0LDZu3GiWLl1qb7Jq1Sr77/fff98+VinBu+66y9x///3mX//6l7n00kttCcJhw4bl9Lw0zV1eXm4rjsD/ZWiVtHnttdemDSoEHyB4iA9oDCVogTyKBcCcOXNUErferbKyMv6cKVOmxMrKymLFxcWxioqK2IIFC/J2fhs2bLDno3v4y8yZM+3/m6Kiojr3Op7s8ssvjzVv3tx+XfejRo0qyDkDyBzxAZnGAad9T9X+A/BG4Pax8JPE5LyamhrqlPuUZh40U7Fs2TK7DEIjVlom5VSQSnwe9c8BeIH44C9q3zVTceyxx9KuAzlEx8ID2gBJ9coJHP6V3GlQ7oWWSSnROzHIEHwAeIn44D9q57U8Krn9B+AeHQsPEDiCwek0qEqUErqZmQCQa8QHf2FmGsitQCRv+xXJecFP6M4kkY9KUQCyRXzwJxK5gdxixsIDjEiFc1lUqucyugUgG8QHf6FNB3KLnbcR2X0ukpdFTZo0qV6QYRdvAAhn+08uHeA9lkK5wFR3+JdFJe/irUAEAI0hPvi//dd9uqWuLIEFmoalUB5gqjvc0+KJlaKEaiIAMkV8CF77z3IpoOmYsUCkOdPi2tciXfBwRreEnbkBINyJ3CR4A01HxwKRlzgt3pDkYHPPPfcwVQ4AAZVuqStLYIGmYymUC+ysGq1NlJKnx4WpcgCpEB+CId1SVyHBG8geHQsPsIY2WmtvFWxWrlxpZs2aFR/V0lIqZ7kUADiID8FAXgXgDZZCASmkW2PrLJsaPnw4U+UAEBLkVQDeoGMBpNDYGtvkpG8h3wIAgom8CsAbLIXyAFPd4V9729CUOFPoANIhPoSvzQeQHjtve5Sch/BRYMkkuLA7N4BkxIfwtvkA0mPGwgOMSEVb8ozFuHHj7I7eiZvoJVeYAhANxAcAUULHwgMEDjhT6K1atTITJkyosyxKWCoFRBPxAUCUkLxtjPn8889N3759Te/evU2PHj3MXXfdVehTQsA41aI0U+F0IIqKisy1115r7r77bqqNAAFFfACAzDFjYYz9wLd161bTunVrs2nTJhs8Xn/9ddO+ffuMvp8RKSQvi1KnYufOnfF7YcYCCB7iA1JheSuQGjMW33zgU9AQBRD1tehvoSmcMrS9evWKdyr0+6XjlKYFgof4gHQDSFOmTLH3tOVAwDoWc+fONYMHDzalpaWmWbNmZsaMGfWeo+ob3bp1My1btjT9+/c3CxcuzHq6Wx8Gu3TpYq666irToUMHD38CRIk6EVoC5XQqNOJ54YUXxnfmJiAB3iE+IJfURicPBLGZHhDwjoWmn9WoKzikMm3aNDN69Ggzfvx4s2TJEvvcQYMGmfXr18ef46yPTb6tWbPGfn2PPfYwy5YtM6tWrTKPPPKIWbduXd5+PoRP8gZ6zlQ5AQnwFvEB+Z6ZYDM9IEQ5FhqRmj59uhk6dGj8mEag+vXrZ6ZOnWofa6S4a9euZsSIEWbMmDFZv8dll11mjj/+eHPmmWem/Lqmw3VLXEOr92MNLbwoTQugaYgP8JJmKtSpcDoRGihyZp7ZTA8I8IxFQ7Zt22YWL15sBg4cGD+mte16PH/+/IxeQ6NPX375pf23Gn9NrXfv3j3t86urq20ynnNT0ACynclQp0KlaVkWBeQG8QFuJM9MqJy4syzKqQSo+1TLpYCoCnzHora21v7Bd+zYsc5xPV67dm1Gr/Hee++Zo446yk6R614jWYccckja548dO9YGGOe2evVq1z8Hol2aNnFZFEEK8AbxAbkeCCKRG6irRdLjSKqoqDBLly7N+PklJSX2pjW9uilwAU0ZDZs0aVKd0TAFsqefftoe09coTQsUFvEh2tT+6qbBnuSBIB1PlTdHm40oC/yMhapz6I85OZlOjzt16pTT966qqjLLly83ixYtyun7IDqjYc8884z9GsndgHvEB3glXcI2idxAyDoWxcXFpk+fPmb27NnxY0rO0+MBAwbk9L01GlVeXm4TAwEvlkU5tRSUhEqQAtwhPiDXlf7SHQeiKhBLoTZu3GhWrlwZf6ySf5qabteunSkrK7OlBCsrK03fvn3ttLWWkKgE4bBhwwp63kBTl0WpLr/2vkgMUuz0CtRHfEC+l0VlehyIokCUm9VyEH2YSqZgcd9999l/q5TgxIkTbUKeapJPnjzZlhnMB5UTVPUPygnCjYbKFyaXqWVkDPgv4gP8jkEhREkgOhZ+lZicV1NTQ+BAQeqpA/Af4kO0OZ2J1q1b2/w5BoUQFXQsPMCIFHKF4AQEG/EhepJnmLV3inJ7GBRCFLjOsVB1DSXCLVmyxP77s88+M3vuuaetE66kOe1QmlxDHEDTdunesmULO70iUIgRiJrEErSJnQoKciAKmtSx+Prrr820adPsNO/ChQvtsVQTH6psI1rLqtJ7Z511ltlll11MWFCnHLmUXB9dnQpGuhAExAjiQ5QlF+NgUAhRkvVSqAcffNDuLPrRRx/ZQPGtb33Llu07+OCDTfv27e1Ur6Z8P/nkE/Pmm2+a+fPn238rgJSWlprq6mpzzjnnmDBhqhu50FDCdmIyoJAYCL8gRtRFfIimhopxAGGWVcdCwUGjT9p06Oyzzzbnn3++6dWrV6Pfp9J/9957r/nzn/9sA4hGp+bNm2fCgsCBfAan5A6HkHsBPyBG1Ed8ABAlWXUsFCw0EvWzn/3MlJSUZP1mW7dutWX+fvvb35ra2loTdFT9QKErRDlLSfRnTGIgCo0Y8X+ID9FEaVlEXVYdC428JDaM2oho3333zfpNk18n6BiRQj4xYwG/8qptD1OMID5EB/sNAVkmbyc3ij/4wQ/s+thsk+1oXIGmU6BSwHKWSIn+3apVKztS5mDUDPmW3LZv27bNFBcXu34dIIgFN9Qu0/4ialztY6EyaqrkoWUZjVmxYoU58MADTRgxIoVCYxYDfvTLX/7S/OEPf7C/g0cddVSdr2kXbP1+du7c2YQZ8SE6MpmxYKkUwq7IzTcrMU9B44knnmjwea+++qo58sgjTdho/Wx5ebnp169foU8FEZc4Uqa8C90SR82AQv1eqoZ/RUVFneNXXnml7VCUlZWZww47zPz73/82YUN8iO5ssnLd0nUq1PHQYKzu9RgIG1czFps3b7YbHKmsoDY/2m+//eo9Z8aMGbY6iOqa6xZGjEih0JixgB+pfOz+++9v5s6dGz+mErM9e/a0nd927drZKlD77LOP+ec//2l23XVXEzbEB6QqvJFcbIOZDISFqxmL1q1b202QtI5WGxvpPtHtt99u8zBEzwOQn5GyxH87AY3RMeSbOg3JS50eeeQRe3/rrbeajz/+2Fx//fXmvffeM1OnTi3QWQL5oU6D06lI3IWbmQyEiasZi8QOhHItLrnkErs0Sq655hrzm9/8xuyxxx72j+SII44wYcWIFIIyk6EdYDXTyKgY8kHton7XNHPt0L4WH3zwgVm3bp1p0aKFXSqlGQt1QBYsWGDChviAdHsTiWYp3nnnHTNr1qyUMxlAqKtCqQ/i1M1PdOmll5oXXnjB/PGPfzSHH364mT17trn//vttsHj22WdDm7QNBCn3QsUWJkyYYB9PmjSJJVLIOS2DWrZsWfzx6tWr7ZInzXCrUyH6vfzud79rXn755QKeKZAfanN1a2j5qtPpAEK/FMoZffrFL35hE7YVJBx/+tOfbCJeZWWl7VT07t3blqINc6eC5DwEaepdI8P6EEdSN/Ll5JNPNu+//76566677OMbb7zRDk4NGjSozvOUW7FlyxYTJsQHZFOaVp0Nlq8ickuhnGlr+43fzFx07NjRVvzo37+/2b59uxk/frz53ve+Zzsebdq0MVHAVDeCMPWufS6cGQuSupEPn376qf1wrVwKtY1qKxUXlFOx5557xp93zDHH2MpQa9asMWFDfEA2pWnZZA+R6lhoROmNN94wixYtsrfXX3/dBgPnJZzORrdu3ewojaa3dVPlqMQgEjYEDgR5fS/5Fsilt956y1x00UU2f0IFP7Rk9ic/+Un86yr60b59e9sBee2110zYEB+QSXvstMENVY4CIpG8rUZTHQyno6F7TX3bF0/Ix1C+xX/+8x8TRgQOBA2jYsg3FQ0oKSmxv3OJpk+fbs444wzz85//3FaKChviA7JB24xIJW+nooby+OOPtzeHpr0TOxoLFy60U99BCHwHHXSQLZF70003Ffp0gLwldV977bX2OAEMuaLZinQx5OKLL7YfpvyM+IB8lg53lq+qrXYww4zIlJvNhBK9u3btavxMJXJXrlxpzzObwMGIFII6KqZOhZPUrXtGx4DUiA/IJzY9RSQ3yMuG3zsVyhVZsWKFOemkkwp9KkDeRsW0p4DTqaBSFJAa8QGFnFXWsnLdqOiHIMhbx8KNuXPnmsGDB5vS0lL7x5W42VJiaT8ljbds2dJWqNLyq2xceeWVprq62sOzBvzfudASKKdToaClqXfKHKKplCeh3bbd0FJaJaxmiviAsJcK18IS3djnAqHrWBQiaMimTZvsyKqCQyrTpk0zo0ePtqVulyxZYp+rOunr16+PP0f7avTo0aPeTeUNNXL7ne98x96AKM5c6G9Su3KrHK0qkmgKns4FsqU2et999zVjx461o/zZePvtt81VV11lvv3tb5vbb7894+8jPiDsbbPuE/+duAxK7TSDQfCVWBaaN28e22233WJjxoyJ1dTUZPOtsRUrVsSuvPJK+/0tWrSINZVOefr06XWOVVRUxKqqquKPd+zYESstLY1VV1dn9Jr6ebp06RLbZ599Yu3bt4+1bds2dt1116V9/ldffRXbsGFD/LZ69Wp7Xvo3EFSXX365/RvX77LuhwwZYo/NnDmz0KeGgFiyZEnsiCOOiDVr1ixWVFRk//3//t//iz377LOxd999N/bll1/a5+lej2fNmhX71a9+FTvssMPs8/V9Rx55ZOyNN95o0vsTHxAlapud9lr3tNXwAxOkoJEqcGzdutX+USUHk/POO89+MMrWvffeG7viiisafM748ePteSTfCBwIU5AiYKGpHnvssdjhhx8ejxUN3fQc3RRPHn/8cVfvS3xAlAeDRo0aVehTAmJZlZs99NBDzSuvvGIef/xxW2983rx5Zv78+Y3NiNj7ww8/3E7XqV65l2pra+2aQ+0AnkiPlWyXC5rm19T6XXfdZW96f1ULAcJS5lC/z7NmzaqTLEgVEmTqzDPPtLelS5fafSqUiKrNVbVsyaEduLWBqtaSDx061C5H8hrxAWGmv51JkyaRe4HwlJstRNBQcp7eU68pWgPbuXNn28kZMGBA/HlXX321efnll/OykyvlBBH2UofKv1Adf2qoww39Dqmd3GOPPWyhAK8RHxA1qXbvBgK7QZ46C7pdd911eQkaqXTo0MF++Fm3bl2d43rcqVOnnL63kgV1c2pMA2HdpElJ3fo70+gYNdTRmHfffdcmSmvcav/997cJ084meek2yssF4gPC2plI3CyP9hiBLzeroPHkk0+aJ554wixbtix+XAFj7733zlunQoqLi02fPn3M7Nmz48dUPlOPE0eocqGqqsosX77c7i4OhI2C1S233GIHDJyZC93fc889VCFBSupIXHbZZbYzoR2qzzrrLDtzrR2rNUOQb8QHhHU2mep9CMWMhYKGGss777wznjshKsN3xx13mGOOOSYX52g2btxYZ43qqlWr7DKsdu3ambKyMruetbKy0vTt29dUVFTYUVUtyxo2bJjJJUakEMV1vApkzF4glT/+8Y82FjjLklq0aGG+/vprW0pWJV6fe+45z+ME8QFR3TgvOf/NmcnQIC9LV1Ew2WR633777fEKHqrmUVxcHH9cUlISe+mll3KSYT5nzpyUVTYqKyvjz5kyZUqsrKzMnpPKCy5YsCCWL6r2QdUPhJmqQqniyODBg6lCgrT69etnY8PYsWNjGzdujG3fvj329ttvx84//3wbJw444ADP35P4gChJV2LWOa6/v8R7Kvoh37JK3tZoz+LFi80vfvELc80119hdTN955x27I+n9999vp79rampMVCSOSOnnJjkPYUdSNxqy2267mf3226/OElnHOeecY/785z/bZUhRqF5DfEA+E7a1PFXLoxJnyNROa1M9LWkF8iWrjgVBIzWqfiCKQS0xqVvBbPDgwWb48OF0MCKsqKjI/OQnPzEPPvhgva8pmVvLkW644QbbIY0K4gPyOeijv0HlETn3DP7A1zkWWpfas2fPlF/TOtZHHnnElvWLSseCNbSIIqcKiUbInE6FPPPMM+bpp58m7yLidtlll5THu3fvHi8BGwXEBxSykt+WLVuo6IdgVIUiaPwfqn4gyjQCpg9NStIVTX46yYRAMu1tJPrAEwXEBxSqkt+NN96YsqIfbTN8W242lagFDSDqnBEyLYESdn+FaNM5jY6qOs2nn35a7+su9mQF0ITBH9pm+HqDPCdoaMMj3VTSL6pBg6luRJ2zLCoxmVC0TIo1vdH0r3/9y1xxxRXxx6WlpfF4IVFpL4kP8NPyKNpm+DJ5W8lA9pu+WfqQHDRUHercc8+1FaKihOQ8IHXVKNb0Rsvjjz9uKwfqpmTtxBmLxLihjVS1cZ2SuZ37vfbay4QR8QF+QNsMX3YsCBqpETgAU6/koQLYKaecYivJMUIWTe+99148Zjhxo7a2NmXc6Ny5s3n//fdN2BAf4Pe2WbR0kXYaee9YJCNo/BeBA0g9KiaMkCHR6tWr68QM3a9fv97GizAuGyI+wO9tc+K/aadR0I5F1IMGGyAB9Tn5FitXrjSzZs1i9gKN+vDDD22sCNPvBfEBQWibk6v6saEefNexiErQSMSIFFAfsxcA8QH+Q9uMwHcswo7AAWQ3e8GoGKKC+AA/Sq7k52ysxy7dcIuOhQcIHEDDqEiCqCI+IIht9Lhx4+hkoEnoWHiAwAFkN0JGoEJUEB8QtKpR2lpg586dDAShsDtvA0BDFJi0/IkABQD+3aVbnQp1LpzHGhACMkXHwgVV/CgvLzf9+vUr9KkAgZu90AiZ7oEwIj4giLt0K/9Ny6ASZyycPAwgEyyF8gBT3UDT1/IOHjzYDB8+nJkMhBLxAWFI7mYDPWSKjoUHCBxA09byiuqoqxliHS/CiPiAIKPwBrLFUqhvdOvWzfTs2dP07t07vsU9gNyt5U3emIl1vPAr4gOiSjMVTqeCdhqZoGORYN68eWbp0qX2DwlAbtfyagmUsI4XQUB8QNSTummnkYkWGT0LADzuXOhGCVoA8P9AUGK+hZazkm+BQM9YzJ07145ulpaW2uUTM2bMSFmBQ9PVLVu2NP379zcLFy7M6j30usccc4yt4PHwww97ePYAMilBS6UoNAXxAchPOy3Kt1COnO6vueYa2mwEc8Zi06ZNplevXuaCCy4w3//+9+t9fdq0aWb06NHmjjvusEFj0qRJZtCgQebtt982e+21l32O1sZu37693vc+//zzNiC98sorpnPnzuajjz4yAwcONIcccohdUwsgvwmC+vslQRCZIj4A+c+30D4XEyZMiLfZTnU/53nMaERX4KpCaeRo+vTpZujQofFjChYaSZo6dap9rPrLXbt2NSNGjDBjxozJ+j2uuuoqc/DBB5vzzz8/5de3bt1qb4lVP/R+VP0A3FeKUqA65ZRTzH777UdwQlaID0D+KkQ5O3QnVvcT5+vaD2Pz5s204xETiKVQDdm2bZtZvHixHUVy6Jddj+fPn5/xiNeXX35p/71x40bz4osv2sCRTnV1tS0f6NwUNAB4lyCoAOZMtzPNjqYiPgC53UQvsbqf/q1b4owG7Xj0BL5jUVtba3+JO3bsWOe4Hq9duzaj11i3bp058sgj7XT6YYcdZs4777wGd0sdO3asHX1ybqtXr3b9cwBRlhiwNKVOeUN4gfgA5Cbf4sYbb6xX3U+dC6d8uDod6lzQjkdPIHIsck1LLpYtW5bx80tKSuxNCYG6ORt9AfCmUtTTTz9NeUP4AvEByLy6n+jfrVq1iudg0I5HS+A7Fh06dLC/uBpVSqTHnTp1Kth5AfCmvCFrc9FUxAcgfx2MxMdOflND7bg6JCR6h09okrcrKirsWj7RFFxZWZn52c9+1qTkvGwpOU9raUnOA4DCIT4A/pTciUhOBKcSYHgEYsZCCXMrV66MP161apXdAbVdu3Y2QKiUYGVlpenbt68NICp9poS7YcOG5fS8mOoGco9RLTSE+AAEq5y4Er+fffbZejkYtO8hEQuAOXPmaFal3q2ysjL+nClTpsTKyspixcXFsYqKitiCBQvydn4bNmyw56N7AN6ZOXOm/dtq3ry5vR83blzs8ssvt8cBIT4A/qY222nDi4qKUt7TpodH4JZC+UniiFRNTQ1T3UAO97dwaqYzdY4gID4ADe9/oXttTjl+/Hja8hChY+EB1tAC+Q1IeqzStCp7CPgZ8QH4b1ueqloUA0ThQ8fCAwQOIP8BSfXThw8fTlCCrxEfgLoSy9PSfocPHQsXmOoG8h+Q7rnnHnvPiBf8jPgAZI4iHeFBx8IDjEgBhcm7YEkU/I74ADSM0rPhUlToEwCAbGhEy+lUsKMrAASbZiqc9twpPetFZ0WDULpHftGxcEHT3OXl5aZfv36FPhUgcjtza6ZC90IAgd8QH4DCDBY5MyCa2dY9sSG/WArlAaa6AX9MoWvjpc2bN7NOF75BfACyS+gWN/kWLJctLDoWHiBwAIXBPhfwO+IDkJ98CycBvHXr1pS0LaAWhXxzAHBDI1qTJk2qs89F4jpdggkABDvfIpN2PNXs9ZYtWyhpWwDkWLjAGlrAP/kWCiSJMxYkdaOQiA9A/vItkjsk6lRo+ZNiBInc+cVSKA8w1Q2Ee+MlaqyjqYgPgDfteEPtcLolVJSyzT86Fh4gcADhRWCCG8QHoOmyyZtI1SFJTuQ+5ZRTzH777ccgUQ6RYwEglLyaZWjqml8AgHeDOqly6JLb+eS2OTkPT8/Xv3WMQaLcIMcCQOh4WcecDfkAIP8SB3WSq/6pHc6knU/Mwxs8eLDnG/GhPjoWLpCcB/hT8izDPffc0+TkveQN+RjhQiaID4A7iYM66lSoQEdiO5zpjt16rhK5hw8fziBRHpBj4QHW0AL+nkIXN3XRWY+LpiI+ALkpyNGU/Dfn9Vq1apXVZqrEgszRsfAAgQPwHyeArFy50syaNSvjXVjZZAleIj4A3kr8kC/ZVgLMtENCLGgakre/sWrVKnPBBReYdevW2V+eBQsWmDZt2hT6tAA0kZPIp+Dw9NNPZzT9nUmyIKKH+AD4Q2Ib7SRgNzRQ1NSCHMSCpqNj8Y3zzz/f3HDDDeaoo44yn376qSkpKSn0KQHwgJMj0dColjMy9c477zSYLIhoIj4A/uBFlb7kSlGp2vbGEseRHh0LY8xbb71ldtllFxs0pF27doU+JQA5mr1QEnfiOtmG8jGULKgdXL3ecA/BQXwA/COTTkG2g02SHBeS34dYkIVYALz88suxU089Nbb33nsrHyQ2ffr0es+ZOnVqbJ999omVlJTEKioqYq+99lrGr6/XO+200+x7HHroobEbb7wxq/PbsGGDPS/dA/CnmTNn2r/T5s2b23s9lssvvzx+TPdDhgyJjRo1Kv51+BvxAYgWtc1etdHp4kK699G/FTOID+kFYsZi06ZNplevXnaN6/e///16X582bZoZPXq0ueOOO0z//v1tL3PQoEHm7bffNnvttZd9Tu/evc327dvrfe/zzz9vj//97383S5cutc//n//5H1si8MQTT8zLzwegMCVoncS8xBKEF154ISNSAUJ8AKIl1UZ4XseFVBvupcrvIFakEAuYVCNSGoGqqqqKP96xY0estLQ0Vl1dndFrzps3L/a9730v/vh3v/udvaXz1Vdf2dEn57Z69WpGpACfSx6ZSvz3uHHjmKUIAeIDAK/iQnI8SJ7dVsxAfYHfIG/btm1m8eLFZuDAgfFjSrTR4/nz52f0Ghp9Wr9+vfnss89sgs7cuXPNQQcdlPb51dXVtnygc+vatasnPwuA3GloB1atnVVlEUafwoX4AKAh2ezMnbhhH0nc6QW+Y1FbW2v/B3fs2LHOcT1eu3ZtRq/RokULW6P46KOPNj179jQHHHCAOfXUU9M+f+zYsbYmuXNbvXq1658DQO6xA2u0EB8AeBUXEjshLINKLxA5Fvlw0kkn2VsmVGpQt9tuu83enEoyAMJTghZwEB+A8MskLniZ3xFWge9YdOjQwfYutXFRIj3u1KlTwc4LgL8RIMKP+AAgG8QF9wK/FKq4uNj06dPHzJ49O35M62D1eMCAATl976qqKrN8+XKzaNGinL4PACB7xAcA+eDskfTUU0+ZqAvEjMXGjRvNypUr449XrVplS/9po6KysjJbSrCystL07dvXVFRU2DJgKkE4bNiwnJ4XU90AUFjEBwCFoE6EU7JceViUof2vZioNZXxO692UjZ9MweK+++6z/546daqZOHGiTchTTfLJkyfbmuX58MUXX9jqH0rUa9u2bV7eEwBAfACQf4l7WmjwQNXmNBvavHlzm9ytZHDneYn7YkRBIDoWfpU4IlVTU0PgAABYxAcgvLTsacqUKfU6FTt27IjPWCR3PqIykxH4HItCYg0tED2spUUmiA9AeCXuaaFOxbhx4+qVoZ2TtKt38r4YYRWIHAsA8IPEESjW0gJANGVSmva4446zcSJ5X4ywL4+iY+ECyXlAtKQagQpjYIB7xAcg2qVph6TofERhcIocCw+QnAdEQ1TXzKLpiA8AUuVmJCd6hwU5FgCQIWcEKnktLQAA2eRmJC6PChNmLFyg6gcAIBXiAwBHYl6FNJSbEXR0LDzAVDcAIBXiAxAtTyUlZ0dtCS1LoQAAAACXnvqmE6E8Ct07nYwolZ2lYwEAAAC4NCdFJyIKeRWJ6Fi4oPWz5eXlpl+/foU+FQCAjxAfgOg5LkUnImpFP8ix8ABraAEAqRAfgGh56qmnQp2c3Rg6Fh4gcADRFPYdVOEe8QFAlLAUCgA8StIDACDK6FgAQBNErdIHAACNoWPhAsl5QHRFrdIHskN8ABBF5Fh4gDW0QDRFPUkPjSM+AIgSOhYeIHAAAFIhPgCIEpZCAQAAAHCNjoUx5u233za9e/eO31q1amVmzJhR6NMCABQY8QEAMsdSqCQbN2403bp1M++9955p06ZNRt/DVDeAROxvEU7EBwBoWItGvh7JDwQnnHBCxkEDABI7E61btzYTJkyw1aImTZpkZs6cSeciJIgPABCCpVBz5841gwcPNqWlpaZZs2Ypp6FV2k8jSS1btjT9+/c3CxcubNJ7Pfroo+aHP/yhB2cNIIqb5alTUVRUxP4WeUJ8AAD/CETHYtOmTaZXr142OKQybdo0M3r0aDN+/HizZMkS+9xBgwaZ9evXx5+jtbE9evSod1uzZk2dKet58+aZk08+OS8/F4DwbZanTsXOnTvZ3yJPiA8AgjwoNWrUKHsfFoHLsdCI1PTp083QoUPjxzQCpU2Ipk6dah8rqHft2tWMGDHCjBkzJuPXfvDBB81zzz1nHnrooQaft3XrVntLDDh6P9bQAtGesXA6E+PGjTNbtmxhf4s8Iz4ACGrcmBmSZbOBz7HYtm2bWbx4sRk7dmz8mEYMBw4caObPn5/1NPfFF1/c6POqq6vNdddd16TzBRA+CgYKCmyW5y/EBwBBmOlu3ry5ueeee0JR9CMQS6EaUltba/+ndOzYsc5xPV67dm3Gr6PRJK271RR5YxSk9PybbrrJdO/e3ey///5NOncA4aFAcMsttwQ6IIQN8QGAXx133HHxToXuNYOhPD3NYgR5aVTgOxZeUTnAdevWmeLi4kafW1JSYqe0r7jiCrNixQo7IgYACCfiA4BczXSPHDnSFqBInL0IctGPwHcsOnToYP8nqNFPpMedOnXK6XsrWbC8vNyu3wUA+AvxAUAQZrqHDx9eZ/YiVdGPoCR6B75joRGkPn36mNmzZ8ePKTlPjwcMGJDT966qqjLLly83ixYtyun7AAiuoASDMCI+AAja7MXMFEnciSXN/b5UqkVQdjtduXJl/PGqVavM0qVLTbt27UxZWZktJVhZWWn69u1rKioq7KZUKkE4bNiwnI9I6abeJQA0VPWDzfJyg/gAIAyGDBmSNj4kJ3prqZRvY0ksAObMmaOSuPVulZWV8edMmTIlVlZWFisuLo5VVFTEFixYkLfz27Bhgz0f3QOA4/LLL481b97ctg+6HzVqVKFPKXSIDwDCbubMmfE4ons99qvA7WPhJ4kjUjU1NdQpBxCJOuVoHPEBgNfxJAglzelYeEAbIKlqCIEDQFCDAXKD+AAgSgKfvF1IVP0AkGnVDyGJOzqIDwCiiBkLDzAiBSCbJVHjxo0zmzdvDvwOq2gc8QFAlDBjAQA5lljRo6ioyEyYMCEQZQMBAMgGHQsXmOoGkAnNTDhlArWPgjoXYdhhFekRHwBEEUuhPMBUN4BMk7hbtWplZyyoFBUNxAcAbuKGZryDtGyWjoUHCBwAskGlqOggPgCIUrnyQOy8DQBR2WEVABBtTz31lLn22mvrLZsNQtygY+HRBkgAADiIDwDczFSoU5GYk6cZ7iBgKZQHmOoGAKRCfACQDe13pKqBThXB3r17m/HjxwditkKoCgUAAAD4sIrg+AB1KoSlUAAAAIAPDBkyxCZqB7XAB0uhPMBUNwAgFeIDgChhKRQA+ChpT+tr2Y0bABDEWMGMhUdVP2pqahiRAhC5muVIjfgAIIqxghkLF6qqqszy5cvNokWLCn0qAAJOu6s6gcKpWY7gIj4AiGKsoGMBAD6rBBKkmuUAgPw5zuexgo7FN2699VZz8MEHm/LycjNy5EjDCjEAhagEovbHb1PbUUd8AOAXQ3weK8ixMMZ8/PHH5rDDDjNvvfWW2WWXXczRRx9tbrrpJjNgwICMvp+qHwBysY5WU94anfJb4IgS4gMAZI59LL6xfft289VXX9l/f/3112avvfYq9CkBiKjE5LxJkyb5clQqSogPABCipVBz5841gwcPNqWlpaZZs2ZmxowZ9Z6j6hvdunUzLVu2NP379zcLFy7M+PW/9a1vmSuvvNKUlZXZ9xg4cKD59re/7fFPAQDhSM7zE+IDAPhHIDoWmzZtMr169bLBIZVp06aZ0aNH223PlyxZYp87aNAgs379+vhzevfubXr06FHvtmbNGvPZZ5+ZZ555xrz77rvmww8/NPPmzbPBCgAKwe/JeX5CfAAA/whcjoVGpKZPn26GDh0aP6YRqH79+pmpU6faxzt37jRdu3Y1I0aMMGPGjGn0NR977DE7IugEpokTJ9rkvKuvvjrl87du3WpviWto9X6soQXgVS6FjqtdUqeCZVCZIT4AQGEFYsaiIdu2bTOLFy+209OOoqIi+3j+/PkZvYYafY1CaQ2tRgcVRLp37572+dXV1TYZz7np+wGgqbkUU6ZMsfeJu6iqM3HLLbfQqXCB+AAA+RX4jkVtba1t7Dt27FjnuB6vXbs2o9dQxY+TTz7ZHHrooaZnz552/WxDwXzs2LF29Mm5rV692vXPASB6yKXILeIDAOQXVaG+ceONN9pbJkpKSuxNU+O6KXABQLa0/ElVnxrKpaDsbOERHwAgIh2LDh062KC8bt26Osf1uFOnTgU7LwDIdKOjdLkUlJ11h/gAAPkV+KVQxcXFpk+fPmb27NnxY0rO0+NMNzBqqqqqKrN8+XKzaNGinL4PgPBqKJeCpVLuEB8AIL8CMWOxceNGs3LlyvjjVatWmaVLl5p27drZ2uIqJVhZWWn69u1rKioq7MieShAOGzYsp+fFVDeAQi+VijriAwD4RyDKzWqUTgE2mYLFfffdZ/+tUoIqA6iEPNUknzx5si0zmA8qJ6jqH5QTBOA1ys42jPgAAP4RiI6FXyWOSNXU1BA4AAAW8QFAFNGx8AAjUgCAVIgPAKIk8MnbAAAAQFSXy44aNarOBquFRMfCBU1zl5eXm379+hX6VAAAPkJ8AJBrTknyKVOm2Hs/dC7oWLhAOUEAQCrEBwC55seS5HQsACAg/DblDQAonOOOOy7eqfBLSXKSt12g6geAfEnchVttDrtw+xvxAYDfSpLruZrlUIckV/GDjoUHqPoBINc0U6F1tM7o1MiRI+2O3fkKFmga4gOAKA1OsRQKAAI85Z2cvHfNNdewXAoAUJB8DDoWLlD1A0C+aGRJI0yaqUgcaUoMFkVFRWbChAm+qhASVcQHAFHMx2AplAc0xb3HHnuY1atXM9UNIK9mzZplfvzjH9tOxc6dO02zZs2MmnU9vuSSS0x1dXVWr7fbbrvZ14A3iA8A/BQv/v73v5ujjjrKnHzyyVl/fybxgY6FBz744APTtWvXQp8GALhGLoC3iA8AohQf6Fh4QKOEa9asqdOT0/R3Yv3yxMdK5lOgyfUIVvI55OL7Gntuuq9nczzdtfT7dczmewt5HSUf19LP1zHd1/x4HdOdl1ffx4xF7uODH36P/Pz3mOnxQl/Dhs7Vq+8jNnjzfdm2/Q19LUzXMNvfxRUrVjQaH1o0+kpolJYcdOnSpc4xrWFL/AVKfix6nMsGL9V7ev19jT033dezOd7YtfTrdczme/1wHXN9Lf18HdN9zY/XMd175vL74G188MPvkZ//HjM9Xuhr2NC5evV9xAZvvi/btr+hr4XpGmb7u5jJoBPJ2zncdbWhx4U4h1x8X2PPTff1bI4X+lq6eb9Mv5frWNjrmO5rfryObt6zEOeK1Ar9e+Tnv8dMjxf6Grp5T79cwyD/Lnr1WSUMcaHKB3/PDpZCFQB1zb3BdfQO19IbXEd4gd8j97iG3uA6uvdFxK4hMxYFUFJSYsaPH2/v0XRcR+9wLb3BdYQX+D1yj2voDa6jeyURu4bMWAAAAABwjRkLAAAAAK7RsQAAAADgGh0LAAAAAK7RsQAAAADgGh0LH3rmmWdM9+7dzQEHHGDuvvvuQp9OYJ1++ulmzz33NGeeeWahTyWwtFPosccea8rLy03Pnj3NY489VuhTCqTPP//c9O3b1/Tu3dv06NHD3HXXXYU+JQQU8cEbxAd3iA3ufR7SuEBVKJ/Zvn27/UOdM2eOrXvcp08fM2/ePNO+fftCn1rgvPTSS+bLL780999/v3n88ccLfTqB9NFHH5l169bZhm/t2rX297Gmpsa0adOm0KcWKDt27DBbt241rVu3Nps2bbJB5PXXX+fvGlkhPniH+OAOscG9HSGNC8xY+MzChQvNwQcfbDp37mx23XVXc9JJJ5nnn3++0KcVSBpN2W233Qp9GoG2995728AhnTp1Mh06dDCffvppoU8rcJo3b26DhyiQaDyHMR1ki/jgHeKDO8QG95qHNC7QsfDY3LlzzeDBg01paalp1qyZmTFjRr3n3HbbbaZbt26mZcuWpn///jZYONasWWODhkP//vDDD03UuL2O8P46Ll682I6wdO3a1USNF9dR0969evUyXbp0MVdddZUNxIgW4oM3iA/uERvcIy6kRsfCY5rO0i+JfplSmTZtmhk9erTdhXHJkiX2uYMGDTLr16/P+7n6GdfRX9dRI1HnnXeeufPOO00UeXEd99hjD7Ns2TKzatUq88gjj9hlBIgW2jVvcB3dIza4R1xIQzkWyA1d3unTp9c5VlFREauqqoo/3rFjR6y0tDRWXV1tH7/66quxoUOHxr/+85//PPbwww/Hoqwp19ExZ86c2BlnnJG3cw3jdfzqq69iRx11VOyBBx7I6/mG8ffRcemll8Yee+yxnJ8r/Iv44A3ig3vEBveIC/+HGYs82rZtm50yHDhwYPxYUVGRfTx//nz7uKKiwrz55pt2envjxo3m2WeftT1cZHcd4c11VHt5/vnnm+OPP96ce+65BTzbYF9HjUIpUVQ2bNhgp9BV2QdwEB+8QXxwj9jg3rYIx4UWhT6BKKmtrbXrEDt27FjnuB6vWLHC/rtFixbm5ptvNscdd5zZuXOnufrqqwNfIaAQ11H0B6wpRk1Xav2iyuENGDCgAGcc3Ov46quv2ulclRN01o8++OCD5pBDDinIOQf1Or733nvm4osvjifnjRgxgmuIOogP3iA+uEdscK82wnGBjoUPDRkyxN7gzgsvvFDoUwi8I4880n6AgTsaaV66dGmhTwMhQHzwBvHBHWKDexUhjQsshcojZfurvFhyco4eq1wbMsN19AbX0RtcR3iB3yNvcB3d4xq61yHC15CORR4VFxfbTWRmz54dP6Yevx4zBZs5rqM3uI7e4DrCC/weeYPr6B7X0L3iCF9DlkJ5TAl1K1eujD9WCTFNdbVr186UlZXZ0mOVlZV2G3dNg02aNMmu8Rw2bFhBz9tvuI7e4Dp6g+sIL/B75A2uo3tcQ/e4hmkkVIiCB1S+Tpc1+VZZWRl/zpQpU2JlZWWx4uJiW45swYIFBT1nP+I6eoPr6A2uI7zA75E3uI7ucQ3d4xqm1kz/SdfpAAAAAIBMkGMBAAAAwDU6FgAAAABco2MBAAAAwDU6FgAAAABco2MBAAAAwDU6FgAAAABco2MBAAAAwDU6FgAAAABco2MBAAAAwDU6FoBPbd682bRo0cLsuuuuZseOHYU+HQAAgAbRsQB86o033rAdiu9+97umefPmhT4dAACABtGxAHxq0aJF9r5///6FPhUAAIBG0bEAfN6xqKioKPSpAAAANIqOBeBTr7/+esqOxaZNm8wFF1xgmjVrZpdJvfPOOwU6QwBAIX3yySfmV7/6lY0Fbdu2NcXFxWbvvfc2hx9+uLnmmmvMV199VehTRMQ0i8VisUKfBIC6NmzYYPbcc0+z1157mbVr18aP/+Mf/zA//OEPzYoVK8yll15qbr31VlNSUlLQcwUA5N9bb71lTjjhBLNu3TpTVlZmevXqZVq1amU+/PBDGyuKiorMZ599ZgehgHxpkbd3ApDVbIX6/ImzFbfffrsZPXq02WWXXcy0adPMWWedVdBzBAAUzkUXXWQ7FYoNP/3pT+t0ILZs2WIWLlxIpwJ5R8cC8Hni9ueff24uvPBC8+STT5revXubRx991BxwwAGFPkUAQIFoSez8+fPt8qdLLrmk3tc1c3HMMccU5NwQbeRYAD7Or1C5WXUm1Km4+OKLbSChUwEA0aYlsK1btzZffPGFna14//33C31KgEWOBeBD++yzT51Acf/995vzzjuvoOcEAPCPhx56yM5WaPZCDjnkEHPKKaeYs88+2/4bKARmLACfWb9+ve1UdOvWzZx44on22OzZswt9WgAAHznnnHPMe++9Z+6++27zgx/8wKxevdr85je/MT179rT5eEAh0LEAfJpf0bdvX/PEE0/YpVAPPPCAGTt2bKFPDQDgI+3bt7c5eMq9UyK3KgUqYVv3a9asKfTpIYLoWAA+za/o06eP2W233cysWbPs0iiNRN12222FPj0AgA9pD4vLL7/ctGvXLp6jB+QbHQvApzMW2vBItNnRs88+a/e1GDlypE3kBgBE0wsvvGDjwNatW+scV0fixhtvtJvmaca7a9euBTtHRBfJ24DPdOzY0eZZ1NbW2mluxyuvvBLPufjb3/5mjjzyyAKeJQCgEFQF6s477zRt2rSxA1BdunQxGzduNEuWLLGb46lDoRjRvXv3Qp8qIoiOBeAjStrWsiftoqqkvGTKudDGeLvvvrvtaJSXlxfkPAEAhfHiiy+amTNnmnnz5tmEbc1QaN+KAw880Jx22mlmxIgRdn8LoBDoWAAAAABwjRwLAAAAAK7RsQAAAADgGh0LAAAAAK7RsQAAAADgGh0LAAAAAK7RsQAAAADgGh0LAAAAAK7RsQAAAADgGh0LAAAAAK7RsQAAAADgGh0LAAAAAK7RsQAAAADgGh0LAAAAAMat/w8AwW8DanvXgQAAAABJRU5ErkJggg==",
281 |       "text/plain": [
282 |        "<Figure size 800x400 with 2 Axes>"
283 |       ]
284 |      },
285 |      "metadata": {},
286 |      "output_type": "display_data"
287 |     }
288 |    ],
289 |    "source": [
290 |     "plot_distributions(H, log_x1=True, log_y1=True, log_x2=True, log_y2=True)"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": null,
296 |    "metadata": {},
297 |    "outputs": [],
298 |    "source": []
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": null,
303 |    "metadata": {},
304 |    "outputs": [],
305 |    "source": []
306 |   },
307 |   {
308 |    "cell_type": "code",
309 |    "execution_count": null,
310 |    "metadata": {},
311 |    "outputs": [],
312 |    "source": []
313 |   }
314 |  ],
315 |  "metadata": {
316 |   "kernelspec": {
317 |    "display_name": "xgi",
318 |    "language": "python",
319 |    "name": "python3"
320 |   },
321 |   "language_info": {
322 |    "codemirror_mode": {
323 |     "name": "ipython",
324 |     "version": 3
325 |    },
326 |    "file_extension": ".py",
327 |    "mimetype": "text/x-python",
328 |    "name": "python",
329 |    "nbconvert_exporter": "python",
330 |    "pygments_lexer": "ipython3",
331 |    "version": "3.13.3"
332 |   }
333 |  },
334 |  "nbformat": 4,
335 |  "nbformat_minor": 4
336 | }
337 | 


--------------------------------------------------------------------------------
/index.json:
--------------------------------------------------------------------------------
   1 | {
   2 |   "arxiv-kaggle": {
   3 |     "url": "https://zenodo.org/records/15808027/files/arxiv.json",
   4 |     "num-nodes": 1821977,
   5 |     "num-edges": 2765236,
   6 |     "num-unique-edges": 1986653,
   7 |     "min-edge-size": 1,
   8 |     "max-edge-size": 2811,
   9 |     "mean-edge-size": 4.61,
  10 |     "min-degree": 1,
  11 |     "max-degree": 2591,
  12 |     "mean-degree": 7.0,
  13 |     "num-components": 63744,
  14 |     "gc-size": 1694867
  15 |   },
  16 |   "coauth-dblp": {
  17 |     "url": "https://zenodo.org/records/13203175/files/coauth-DBLP.json",
  18 |     "data-type": "hypergraph",
  19 |     "num-nodes": 1930378,
  20 |     "num-edges": 3700681,
  21 |     "num-unique-edges": 2467389,
  22 |     "min-edge-size": 1,
  23 |     "max-edge-size": 280,
  24 |     "mean-edge-size": 2.79,
  25 |     "min-degree": 1,
  26 |     "max-degree": 1425,
  27 |     "mean-degree": 5.35,
  28 |     "num-components": 32,
  29 |     "gc-size": 1659954
  30 |   },
  31 |   "coauth-mag-geology": {
  32 |     "url": "https://zenodo.org/records/10928443/files/coauth-MAG-Geology.json",
  33 |     "data-type": "hypergraph",
  34 |     "num-nodes": 1261129,
  35 |     "num-edges": 1591166,
  36 |     "num-unique-edges": 1204704,
  37 |     "min-edge-size": 1,
  38 |     "max-edge-size": 284,
  39 |     "mean-edge-size": 2.8,
  40 |     "min-degree": 1,
  41 |     "max-degree": 1153,
  42 |     "mean-degree": 3.53,
  43 |     "num-components": 52,
  44 |     "gc-size": 903973
  45 |   },
  46 |   "coauth-mag-history": {
  47 |     "url": "https://zenodo.org/records/13151009/files/coauth-MAG-History.json",
  48 |     "data-type": "hypergraph",
  49 |     "num-nodes": 1034876,
  50 |     "num-edges": 1813147,
  51 |     "num-unique-edges": 896062,
  52 |     "min-edge-size": 1,
  53 |     "max-edge-size": 925,
  54 |     "mean-edge-size": 1.32,
  55 |     "min-degree": 1,
  56 |     "max-degree": 4348,
  57 |     "mean-degree": 2.32,
  58 |     "num-components": 75,
  59 |     "gc-size": 242048
  60 |   },
  61 |   "congress-bills": {
  62 |     "url": "https://zenodo.org/records/10928561/files/congress-bills.json",
  63 |     "data-type": "hypergraph",
  64 |     "num-nodes": 1718,
  65 |     "num-edges": 282049,
  66 |     "num-unique-edges": 105733,
  67 |     "min-edge-size": 1,
  68 |     "max-edge-size": 400,
  69 |     "mean-edge-size": 8.66,
  70 |     "min-degree": 3,
  71 |     "max-degree": 9375,
  72 |     "mean-degree": 1421.3,
  73 |     "num-components": 1,
  74 |     "gc-size": 1718
  75 |   },
  76 |   "contact-high-school": {
  77 |     "url": "https://zenodo.org/records/10155802/files/contact-high-school.json",
  78 |     "data-type": "hypergraph",
  79 |     "num-nodes": 327,
  80 |     "num-edges": 172035,
  81 |     "num-unique-edges": 7818,
  82 |     "min-edge-size": 2,
  83 |     "max-edge-size": 5,
  84 |     "mean-edge-size": 2.05,
  85 |     "min-degree": 7,
  86 |     "max-degree": 4495,
  87 |     "mean-degree": 1078.65,
  88 |     "num-components": 1,
  89 |     "gc-size": 327
  90 |   },
  91 |   "contact-primary-school": {
  92 |     "url": "https://zenodo.org/records/10155810/files/contact-primary-school.json",
  93 |     "data-type": "hypergraph",
  94 |     "num-nodes": 242,
  95 |     "num-edges": 106879,
  96 |     "num-unique-edges": 12704,
  97 |     "min-edge-size": 2,
  98 |     "max-edge-size": 5,
  99 |     "mean-edge-size": 2.1,
 100 |     "min-degree": 125,
 101 |     "max-degree": 2234,
 102 |     "mean-degree": 925.61,
 103 |     "num-components": 1,
 104 |     "gc-size": 242
 105 |   },
 106 |   "dawn": {
 107 |     "url": "https://zenodo.org/records/10155779/files/dawn.json",
 108 |     "data-type": "hypergraph",
 109 |     "num-nodes": 2558,
 110 |     "num-edges": 2272433,
 111 |     "num-unique-edges": 141087,
 112 |     "min-edge-size": 1,
 113 |     "max-edge-size": 16,
 114 |     "mean-edge-size": 1.58,
 115 |     "min-degree": 1,
 116 |     "max-degree": 479242,
 117 |     "mean-degree": 1406.1,
 118 |     "num-components": 269,
 119 |     "gc-size": 2290
 120 |   },
 121 |   "diseasome": {
 122 |     "url": "https://zenodo.org/records/10155812/files/diseasome.json",
 123 |     "data-type": "hypergraph",
 124 |     "num-nodes": 516,
 125 |     "num-edges": 903,
 126 |     "num-unique-edges": 481,
 127 |     "min-edge-size": 1,
 128 |     "max-edge-size": 11,
 129 |     "mean-edge-size": 1.72,
 130 |     "min-degree": 1,
 131 |     "max-degree": 41,
 132 |     "mean-degree": 3.0,
 133 |     "num-components": 1,
 134 |     "gc-size": 516
 135 |   },
 136 |   "email-enron": {
 137 |     "url": "https://zenodo.org/records/10155819/files/email-enron.json",
 138 |     "data-type": "hypergraph",
 139 |     "num-nodes": 148,
 140 |     "num-edges": 10885,
 141 |     "num-unique-edges": 1514,
 142 |     "min-edge-size": 1,
 143 |     "max-edge-size": 37,
 144 |     "mean-edge-size": 2.47,
 145 |     "min-degree": 0,
 146 |     "max-degree": 1327,
 147 |     "mean-degree": 181.85,
 148 |     "num-components": 6,
 149 |     "gc-size": 143
 150 |   },
 151 |   "email-eu": {
 152 |     "url": "https://zenodo.org/records/10155823/files/email-eu.json",
 153 |     "data-type": "hypergraph",
 154 |     "num-nodes": 1005,
 155 |     "num-edges": 235263,
 156 |     "num-unique-edges": 25148,
 157 |     "min-edge-size": 1,
 158 |     "max-edge-size": 40,
 159 |     "mean-edge-size": 2.39,
 160 |     "min-degree": 1,
 161 |     "max-degree": 8664,
 162 |     "mean-degree": 559.8,
 163 |     "num-components": 20,
 164 |     "gc-size": 986
 165 |   },
 166 |   "eventernote-events": {
 167 |     "url": "https://zenodo.org/records/11151063/files/eventernote-events.json",
 168 |     "data-type": "hypergraph",
 169 |     "num-nodes": 77343,
 170 |     "num-edges": 350460,
 171 |     "num-unique-edges": 141621,
 172 |     "min-edge-size": 1,
 173 |     "max-edge-size": 620,
 174 |     "mean-edge-size": 5.92,
 175 |     "min-degree": 0,
 176 |     "max-degree": 1951,
 177 |     "mean-degree": 26.84,
 178 |     "num-components": 18,
 179 |     "gc-size": 69885
 180 |   },
 181 |   "eventernote-places": {
 182 |     "url": "https://zenodo.org/records/11263394/files/eventernote-places.json",
 183 |     "data-type": "hypergraph",
 184 |     "num-nodes": 77343,
 185 |     "num-edges": 13993,
 186 |     "num-unique-edges": 11732,
 187 |     "min-edge-size": 1,
 188 |     "max-edge-size": 6420,
 189 |     "mean-edge-size": 50.98,
 190 |     "min-degree": 0,
 191 |     "max-degree": 421,
 192 |     "mean-degree": 9.22,
 193 |     "num-components": 9,
 194 |     "gc-size": 71698
 195 |   },
 196 |   "hospital-lyon": {
 197 |     "url": "https://zenodo.org/records/10155825/files/hospital-lyon.json",
 198 |     "data-type": "hypergraph",
 199 |     "num-nodes": 75,
 200 |     "num-edges": 27834,
 201 |     "num-unique-edges": 1824,
 202 |     "min-edge-size": 2,
 203 |     "max-edge-size": 5,
 204 |     "mean-edge-size": 2.09,
 205 |     "min-degree": 10,
 206 |     "max-degree": 3603,
 207 |     "mean-degree": 774.68,
 208 |     "num-components": 1,
 209 |     "gc-size": 75
 210 |   },
 211 |   "house-bills": {
 212 |     "url": "https://zenodo.org/records/10957691/files/house-bills.json",
 213 |     "data-type": "hypergraph",
 214 |     "num-nodes": 1494,
 215 |     "num-edges": 60987,
 216 |     "num-unique-edges": 54933,
 217 |     "min-edge-size": 2,
 218 |     "max-edge-size": 399,
 219 |     "mean-edge-size": 20.47,
 220 |     "min-degree": 1,
 221 |     "max-degree": 6220,
 222 |     "mean-degree": 835.79,
 223 |     "num-components": 1,
 224 |     "gc-size": 1494
 225 |   },
 226 |   "house-committees": {
 227 |     "url": "https://zenodo.org/records/10957702/files/house-committees.json",
 228 |     "data-type": "hypergraph",
 229 |     "num-nodes": 1290,
 230 |     "num-edges": 341,
 231 |     "num-unique-edges": 336,
 232 |     "min-edge-size": 1,
 233 |     "max-edge-size": 81,
 234 |     "mean-edge-size": 34.73,
 235 |     "min-degree": 1,
 236 |     "max-degree": 44,
 237 |     "mean-degree": 9.18,
 238 |     "num-components": 1,
 239 |     "gc-size": 1290
 240 |   },
 241 |   "hyperbard": {
 242 |     "url": "https://zenodo.org/records/11211879/files/hyperbard_collection_information.json",
 243 |     "data-type": "collection",
 244 |     "datasets": {
 245 |       "alls-well-that-ends-well": {
 246 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_alls-well-that-ends-well.json",
 247 |         "num-nodes": 32,
 248 |         "num-edges": 87,
 249 |         "num-unique-edges": 67,
 250 |         "min-edge-size": 1,
 251 |         "max-edge-size": 12,
 252 |         "mean-edge-size": 3.75,
 253 |         "min-degree": 1,
 254 |         "max-degree": 31,
 255 |         "mean-degree": 10.19,
 256 |         "num-components": 2,
 257 |         "gc-size": 31
 258 |       },
 259 |       "a-midsummer-nights-dream": {
 260 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_a-midsummer-nights-dream.json",
 261 |         "num-nodes": 28,
 262 |         "num-edges": 92,
 263 |         "num-unique-edges": 73,
 264 |         "min-edge-size": 1,
 265 |         "max-edge-size": 14,
 266 |         "mean-edge-size": 4.85,
 267 |         "min-degree": 1,
 268 |         "max-degree": 39,
 269 |         "mean-degree": 15.93,
 270 |         "num-components": 1,
 271 |         "gc-size": 28
 272 |       },
 273 |       "antony-and-cleopatra": {
 274 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_antony-and-cleopatra.json",
 275 |         "num-nodes": 87,
 276 |         "num-edges": 168,
 277 |         "num-unique-edges": 131,
 278 |         "min-edge-size": 1,
 279 |         "max-edge-size": 13,
 280 |         "mean-edge-size": 4.23,
 281 |         "min-degree": 1,
 282 |         "max-degree": 75,
 283 |         "mean-degree": 8.17,
 284 |         "num-components": 1,
 285 |         "gc-size": 87
 286 |       },
 287 |       "as-you-like-it": {
 288 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_as-you-like-it.json",
 289 |         "num-nodes": 30,
 290 |         "num-edges": 80,
 291 |         "num-unique-edges": 58,
 292 |         "min-edge-size": 1,
 293 |         "max-edge-size": 13,
 294 |         "mean-edge-size": 3.62,
 295 |         "min-degree": 1,
 296 |         "max-degree": 35,
 297 |         "mean-degree": 9.67,
 298 |         "num-components": 1,
 299 |         "gc-size": 30
 300 |       },
 301 |       "coriolanus": {
 302 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_coriolanus.json",
 303 |         "num-nodes": 77,
 304 |         "num-edges": 151,
 305 |         "num-unique-edges": 130,
 306 |         "min-edge-size": 1,
 307 |         "max-edge-size": 12,
 308 |         "mean-edge-size": 4.69,
 309 |         "min-degree": 1,
 310 |         "max-degree": 69,
 311 |         "mean-degree": 9.19,
 312 |         "num-components": 2,
 313 |         "gc-size": 75
 314 |       },
 315 |       "cymbeline": {
 316 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_cymbeline.json",
 317 |         "num-nodes": 47,
 318 |         "num-edges": 109,
 319 |         "num-unique-edges": 75,
 320 |         "min-edge-size": 1,
 321 |         "max-edge-size": 15,
 322 |         "mean-edge-size": 3.01,
 323 |         "min-degree": 1,
 324 |         "max-degree": 33,
 325 |         "mean-degree": 6.98,
 326 |         "num-components": 3,
 327 |         "gc-size": 41
 328 |       },
 329 |       "hamlet": {
 330 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_hamlet.json",
 331 |         "num-nodes": 41,
 332 |         "num-edges": 121,
 333 |         "num-unique-edges": 89,
 334 |         "min-edge-size": 1,
 335 |         "max-edge-size": 12,
 336 |         "mean-edge-size": 3.83,
 337 |         "min-degree": 1,
 338 |         "max-degree": 62,
 339 |         "mean-degree": 11.32,
 340 |         "num-components": 1,
 341 |         "gc-size": 41
 342 |       },
 343 |       "henry-iv-part-1": {
 344 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-iv-part-1.json",
 345 |         "num-nodes": 40,
 346 |         "num-edges": 99,
 347 |         "num-unique-edges": 74,
 348 |         "min-edge-size": 1,
 349 |         "max-edge-size": 8,
 350 |         "mean-edge-size": 3.48,
 351 |         "min-degree": 1,
 352 |         "max-degree": 50,
 353 |         "mean-degree": 8.62,
 354 |         "num-components": 2,
 355 |         "gc-size": 38
 356 |       },
 357 |       "henry-iv-part-2": {
 358 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-iv-part-2.json",
 359 |         "num-nodes": 60,
 360 |         "num-edges": 105,
 361 |         "num-unique-edges": 84,
 362 |         "min-edge-size": 1,
 363 |         "max-edge-size": 10,
 364 |         "mean-edge-size": 4.55,
 365 |         "min-degree": 1,
 366 |         "max-degree": 50,
 367 |         "mean-degree": 7.97,
 368 |         "num-components": 4,
 369 |         "gc-size": 56
 370 |       },
 371 |       "henry-viii": {
 372 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-viii.json",
 373 |         "num-nodes": 64,
 374 |         "num-edges": 74,
 375 |         "num-unique-edges": 65,
 376 |         "min-edge-size": 1,
 377 |         "max-edge-size": 12,
 378 |         "mean-edge-size": 4.61,
 379 |         "min-degree": 1,
 380 |         "max-degree": 23,
 381 |         "mean-degree": 5.33,
 382 |         "num-components": 3,
 383 |         "gc-size": 62
 384 |       },
 385 |       "henry-vi-part-1": {
 386 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-vi-part-1.json",
 387 |         "num-nodes": 81,
 388 |         "num-edges": 135,
 389 |         "num-unique-edges": 120,
 390 |         "min-edge-size": 1,
 391 |         "max-edge-size": 17,
 392 |         "mean-edge-size": 5.2,
 393 |         "min-degree": 1,
 394 |         "max-degree": 35,
 395 |         "mean-degree": 8.67,
 396 |         "num-components": 3,
 397 |         "gc-size": 78
 398 |       },
 399 |       "henry-vi-part-2": {
 400 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-vi-part-2.json",
 401 |         "num-nodes": 88,
 402 |         "num-edges": 130,
 403 |         "num-unique-edges": 115,
 404 |         "min-edge-size": 1,
 405 |         "max-edge-size": 14,
 406 |         "mean-edge-size": 6.34,
 407 |         "min-degree": 1,
 408 |         "max-degree": 57,
 409 |         "mean-degree": 9.36,
 410 |         "num-components": 2,
 411 |         "gc-size": 85
 412 |       },
 413 |       "henry-vi-part-3": {
 414 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-vi-part-3.json",
 415 |         "num-nodes": 67,
 416 |         "num-edges": 107,
 417 |         "num-unique-edges": 89,
 418 |         "min-edge-size": 1,
 419 |         "max-edge-size": 13,
 420 |         "mean-edge-size": 5.03,
 421 |         "min-degree": 1,
 422 |         "max-degree": 56,
 423 |         "mean-degree": 8.03,
 424 |         "num-components": 1,
 425 |         "gc-size": 67
 426 |       },
 427 |       "henry-v": {
 428 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_henry-v.json",
 429 |         "num-nodes": 58,
 430 |         "num-edges": 91,
 431 |         "num-unique-edges": 70,
 432 |         "min-edge-size": 1,
 433 |         "max-edge-size": 13,
 434 |         "mean-edge-size": 4.68,
 435 |         "min-degree": 1,
 436 |         "max-degree": 43,
 437 |         "mean-degree": 7.34,
 438 |         "num-components": 2,
 439 |         "gc-size": 57
 440 |       },
 441 |       "julius-caesar": {
 442 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_julius-caesar.json",
 443 |         "num-nodes": 60,
 444 |         "num-edges": 103,
 445 |         "num-unique-edges": 81,
 446 |         "min-edge-size": 1,
 447 |         "max-edge-size": 16,
 448 |         "mean-edge-size": 4.6,
 449 |         "min-degree": 1,
 450 |         "max-degree": 63,
 451 |         "mean-degree": 7.9,
 452 |         "num-components": 1,
 453 |         "gc-size": 60
 454 |       },
 455 |       "king-john": {
 456 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_king-john.json",
 457 |         "num-nodes": 37,
 458 |         "num-edges": 67,
 459 |         "num-unique-edges": 59,
 460 |         "min-edge-size": 1,
 461 |         "max-edge-size": 18,
 462 |         "mean-edge-size": 4.84,
 463 |         "min-degree": 1,
 464 |         "max-degree": 31,
 465 |         "mean-degree": 8.76,
 466 |         "num-components": 1,
 467 |         "gc-size": 37
 468 |       },
 469 |       "king-lear": {
 470 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_king-lear.json",
 471 |         "num-nodes": 45,
 472 |         "num-edges": 124,
 473 |         "num-unique-edges": 94,
 474 |         "min-edge-size": 1,
 475 |         "max-edge-size": 12,
 476 |         "mean-edge-size": 3.85,
 477 |         "min-degree": 1,
 478 |         "max-degree": 45,
 479 |         "mean-degree": 10.6,
 480 |         "num-components": 1,
 481 |         "gc-size": 45
 482 |       },
 483 |       "loves-labors-lost": {
 484 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_loves-labors-lost.json",
 485 |         "num-nodes": 24,
 486 |         "num-edges": 64,
 487 |         "num-unique-edges": 48,
 488 |         "min-edge-size": 1,
 489 |         "max-edge-size": 17,
 490 |         "mean-edge-size": 6.39,
 491 |         "min-degree": 1,
 492 |         "max-degree": 36,
 493 |         "mean-degree": 17.04,
 494 |         "num-components": 1,
 495 |         "gc-size": 24
 496 |       },
 497 |       "macbeth": {
 498 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_macbeth.json",
 499 |         "num-nodes": 49,
 500 |         "num-edges": 114,
 501 |         "num-unique-edges": 76,
 502 |         "min-edge-size": 1,
 503 |         "max-edge-size": 10,
 504 |         "mean-edge-size": 3.54,
 505 |         "min-degree": 1,
 506 |         "max-degree": 59,
 507 |         "mean-degree": 8.24,
 508 |         "num-components": 1,
 509 |         "gc-size": 49
 510 |       },
 511 |       "measure-for-measure": {
 512 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_measure-for-measure.json",
 513 |         "num-nodes": 32,
 514 |         "num-edges": 100,
 515 |         "num-unique-edges": 76,
 516 |         "min-edge-size": 1,
 517 |         "max-edge-size": 17,
 518 |         "mean-edge-size": 3.86,
 519 |         "min-degree": 1,
 520 |         "max-degree": 50,
 521 |         "mean-degree": 12.06,
 522 |         "num-components": 1,
 523 |         "gc-size": 32
 524 |       },
 525 |       "much-ado-about-nothing": {
 526 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_much-ado-about-nothing.json",
 527 |         "num-nodes": 29,
 528 |         "num-edges": 72,
 529 |         "num-unique-edges": 56,
 530 |         "min-edge-size": 1,
 531 |         "max-edge-size": 13,
 532 |         "mean-edge-size": 4.17,
 533 |         "min-degree": 1,
 534 |         "max-degree": 32,
 535 |         "mean-degree": 10.34,
 536 |         "num-components": 1,
 537 |         "gc-size": 29
 538 |       },
 539 |       "othello": {
 540 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_othello.json",
 541 |         "num-nodes": 34,
 542 |         "num-edges": 117,
 543 |         "num-unique-edges": 90,
 544 |         "min-edge-size": 1,
 545 |         "max-edge-size": 13,
 546 |         "mean-edge-size": 4.12,
 547 |         "min-degree": 1,
 548 |         "max-degree": 66,
 549 |         "mean-degree": 14.18,
 550 |         "num-components": 2,
 551 |         "gc-size": 33
 552 |       },
 553 |       "pericles": {
 554 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_pericles.json",
 555 |         "num-nodes": 69,
 556 |         "num-edges": 96,
 557 |         "num-unique-edges": 78,
 558 |         "min-edge-size": 1,
 559 |         "max-edge-size": 11,
 560 |         "mean-edge-size": 3.76,
 561 |         "min-degree": 1,
 562 |         "max-degree": 30,
 563 |         "mean-degree": 5.23,
 564 |         "num-components": 3,
 565 |         "gc-size": 66
 566 |       },
 567 |       "richard-iii": {
 568 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_richard-iii.json",
 569 |         "num-nodes": 69,
 570 |         "num-edges": 135,
 571 |         "num-unique-edges": 122,
 572 |         "min-edge-size": 1,
 573 |         "max-edge-size": 12,
 574 |         "mean-edge-size": 4.5,
 575 |         "min-degree": 1,
 576 |         "max-degree": 82,
 577 |         "mean-degree": 8.81,
 578 |         "num-components": 3,
 579 |         "gc-size": 65
 580 |       },
 581 |       "richard-ii": {
 582 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_richard-ii.json",
 583 |         "num-nodes": 49,
 584 |         "num-edges": 77,
 585 |         "num-unique-edges": 72,
 586 |         "min-edge-size": 1,
 587 |         "max-edge-size": 16,
 588 |         "mean-edge-size": 5.35,
 589 |         "min-degree": 1,
 590 |         "max-degree": 34,
 591 |         "mean-degree": 8.41,
 592 |         "num-components": 1,
 593 |         "gc-size": 49
 594 |       },
 595 |       "romeo-and-juliet": {
 596 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_romeo-and-juliet.json",
 597 |         "num-nodes": 50,
 598 |         "num-edges": 129,
 599 |         "num-unique-edges": 92,
 600 |         "min-edge-size": 1,
 601 |         "max-edge-size": 15,
 602 |         "mean-edge-size": 3.91,
 603 |         "min-degree": 1,
 604 |         "max-degree": 53,
 605 |         "mean-degree": 10.08,
 606 |         "num-components": 2,
 607 |         "gc-size": 49
 608 |       },
 609 |       "the-comedy-of-errors": {
 610 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-comedy-of-errors.json",
 611 |         "num-nodes": 24,
 612 |         "num-edges": 54,
 613 |         "num-unique-edges": 41,
 614 |         "min-edge-size": 1,
 615 |         "max-edge-size": 16,
 616 |         "mean-edge-size": 3.96,
 617 |         "min-degree": 1,
 618 |         "max-degree": 22,
 619 |         "mean-degree": 8.92,
 620 |         "num-components": 1,
 621 |         "gc-size": 24
 622 |       },
 623 |       "the-merchant-of-venice": {
 624 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-merchant-of-venice.json",
 625 |         "num-nodes": 33,
 626 |         "num-edges": 76,
 627 |         "num-unique-edges": 62,
 628 |         "min-edge-size": 1,
 629 |         "max-edge-size": 10,
 630 |         "mean-edge-size": 3.75,
 631 |         "min-degree": 1,
 632 |         "max-degree": 27,
 633 |         "mean-degree": 8.64,
 634 |         "num-components": 1,
 635 |         "gc-size": 33
 636 |       },
 637 |       "the-merry-wives-of-windsor": {
 638 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-merry-wives-of-windsor.json",
 639 |         "num-nodes": 23,
 640 |         "num-edges": 147,
 641 |         "num-unique-edges": 99,
 642 |         "min-edge-size": 1,
 643 |         "max-edge-size": 13,
 644 |         "mean-edge-size": 3.5,
 645 |         "min-degree": 1,
 646 |         "max-degree": 53,
 647 |         "mean-degree": 22.39,
 648 |         "num-components": 1,
 649 |         "gc-size": 23
 650 |       },
 651 |       "the-taming-of-the-shrew": {
 652 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-taming-of-the-shrew.json",
 653 |         "num-nodes": 45,
 654 |         "num-edges": 119,
 655 |         "num-unique-edges": 103,
 656 |         "min-edge-size": 1,
 657 |         "max-edge-size": 14,
 658 |         "mean-edge-size": 5.32,
 659 |         "min-degree": 1,
 660 |         "max-degree": 50,
 661 |         "mean-degree": 14.07,
 662 |         "num-components": 2,
 663 |         "gc-size": 28
 664 |       },
 665 |       "the-tempest": {
 666 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-tempest.json",
 667 |         "num-nodes": 22,
 668 |         "num-edges": 62,
 669 |         "num-unique-edges": 39,
 670 |         "min-edge-size": 1,
 671 |         "max-edge-size": 15,
 672 |         "mean-edge-size": 4.73,
 673 |         "min-degree": 1,
 674 |         "max-degree": 39,
 675 |         "mean-degree": 13.32,
 676 |         "num-components": 1,
 677 |         "gc-size": 22
 678 |       },
 679 |       "the-two-gentlemen-of-verona": {
 680 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-two-gentlemen-of-verona.json",
 681 |         "num-nodes": 19,
 682 |         "num-edges": 70,
 683 |         "num-unique-edges": 44,
 684 |         "min-edge-size": 1,
 685 |         "max-edge-size": 9,
 686 |         "mean-edge-size": 2.5,
 687 |         "min-degree": 1,
 688 |         "max-degree": 33,
 689 |         "mean-degree": 9.21,
 690 |         "num-components": 1,
 691 |         "gc-size": 19
 692 |       },
 693 |       "the-winters-tale": {
 694 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_the-winters-tale.json",
 695 |         "num-nodes": 44,
 696 |         "num-edges": 71,
 697 |         "num-unique-edges": 63,
 698 |         "min-edge-size": 1,
 699 |         "max-edge-size": 12,
 700 |         "mean-edge-size": 4.77,
 701 |         "min-degree": 1,
 702 |         "max-degree": 31,
 703 |         "mean-degree": 7.7,
 704 |         "num-components": 2,
 705 |         "gc-size": 43
 706 |       },
 707 |       "timon-of-athens": {
 708 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_timon-of-athens.json",
 709 |         "num-nodes": 76,
 710 |         "num-edges": 105,
 711 |         "num-unique-edges": 88,
 712 |         "min-edge-size": 1,
 713 |         "max-edge-size": 15,
 714 |         "mean-edge-size": 5.91,
 715 |         "min-degree": 1,
 716 |         "max-degree": 49,
 717 |         "mean-degree": 8.17,
 718 |         "num-components": 1,
 719 |         "gc-size": 76
 720 |       },
 721 |       "titus-andronicus": {
 722 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_titus-andronicus.json",
 723 |         "num-nodes": 37,
 724 |         "num-edges": 83,
 725 |         "num-unique-edges": 72,
 726 |         "min-edge-size": 1,
 727 |         "max-edge-size": 19,
 728 |         "mean-edge-size": 6.06,
 729 |         "min-degree": 1,
 730 |         "max-degree": 46,
 731 |         "mean-degree": 13.59,
 732 |         "num-components": 1,
 733 |         "gc-size": 37
 734 |       },
 735 |       "troilus-and-cressida": {
 736 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_troilus-and-cressida.json",
 737 |         "num-nodes": 40,
 738 |         "num-edges": 138,
 739 |         "num-unique-edges": 103,
 740 |         "min-edge-size": 1,
 741 |         "max-edge-size": 16,
 742 |         "mean-edge-size": 3.83,
 743 |         "min-degree": 1,
 744 |         "max-degree": 49,
 745 |         "mean-degree": 13.22,
 746 |         "num-components": 2,
 747 |         "gc-size": 39
 748 |       },
 749 |       "twelfth-night": {
 750 |         "url": "https://zenodo.org/records/11211879/files/hyperbard_twelfth-night.json",
 751 |         "num-nodes": 22,
 752 |         "num-edges": 96,
 753 |         "num-unique-edges": 71,
 754 |         "min-edge-size": 1,
 755 |         "max-edge-size": 14,
 756 |         "mean-edge-size": 3.99,
 757 |         "min-degree": 1,
 758 |         "max-degree": 38,
 759 |         "mean-degree": 17.41,
 760 |         "num-components": 1,
 761 |         "gc-size": 22
 762 |       }
 763 |     }
 764 |   },
 765 |   "hypertext-conference": {
 766 |     "url": "https://zenodo.org/records/10206136/files/hypertext-conference.json",
 767 |     "data-type": "hypergraph",
 768 |     "num-nodes": 113,
 769 |     "num-edges": 19036,
 770 |     "num-unique-edges": 2434,
 771 |     "min-edge-size": 2,
 772 |     "max-edge-size": 6,
 773 |     "mean-edge-size": 2.05,
 774 |     "min-degree": 2,
 775 |     "max-degree": 1446,
 776 |     "mean-degree": 345.56,
 777 |     "num-components": 1,
 778 |     "gc-size": 113
 779 |   },
 780 |   "invs13": {
 781 |     "url": "https://zenodo.org/records/10206151/files/InVS13.json",
 782 |     "data-type": "hypergraph",
 783 |     "num-nodes": 92,
 784 |     "num-edges": 9644,
 785 |     "num-unique-edges": 787,
 786 |     "min-edge-size": 2,
 787 |     "max-edge-size": 4,
 788 |     "mean-edge-size": 2.01,
 789 |     "min-degree": 5,
 790 |     "max-degree": 1089,
 791 |     "mean-degree": 210.65,
 792 |     "num-components": 1,
 793 |     "gc-size": 92
 794 |   },
 795 |   "invs15": {
 796 |     "url": "https://zenodo.org/records/10206154/files/InVS15.json",
 797 |     "data-type": "hypergraph",
 798 |     "num-nodes": 232,
 799 |     "num-edges": 73822,
 800 |     "num-unique-edges": 4909,
 801 |     "min-edge-size": 2,
 802 |     "max-edge-size": 4,
 803 |     "mean-edge-size": 2.03,
 804 |     "min-degree": 0,
 805 |     "max-degree": 3192,
 806 |     "mean-degree": 646.33,
 807 |     "num-components": 16,
 808 |     "gc-size": 217
 809 |   },
 810 |   "kaggle-whats-cooking": {
 811 |     "url": "https://zenodo.org/records/10157609/files/kaggle-whats-cooking.json",
 812 |     "data-type": "hypergraph",
 813 |     "num-nodes": 6714,
 814 |     "num-edges": 39774,
 815 |     "num-unique-edges": 39243,
 816 |     "min-edge-size": 1,
 817 |     "max-edge-size": 65,
 818 |     "mean-edge-size": 10.77,
 819 |     "min-degree": 1,
 820 |     "max-degree": 18048,
 821 |     "mean-degree": 63.78,
 822 |     "num-components": 1,
 823 |     "gc-size": 6714
 824 |   },
 825 |   "malawi-village": {
 826 |     "url": "https://zenodo.org/records/10206147/files/malawi-village.json",
 827 |     "data-type": "hypergraph",
 828 |     "num-nodes": 86,
 829 |     "num-edges": 99942,
 830 |     "num-unique-edges": 432,
 831 |     "min-edge-size": 2,
 832 |     "max-edge-size": 4,
 833 |     "mean-edge-size": 2.01,
 834 |     "min-degree": 12,
 835 |     "max-degree": 7636,
 836 |     "mean-degree": 2338.01,
 837 |     "num-components": 2,
 838 |     "gc-size": 84
 839 |   },
 840 |   "ndc-classes": {
 841 |     "url": "https://zenodo.org/records/10155772/files/ndc-classes.json",
 842 |     "data-type": "hypergraph",
 843 |     "num-nodes": 1161,
 844 |     "num-edges": 49726,
 845 |     "num-unique-edges": 1090,
 846 |     "min-edge-size": 1,
 847 |     "max-edge-size": 39,
 848 |     "mean-edge-size": 3.14,
 849 |     "min-degree": 1,
 850 |     "max-degree": 5358,
 851 |     "mean-degree": 134.58,
 852 |     "num-components": 183,
 853 |     "gc-size": 628
 854 |   },
 855 |   "ndc-substances": {
 856 |     "url": "https://zenodo.org/records/10929019/files/NDC-substances.json",
 857 |     "data-type": "hypergraph",
 858 |     "num-nodes": 5556,
 859 |     "num-edges": 112919,
 860 |     "num-unique-edges": 10273,
 861 |     "min-edge-size": 1,
 862 |     "max-edge-size": 187,
 863 |     "mean-edge-size": 2.01,
 864 |     "min-degree": 1,
 865 |     "max-degree": 6693,
 866 |     "mean-degree": 40.95,
 867 |     "num-components": 13,
 868 |     "gc-size": 3414
 869 |   },
 870 |   "plant-pollinator-mpl-014": {
 871 |     "url": "https://zenodo.org/records/14561179/files/plant-pollinator-mpl-014.json",
 872 |     "num-nodes": 29,
 873 |     "num-edges": 81,
 874 |     "num-unique-edges": 49,
 875 |     "min-edge-size": 1,
 876 |     "max-edge-size": 10,
 877 |     "mean-edge-size": 2.21,
 878 |     "min-degree": 1,
 879 |     "max-degree": 40,
 880 |     "mean-degree": 6.17,
 881 |     "num-components": 2,
 882 |     "gc-size": 28
 883 |   },
 884 |   "plant-pollinator-mpl-015": {
 885 |     "url": "https://zenodo.org/records/13754154/files/plant-pollinator-mpl-015.json",
 886 |     "num-nodes": 131,
 887 |     "num-edges": 666,
 888 |     "num-unique-edges": 477,
 889 |     "min-edge-size": 1,
 890 |     "max-edge-size": 104,
 891 |     "mean-edge-size": 4.4,
 892 |     "min-degree": 1,
 893 |     "max-degree": 124,
 894 |     "mean-degree": 22.39,
 895 |     "num-components": 2,
 896 |     "gc-size": 130
 897 |   },
 898 |   "plant-pollinator-mpl-016": {
 899 |     "url": "https://zenodo.org/records/13754237/files/plant-pollinator-mpl-016.json",
 900 |     "num-nodes": 26,
 901 |     "num-edges": 179,
 902 |     "num-unique-edges": 93,
 903 |     "min-edge-size": 1,
 904 |     "max-edge-size": 17,
 905 |     "mean-edge-size": 2.3,
 906 |     "min-degree": 1,
 907 |     "max-degree": 86,
 908 |     "mean-degree": 15.85,
 909 |     "num-components": 1,
 910 |     "gc-size": 26
 911 |   },
 912 |   "plant-pollinator-mpl-021": {
 913 |     "url": "https://zenodo.org/records/14561215/files/plant-pollinator-mpl-021.json",
 914 |     "num-nodes": 91,
 915 |     "num-edges": 677,
 916 |     "num-unique-edges": 207,
 917 |     "min-edge-size": 1,
 918 |     "max-edge-size": 25,
 919 |     "mean-edge-size": 1.76,
 920 |     "min-degree": 1,
 921 |     "max-degree": 188,
 922 |     "mean-degree": 13.11,
 923 |     "num-components": 2,
 924 |     "gc-size": 90
 925 |   },
 926 |   "plant-pollinator-mpl-034": {
 927 |     "url": "https://zenodo.org/records/14561227/files/plant-pollinator-mpl-034.json",
 928 |     "num-nodes": 26,
 929 |     "num-edges": 128,
 930 |     "num-unique-edges": 63,
 931 |     "min-edge-size": 1,
 932 |     "max-edge-size": 21,
 933 |     "mean-edge-size": 2.44,
 934 |     "min-degree": 2,
 935 |     "max-degree": 47,
 936 |     "mean-degree": 12.0,
 937 |     "num-components": 2,
 938 |     "gc-size": 25
 939 |   },
 940 |   "plant-pollinator-mpl-044": {
 941 |     "url": "https://zenodo.org/records/14561246/files/plant-pollinator-mpl-044.json",
 942 |     "num-nodes": 110,
 943 |     "num-edges": 609,
 944 |     "num-unique-edges": 251,
 945 |     "min-edge-size": 1,
 946 |     "max-edge-size": 25,
 947 |     "mean-edge-size": 1.85,
 948 |     "min-degree": 1,
 949 |     "max-degree": 101,
 950 |     "mean-degree": 10.23,
 951 |     "num-components": 2,
 952 |     "gc-size": 107
 953 |   },
 954 |   "plant-pollinator-mpl-046": {
 955 |     "url": "https://zenodo.org/records/14561257/files/plant-pollinator-mpl-046.json",
 956 |     "num-nodes": 16,
 957 |     "num-edges": 44,
 958 |     "num-unique-edges": 42,
 959 |     "min-edge-size": 1,
 960 |     "max-edge-size": 16,
 961 |     "mean-edge-size": 6.32,
 962 |     "min-degree": 4,
 963 |     "max-degree": 30,
 964 |     "mean-degree": 17.38,
 965 |     "num-components": 1,
 966 |     "gc-size": 16
 967 |   },
 968 |   "plant-pollinator-mpl-049": {
 969 |     "url": "https://zenodo.org/records/13754332/files/plant-pollinator-mpl-049.json",
 970 |     "num-nodes": 37,
 971 |     "num-edges": 225,
 972 |     "num-unique-edges": 118,
 973 |     "min-edge-size": 1,
 974 |     "max-edge-size": 24,
 975 |     "mean-edge-size": 2.62,
 976 |     "min-degree": 1,
 977 |     "max-degree": 80,
 978 |     "mean-degree": 15.95,
 979 |     "num-components": 1,
 980 |     "gc-size": 37
 981 |   },
 982 |   "plant-pollinator-mpl-057": {
 983 |     "url": "https://zenodo.org/records/14561268/files/plant-pollinator-mpl-057.json",
 984 |     "num-nodes": 114,
 985 |     "num-edges": 883,
 986 |     "num-unique-edges": 319,
 987 |     "min-edge-size": 1,
 988 |     "max-edge-size": 37,
 989 |     "mean-edge-size": 2.17,
 990 |     "min-degree": 1,
 991 |     "max-degree": 119,
 992 |     "mean-degree": 16.84,
 993 |     "num-components": 1,
 994 |     "gc-size": 114
 995 |   },
 996 |   "plant-pollinator-mpl-062": {
 997 |     "url": "https://zenodo.org/records/13753744/files/plant-pollinator-mpl-062.json",
 998 |     "num-nodes": 456,
 999 |     "num-edges": 1044,
1000 |     "num-unique-edges": 1008,
1001 |     "min-edge-size": 1,
1002 |     "max-edge-size": 157,
1003 |     "mean-edge-size": 14.61,
1004 |     "min-degree": 17,
1005 |     "max-degree": 58,
1006 |     "mean-degree": 33.45,
1007 |     "num-components": 1,
1008 |     "gc-size": 456
1009 |   },
1010 |   "recipe-rec": {
1011 |     "url": "https://zenodo.org/records/14003376/files/recipe-rec.json",
1012 |     "num-nodes": 9271,
1013 |     "num-edges": 77733,
1014 |     "num-unique-edges": 74870,
1015 |     "min-edge-size": 1,
1016 |     "max-edge-size": 28,
1017 |     "mean-edge-size": 6.63,
1018 |     "min-degree": 1,
1019 |     "max-degree": 22956,
1020 |     "mean-degree": 55.57,
1021 |     "num-components": 4,
1022 |     "gc-size": 9265
1023 |   },
1024 |   "science-gallery": {
1025 |     "url": "https://zenodo.org/records/10206142/files/science-gallery.json",
1026 |     "data-type": "hypergraph",
1027 |     "num-nodes": 10972,
1028 |     "num-edges": 338765,
1029 |     "num-unique-edges": 52706,
1030 |     "min-edge-size": 2,
1031 |     "max-edge-size": 5,
1032 |     "mean-edge-size": 2.12,
1033 |     "min-degree": 1,
1034 |     "max-degree": 486,
1035 |     "mean-degree": 65.41,
1036 |     "num-components": 304,
1037 |     "gc-size": 410
1038 |   },
1039 |   "senate-bills": {
1040 |     "url": "https://zenodo.org/records/10957697/files/senate-bills.json",
1041 |     "data-type": "hypergraph",
1042 |     "num-nodes": 294,
1043 |     "num-edges": 29157,
1044 |     "num-unique-edges": 21721,
1045 |     "min-edge-size": 2,
1046 |     "max-edge-size": 99,
1047 |     "mean-edge-size": 7.96,
1048 |     "min-degree": 1,
1049 |     "max-degree": 3514,
1050 |     "mean-degree": 789.62,
1051 |     "num-components": 1,
1052 |     "gc-size": 294
1053 |   },
1054 |   "senate-committees": {
1055 |     "url": "https://zenodo.org/records/10957699/files/senate-committees.json",
1056 |     "data-type": "hypergraph",
1057 |     "num-nodes": 282,
1058 |     "num-edges": 315,
1059 |     "num-unique-edges": 301,
1060 |     "min-edge-size": 4,
1061 |     "max-edge-size": 31,
1062 |     "mean-edge-size": 17.17,
1063 |     "min-degree": 1,
1064 |     "max-degree": 63,
1065 |     "mean-degree": 19.18,
1066 |     "num-components": 1,
1067 |     "gc-size": 282
1068 |   },
1069 |   "sfhh-conference": {
1070 |     "url": "https://zenodo.org/records/10198859/files/sfhh-conference.json",
1071 |     "data-type": "hypergraph",
1072 |     "num-nodes": 403,
1073 |     "num-edges": 54305,
1074 |     "num-unique-edges": 10541,
1075 |     "min-edge-size": 2,
1076 |     "max-edge-size": 9,
1077 |     "mean-edge-size": 2.15,
1078 |     "min-degree": 2,
1079 |     "max-degree": 1960,
1080 |     "mean-degree": 289.42,
1081 |     "num-components": 1,
1082 |     "gc-size": 403
1083 |   },
1084 |   "tags-ask-ubuntu": {
1085 |     "url": "https://zenodo.org/records/10155835/files/tags-ask-ubuntu.json",
1086 |     "data-type": "hypergraph",
1087 |     "num-nodes": 3029,
1088 |     "num-edges": 271233,
1089 |     "num-unique-edges": 147222,
1090 |     "min-edge-size": 1,
1091 |     "max-edge-size": 5,
1092 |     "mean-edge-size": 2.71,
1093 |     "min-degree": 1,
1094 |     "max-degree": 21004,
1095 |     "mean-degree": 242.42,
1096 |     "num-components": 9,
1097 |     "gc-size": 3021
1098 |   },
1099 |   "tags-math-sx": {
1100 |     "url": "https://zenodo.org/records/10155845/files/tags-math-sx.json",
1101 |     "data-type": "hypergraph",
1102 |     "num-nodes": 1629,
1103 |     "num-edges": 822059,
1104 |     "num-unique-edges": 170476,
1105 |     "min-edge-size": 1,
1106 |     "max-edge-size": 5,
1107 |     "mean-edge-size": 2.19,
1108 |     "min-degree": 1,
1109 |     "max-degree": 71046,
1110 |     "mean-degree": 1105.98,
1111 |     "num-components": 3,
1112 |     "gc-size": 1627
1113 |   },
1114 |   "tags-stack-overflow": {
1115 |     "url": "https://zenodo.org/records/10155885/files/tags-stack-overflow.json",
1116 |     "data-type": "hypergraph",
1117 |     "num-nodes": 49998,
1118 |     "num-edges": 14458875,
1119 |     "num-unique-edges": 5537637,
1120 |     "min-edge-size": 1,
1121 |     "max-edge-size": 5,
1122 |     "mean-edge-size": 2.97,
1123 |     "min-degree": 1,
1124 |     "max-degree": 1457906,
1125 |     "mean-degree": 858.45,
1126 |     "num-components": 61,
1127 |     "gc-size": 49931
1128 |   },
1129 |   "threads-ask-ubuntu": {
1130 |     "url": "https://zenodo.org/records/10373311/files/threads-ask-ubuntu.json",
1131 |     "data-type": "hypergraph",
1132 |     "num-nodes": 125602,
1133 |     "num-edges": 192947,
1134 |     "num-unique-edges": 166999,
1135 |     "min-edge-size": 1,
1136 |     "max-edge-size": 14,
1137 |     "mean-edge-size": 1.8,
1138 |     "min-degree": 1,
1139 |     "max-degree": 2332,
1140 |     "mean-degree": 2.76,
1141 |     "num-components": 39187,
1142 |     "gc-size": 82075
1143 |   },
1144 |   "threads-math-sx": {
1145 |     "url": "https://zenodo.org/records/10373324/files/threads-math-sx.json",
1146 |     "data-type": "hypergraph",
1147 |     "num-nodes": 176445,
1148 |     "num-edges": 719792,
1149 |     "num-unique-edges": 595749,
1150 |     "min-edge-size": 1,
1151 |     "max-edge-size": 21,
1152 |     "mean-edge-size": 2.24,
1153 |     "min-degree": 1,
1154 |     "max-degree": 12511,
1155 |     "mean-degree": 9.13,
1156 |     "num-components": 23181,
1157 |     "gc-size": 152702
1158 |   },
1159 |   "threads-stack-overflow": {
1160 |     "url": "https://zenodo.org/records/10373328/files/threads-stack-overflow.json",
1161 |     "data-type": "hypergraph",
1162 |     "num-nodes": 2675969,
1163 |     "num-edges": 11305356,
1164 |     "num-unique-edges": 9705575,
1165 |     "min-edge-size": 1,
1166 |     "max-edge-size": 67,
1167 |     "mean-edge-size": 2.26,
1168 |     "min-degree": 1,
1169 |     "max-degree": 36365,
1170 |     "mean-degree": 9.56,
1171 |     "num-components": 364286,
1172 |     "gc-size": 2301086
1173 |   }
1174 | }
1175 | 


--------------------------------------------------------------------------------