├── tgx ├── io │ ├── __init__.py │ ├── write.py │ └── read.py ├── classes │ ├── __init__.py │ ├── .ipynb_checkpoints │ │ └── graph-checkpoint.py │ └── graph.py ├── data │ ├── __init__.py │ ├── tgb.py │ └── builtin.py ├── viz │ ├── __init__.py │ ├── TEA.py │ └── TET.py ├── utils │ ├── __init__.py │ ├── .ipynb_checkpoints │ │ ├── edgelist-checkpoint.py │ │ ├── graph_stat-checkpoint.py │ │ └── plotting_utils-checkpoint.py │ ├── plotting_utils.py │ ├── graph_utils.py │ └── stat.py └── __init__.py ├── py_tgx.egg-info ├── top_level.txt ├── dependency_links.txt ├── SOURCES.txt └── PKG-INFO ├── docs ├── io │ └── io.md ├── data │ ├── tgb.md │ └── builtin.md ├── viz │ ├── vis_tet.md │ └── vis_tea.md ├── classes │ └── graph.md ├── utils │ ├── graph_stats.md │ ├── graph_utils.md │ └── plotting_utils.md ├── 2023_TGX_logo.png ├── gallery │ ├── TEA │ │ ├── Enron.png │ │ ├── MOOC.png │ │ ├── UCI.png │ │ ├── CanParl.png │ │ ├── Flights.png │ │ ├── LastFM.png │ │ ├── Reddit.png │ │ ├── UNVote.png │ │ ├── USLegis.png │ │ ├── Contacts.png │ │ ├── SocialEvo.png │ │ ├── tgbl-coin.png │ │ ├── tgbl-wiki.png │ │ ├── tgbn-genre.png │ │ ├── tgbn-trade.png │ │ ├── tgbl-comment.png │ │ ├── tgbl-flight.png │ │ ├── tgbl-review.png │ │ └── tgbn-reddit.png │ ├── TET │ │ ├── Enron.png │ │ ├── MOOC.png │ │ ├── UCI.png │ │ ├── CanParl.png │ │ ├── Flights.png │ │ ├── LastFM.png │ │ ├── Reddit.png │ │ ├── UNVote.png │ │ ├── USLegis.png │ │ ├── Contacts.png │ │ ├── SocialEvo.png │ │ ├── tgbl-coin.png │ │ ├── tgbl-wiki.png │ │ ├── tgbn-genre.png │ │ ├── tgbn-trade.png │ │ ├── tgbl-comment.png │ │ ├── tgbl-flight.png │ │ ├── tgbl-review.png │ │ └── tgbn-reddit.png │ ├── degree │ │ ├── mooc_ave_degree_per_ts.png │ │ ├── uci_ave_degree_per_ts.png │ │ ├── UNvote_ave_degree_per_ts.png │ │ ├── enron_ave_degree_per_ts.png │ │ ├── lastfm_ave_degree_per_ts.png │ │ ├── reddit_ave_degree_per_ts.png │ │ ├── CanParl_ave_degree_per_ts.png │ │ ├── Contacts_ave_degree_per_ts.png │ │ ├── Flights_ave_degree_per_ts.png │ │ ├── SocialEvo_ave_degree_per_ts.png │ │ ├── USLegis_ave_degree_per_ts.png │ │ ├── tgbl-coin_ave_degree_per_ts.png │ │ ├── tgbl-wiki_ave_degree_per_ts.png │ │ ├── tgbl-flight_ave_degree_per_ts.png │ │ ├── tgbl-review_ave_degree_per_ts.png │ │ ├── tgbn-genre_ave_degree_per_ts.png │ │ ├── tgbn-reddit_ave_degree_per_ts.png │ │ ├── tgbn-trade_ave_degree_per_ts.png │ │ └── tgbl-comment_ave_degree_per_ts.png │ ├── node_edge │ │ ├── reddit_nodes_per_ts.png │ │ ├── mooc_node&edge_per_ts.png │ │ ├── uci_node&edge_per_ts.png │ │ ├── CanParl_node&edge_per_ts.png │ │ ├── Flights_node&edge_per_ts.png │ │ ├── UNvote_node&edge_per_ts.png │ │ ├── USLegis_node&edge_per_ts.png │ │ ├── enron_node&edge_per_ts.png │ │ ├── lastfm_node&edge_per_ts.png │ │ ├── reddit_node&edge_per_ts.png │ │ ├── Contacts_node&edge_per_ts.png │ │ ├── SocialEvo_node&edge_per_ts.png │ │ ├── tgbl-coin_node&edge_per_ts.png │ │ ├── tgbl-wiki_node&edge_per_ts.png │ │ ├── tgbl-comment_node&edge_per_ts.png │ │ ├── tgbl-flight_node&edge_per_ts.png │ │ ├── tgbl-review_node&edge_per_ts.png │ │ ├── tgbn-genre_node&edge_per_ts.png │ │ ├── tgbn-reddit_node&edge_per_ts.png │ │ └── tgbn-trade_node&edge_per_ts.png │ ├── uci.md │ ├── mooc.md │ ├── enron.md │ ├── lastfm.md │ ├── unvote.md │ ├── flight.md │ ├── uslegis.md │ ├── contact.md │ ├── tgbl-coin.md │ ├── socialevo.md │ ├── tgbn-genre.md │ ├── tgbn-trade.md │ ├── tgbl-flight.md │ ├── tgbl-wiki.md │ ├── tgbn-reddit.md │ ├── reddit.md │ ├── tgbl-comment.md │ ├── tgbl-review.md │ ├── canparl.md │ ├── 0-tet-tgb.md │ ├── 0-tea-tgb.md │ ├── 0-tet-builtin.md │ ├── 0-degree-tgb.md │ ├── 0-tea-builtin.md │ ├── 0-node_edge-tgb.md │ ├── 0-degree-builtin.md │ ├── 0-node_edge-builtin.md │ └── dataset.md ├── tutorials │ ├── toy_data.csv │ └── data_loader.ipynb ├── index.md └── contribute.md ├── imgs └── 2023_TGX_logo.png ├── pyproject.toml ├── requirements.txt ├── setup.py ├── setup.cfg ├── .github └── workflows │ └── ci.yml ├── full_requirements.txt ├── LICENSE ├── README.md ├── examples ├── data_viz.py ├── starting_example.py └── .ipynb_checkpoints │ └── test-checkpoint.py ├── .gitignore └── mkdocs.yml /tgx/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tgx/classes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tgx/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /py_tgx.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | tgx 2 | -------------------------------------------------------------------------------- /py_tgx.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/io/io.md: -------------------------------------------------------------------------------- 1 | # IO 2 | 3 | ::: tgx.io.read 4 | -------------------------------------------------------------------------------- /tgx/viz/__init__.py: -------------------------------------------------------------------------------- 1 | # from tgx.viz import TEA -------------------------------------------------------------------------------- /docs/data/tgb.md: -------------------------------------------------------------------------------- 1 | ## TGB Datasets 2 | ::: tgx.data.tgb -------------------------------------------------------------------------------- /docs/viz/vis_tet.md: -------------------------------------------------------------------------------- 1 | ## TET Plots 2 | 3 | ::: tgx.viz.TET -------------------------------------------------------------------------------- /docs/viz/vis_tea.md: -------------------------------------------------------------------------------- 1 | ## TEA Plots 2 | 3 | ::: tgx.viz.TEA 4 | -------------------------------------------------------------------------------- /docs/classes/graph.md: -------------------------------------------------------------------------------- 1 | ## Graph 2 | ::: tgx.classes.graph 3 | 4 | -------------------------------------------------------------------------------- /docs/data/builtin.md: -------------------------------------------------------------------------------- 1 | ## Builtin Datasets 2 | ::: tgx.data.builtin -------------------------------------------------------------------------------- /docs/utils/graph_stats.md: -------------------------------------------------------------------------------- 1 | ## Graph Stats 2 | ::: tgx.utils.stat 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/utils/graph_utils.md: -------------------------------------------------------------------------------- 1 | ## Graph Utils 2 | 3 | ::: tgx.utils.graph_utils -------------------------------------------------------------------------------- /docs/utils/plotting_utils.md: -------------------------------------------------------------------------------- 1 | ## Plotting Utils 2 | 3 | ::: tgx.utils.plotting_utils -------------------------------------------------------------------------------- /docs/2023_TGX_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/2023_TGX_logo.png -------------------------------------------------------------------------------- /imgs/2023_TGX_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/imgs/2023_TGX_logo.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Enron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Enron.png -------------------------------------------------------------------------------- /docs/gallery/TEA/MOOC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/MOOC.png -------------------------------------------------------------------------------- /docs/gallery/TEA/UCI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/UCI.png -------------------------------------------------------------------------------- /docs/gallery/TET/Enron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Enron.png -------------------------------------------------------------------------------- /docs/gallery/TET/MOOC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/MOOC.png -------------------------------------------------------------------------------- /docs/gallery/TET/UCI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/UCI.png -------------------------------------------------------------------------------- /docs/gallery/TEA/CanParl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/CanParl.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Flights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Flights.png -------------------------------------------------------------------------------- /docs/gallery/TEA/LastFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/LastFM.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Reddit.png -------------------------------------------------------------------------------- /docs/gallery/TEA/UNVote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/UNVote.png -------------------------------------------------------------------------------- /docs/gallery/TEA/USLegis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/USLegis.png -------------------------------------------------------------------------------- /docs/gallery/TET/CanParl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/CanParl.png -------------------------------------------------------------------------------- /docs/gallery/TET/Flights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Flights.png -------------------------------------------------------------------------------- /docs/gallery/TET/LastFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/LastFM.png -------------------------------------------------------------------------------- /docs/gallery/TET/Reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Reddit.png -------------------------------------------------------------------------------- /docs/gallery/TET/UNVote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/UNVote.png -------------------------------------------------------------------------------- /docs/gallery/TET/USLegis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/USLegis.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Contacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Contacts.png -------------------------------------------------------------------------------- /docs/gallery/TEA/SocialEvo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/SocialEvo.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-coin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-coin.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-wiki.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-wiki.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbn-genre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbn-genre.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbn-trade.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbn-trade.png -------------------------------------------------------------------------------- /docs/gallery/TET/Contacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Contacts.png -------------------------------------------------------------------------------- /docs/gallery/TET/SocialEvo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/SocialEvo.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-coin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-coin.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-wiki.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-wiki.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbn-genre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbn-genre.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbn-trade.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbn-trade.png -------------------------------------------------------------------------------- /docs/tutorials/toy_data.csv: -------------------------------------------------------------------------------- 1 | time, source, destination 2 | 0,1,2 3 | 0,2,1 4 | 0,3,1 5 | 1,2,2 6 | 1,1,2 7 | 1,3,1 -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-comment.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-flight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-flight.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-review.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbn-reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbn-reddit.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-comment.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-flight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-flight.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-review.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbn-reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbn-reddit.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /docs/gallery/degree/mooc_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/mooc_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/uci_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/uci_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/reddit_nodes_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/reddit_nodes_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/UNvote_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/UNvote_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/enron_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/enron_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/lastfm_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/lastfm_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/reddit_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/reddit_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/mooc_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/mooc_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/uci_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/uci_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/CanParl_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/CanParl_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/Contacts_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/Contacts_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/Flights_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/Flights_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/SocialEvo_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/SocialEvo_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/USLegis_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/USLegis_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-coin_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-coin_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-wiki_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-wiki_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/CanParl_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/CanParl_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/Flights_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/Flights_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/UNvote_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/UNvote_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/USLegis_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/USLegis_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/enron_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/enron_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/lastfm_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/lastfm_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/reddit_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/reddit_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-flight_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-flight_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-review_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-review_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbn-genre_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbn-genre_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbn-reddit_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbn-reddit_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbn-trade_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbn-trade_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/Contacts_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/Contacts_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/SocialEvo_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/SocialEvo_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-coin_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-coin_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-wiki_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-wiki_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-comment_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-comment_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-comment_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-comment_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-flight_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-flight_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-review_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-review_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbn-genre_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbn-genre_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbn-reddit_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbn-reddit_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbn-trade_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbn-trade_node&edge_per_ts.png -------------------------------------------------------------------------------- /tgx/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from tgx.utils.graph_stat import * 2 | # from tgx.utils.graph_utils import * 3 | # from tgx.utils.plotting_utils import * 4 | 5 | # from . import * -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools>=69.1.0 2 | wheel>=0.42.0 3 | networkx>=3.2.1 4 | args>=0.1.0 5 | requests>=2.28.2 6 | matplotlib>=3.8.0 7 | pandas>=1.5.3 8 | numpy>=1.26.0 9 | seaborn>=0.13.0 10 | tqdm>=4.66.1 11 | scikit-learn>=1.3.1 12 | py-tgb>=0.9.2 13 | -------------------------------------------------------------------------------- /docs/gallery/uci.md: -------------------------------------------------------------------------------- 1 | ## UCI 2 | #### TEA Plot 3 | ![UCI](TEA/UCI.png){ width="400"} 4 | #### TET Plot 5 | ![UCI](TET/UCI.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![UCI](degree/uci_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![UCI](node_edge/uci_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/mooc.md: -------------------------------------------------------------------------------- 1 | 2 | ## MOOC 3 | #### TEA Plot 4 | ![image](TEA/MOOC.png){ width="400"} 5 | #### TET Plot 6 | ![image](TET/MOOC.png){ width="400"} 7 | 8 | #### Average degree over time 9 | ![image](degree/mooc_ave_degree_per_ts.png){ width="400"} 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/mooc_node&edge_per_ts.png){ width="400"} -------------------------------------------------------------------------------- /docs/gallery/enron.md: -------------------------------------------------------------------------------- 1 | ## Enron 2 | #### TEA Plot 3 | ![image](TEA/Enron.png){ width="400"} 4 | ### TET Plot 5 | ![image](TET/Enron.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/enron_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/enron_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/lastfm.md: -------------------------------------------------------------------------------- 1 | 2 | ## LastFM 3 | #### TEA Plot 4 | ![image](TEA/LastFM.png){ width="400"} 5 | #### TET Plot 6 | ![image](TEA/LastFM.png){ width="400"} 7 | #### Average degree over time 8 | ![image](degree/lastfm_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/lastfm_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/unvote.md: -------------------------------------------------------------------------------- 1 | ## UN Vote 2 | #### TEA Plot 3 | ![image](TEA/UNVote.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/UNVote.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/UNvote_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/UNvote_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/flight.md: -------------------------------------------------------------------------------- 1 | ## Flights 2 | #### TEA Plot 3 | ![image](TEA/Flights.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/Flights.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/Flights_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/Flights_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/uslegis.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## US Legis 4 | #### TEA Plot 5 | ![image](TEA/USLegis.png){ width="400"} 6 | #### TET Plot 7 | ![image](TET/USLegis.png){ width="400"} 8 | #### Average degree over time 9 | ![image](degree/USLegis_ave_degree_per_ts.png){ width="400"} 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/USLegis_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/contact.md: -------------------------------------------------------------------------------- 1 | ## Contacts 2 | #### TEA Plot 3 | ![image](TEA/Contacts.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/Contacts.png){ width="400"} 6 | 7 | 8 | #### Average degree over time 9 | ![image](degree/Contacts_ave_degree_per_ts.png){ width="400"} 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/Contacts_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-coin.md: -------------------------------------------------------------------------------- 1 | ## tgbl-coin 2 | #### TEA Plot 3 | ![image](TEA/tgbl-coin.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-coin.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbl-coin_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbl-coin_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /tgx/__init__.py: -------------------------------------------------------------------------------- 1 | from tgx.classes.graph import Graph 2 | 3 | from tgx.data.builtin import builtin 4 | from tgx.data.tgb import tgb_data 5 | 6 | from tgx.io.read import read_csv 7 | from tgx.io.write import write_csv 8 | 9 | from tgx.viz.TEA import TEA 10 | from tgx.viz.TET import TET 11 | 12 | from tgx.utils.stat import * 13 | from tgx.utils.graph_utils import * 14 | 15 | -------------------------------------------------------------------------------- /docs/gallery/socialevo.md: -------------------------------------------------------------------------------- 1 | ## Social Evo 2 | #### TEA Plot 3 | ![image](TEA/SocialEvo.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/SocialEvo.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/SocialEvo_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/SocialEvo_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbn-genre.md: -------------------------------------------------------------------------------- 1 | ## tgbn-genre 2 | #### TEA Plot 3 | ![image](TEA/tgbn-genre.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbn-genre.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbn-genre_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbn-genre_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbn-trade.md: -------------------------------------------------------------------------------- 1 | ## tgbn-trade 2 | #### TEA Plot 3 | ![image](TEA/tgbn-trade.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbn-trade.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbn-trade_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbn-trade_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-flight.md: -------------------------------------------------------------------------------- 1 | ## tgbl-flight 2 | #### TEA Plot 3 | ![image](TEA/tgbl-flight.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-flight.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbl-flight_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbl-flight_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-wiki.md: -------------------------------------------------------------------------------- 1 | ## tgbl-wiki 2 | #### TEA Plot 3 | ![image](TEA/tgbl-wiki.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-wiki.png){ width="400"} 6 | 7 | 8 | 9 | #### Average degree over time 10 | ![image](degree/tgbl-wiki_ave_degree_per_ts.png){ width="400"} 11 | 12 | 13 | #### Node and Edge over time 14 | ![image](node_edge/tgbl-wiki_node&edge_per_ts.png){ width="400"} 15 | -------------------------------------------------------------------------------- /docs/gallery/tgbn-reddit.md: -------------------------------------------------------------------------------- 1 | ## tgbn-reddit 2 | #### TEA Plot 3 | ![image](TEA/tgbn-reddit.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbn-reddit.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbn-reddit_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbn-reddit_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/reddit.md: -------------------------------------------------------------------------------- 1 | ## Reddit 2 | #### TEA Plot 3 | ![image](TEA/Reddit.png){ width="400"} 4 | 5 | 6 | #### TET Plot 7 | ![image](TET/Reddit.png){ width="400"} 8 | 9 | #### Average degree over time 10 | ![image](degree/reddit_ave_degree_per_ts.png){ width="400"} 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/reddit_node&edge_per_ts.png){ width="400"} -------------------------------------------------------------------------------- /docs/gallery/tgbl-comment.md: -------------------------------------------------------------------------------- 1 | ## tgbl-comment 2 | #### TEA Plot 3 | ![image](TEA/tgbl-comment.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-comment.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbl-comment_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbl-comment_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-review.md: -------------------------------------------------------------------------------- 1 | 2 | ## tgbl-review 3 | #### TEA Plot 4 | ![image](TEA/tgbl-review.png){ width="400"} 5 | #### TET Plot 6 | ![image](TET/tgbl-review.png){ width="400"} 7 | 8 | #### Average degree over time 9 | ![image](degree/tgbl-review_ave_degree_per_ts.png){ width="400"} 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/tgbl-review_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/canparl.md: -------------------------------------------------------------------------------- 1 | 2 | ## Can. Parl. 3 | #### TEA Plot 4 | ![Canadian Parliment](TEA/CanParl.png){ width="400"} 5 | #### TET Plot 6 | ![Canadian Parliment](TET/CanParl.png){ width="400"} 7 | #### Average degree over time 8 | ![Canadian Parliment](degree/CanParl_ave_degree_per_ts.png){ width="400"} 9 | #### Node and Edge over time 10 | ![Canadian Parliment](node_edge/CanParl_node&edge_per_ts.png){ width="400"} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | def readme(): 5 | with open("README.md") as f: 6 | return f.read() 7 | 8 | 9 | setup( 10 | name="py-tgx", 11 | version="0.4.0", 12 | description="Temporal Graph Visualization with TGX", 13 | url="https://github.com/ComplexData-MILA/TGX", 14 | keywords="Temporal Graph Visualization", 15 | license="MIT", 16 | packages=find_packages(), 17 | ) -------------------------------------------------------------------------------- /docs/gallery/0-tet-tgb.md: -------------------------------------------------------------------------------- 1 | ## TET Plots 2 | 3 | 4 | 5 | ### tgbl-wiki 6 | ![image](TET/tgbl-wiki.png){ width="400"} 7 | 8 | ### tgbl-review 9 | ![image](TET/tgbl-review.png){ width="400"} 10 | 11 | ### tgbl-coin 12 | ![image](TET/tgbl-coin.png){ width="400"} 13 | 14 | ### tgbl-comment 15 | ![image](TET/tgbl-comment.png){ width="400"} 16 | 17 | ### tgbl-flight 18 | ![image](TET/tgbl-flight.png){ width="400"} 19 | 20 | ### tgbn-trade 21 | ![image](TET/tgbn-trade.png){ width="400"} 22 | 23 | ### tgbn-genre 24 | ![image](TET/tgbn-genre.png){ width="400"} 25 | 26 | ### tgbn-reddit 27 | ![image](TET/tgbn-reddit.png){ width="400"} 28 | -------------------------------------------------------------------------------- /docs/gallery/0-tea-tgb.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | 4 | 5 | ### tgbl-wiki 6 | ![image](TEA/tgbl-wiki.png){ width="400"} 7 | 8 | ### tgbl-review 9 | ![image](TEA/tgbl-review.png){ width="400"} 10 | 11 | ### tgbl-coin 12 | ![image](TEA/tgbl-coin.png){ width="400"} 13 | 14 | 15 | ### tgbl-comment 16 | ![image](TEA/tgbl-comment.png){ width="400"} 17 | 18 | ### tgbl-flight 19 | ![image](TEA/tgbl-flight.png){ width="400"} 20 | 21 | ### tgbn-trade 22 | ![image](TEA/tgbn-trade.png){ width="400"} 23 | 24 | ### tgbn-genre 25 | ![image](TEA/tgbn-genre.png){ width="400"} 26 | 27 | ### tgbn-reddit 28 | ![image](TEA/tgbn-reddit.png){ width="400"} 29 | 30 | 31 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = py-tgx 3 | version = 0.2.2 4 | author = ["Razieh Shirzadkhani ", "shenyang Huang ", "Elahe Kooshafar", "Farimah Poursafaei"] 5 | author_email = razieh.shirzadkhani@gmail.com 6 | description = Temporal Graph Analysis project repo 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/fpour/TGX 10 | project_urls = 11 | Bug Tracker = https://github.com/fpour/TGX/issues 12 | classifiers = 13 | Programming Language :: Python :: 3.9 14 | 15 | [options] 16 | package_dir = 17 | = ./ 18 | packages = find: 19 | python_requires = >=3.6 20 | 21 | -------------------------------------------------------------------------------- /docs/gallery/0-tet-builtin.md: -------------------------------------------------------------------------------- 1 | ## TET Plots 2 | 3 | ### Reddit 4 | ![image](TET/Reddit.png){ width="400"} 5 | 6 | ### MOOC 7 | ![image](TET/MOOC.png){ width="400"} 8 | 9 | ### LastFM 10 | ![image](TET/LastFM.png){ width="400"} 11 | 12 | ### Enron 13 | ![image](TET/Enron.png){ width="400"} 14 | 15 | ### Social Evo 16 | ![image](TET/SocialEvo.png){ width="400"} 17 | 18 | ### UCI 19 | ![UCI](TET/UCI.png){ width="400"} 20 | 21 | ### Flights 22 | ![image](TET/Flights.png){ width="400"} 23 | 24 | ### Can. Parl. 25 | ![Canadian Parliment](TET/CanParl.png){ width="400"} 26 | 27 | ### US Legis 28 | ![image](TET/USLegis.png){ width="400"} 29 | 30 | ### UN Vote 31 | ![image](TET/UNVote.png){ width="400"} 32 | 33 | ### Contacts 34 | ![image](TET/Contacts.png){ width="400"} 35 | 36 | 37 | -------------------------------------------------------------------------------- /py_tgx.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | README.md 3 | pyproject.toml 4 | setup.cfg 5 | setup.py 6 | ./py_tgx.egg-info/PKG-INFO 7 | ./py_tgx.egg-info/SOURCES.txt 8 | ./py_tgx.egg-info/dependency_links.txt 9 | ./py_tgx.egg-info/top_level.txt 10 | ./tgx/__init__.py 11 | ./tgx/classes/__init__.py 12 | ./tgx/classes/graph.py 13 | ./tgx/data/__init__.py 14 | ./tgx/data/builtin.py 15 | ./tgx/data/tgb.py 16 | ./tgx/io/__init__.py 17 | ./tgx/io/read.py 18 | ./tgx/io/write.py 19 | ./tgx/utils/__init__.py 20 | ./tgx/utils/graph_utils.py 21 | ./tgx/utils/plotting_utils.py 22 | ./tgx/utils/stat.py 23 | ./tgx/viz/TEA.py 24 | ./tgx/viz/TET.py 25 | ./tgx/viz/__init__.py 26 | py_tgx.egg-info/PKG-INFO 27 | py_tgx.egg-info/SOURCES.txt 28 | py_tgx.egg-info/dependency_links.txt 29 | py_tgx.egg-info/top_level.txt 30 | tgx/__init__.py -------------------------------------------------------------------------------- /docs/gallery/0-degree-tgb.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### tgbl-wiki 7 | ![image](degree/tgbl-wiki_ave_degree_per_ts.png){ width="400"} 8 | 9 | ### tgbl-review 10 | ![image](degree/tgbl-review_ave_degree_per_ts.png){ width="400"} 11 | 12 | ### tgbl-coin 13 | ![image](degree/tgbl-coin_ave_degree_per_ts.png){ width="400"} 14 | 15 | 16 | ### tgbl-comment 17 | ![image](degree/tgbl-comment_ave_degree_per_ts.png){ width="400"} 18 | 19 | ### tgbl-flight 20 | ![image](degree/tgbl-flight_ave_degree_per_ts.png){ width="400"} 21 | 22 | ### tgbn-trade 23 | ![image](degree/tgbn-trade_ave_degree_per_ts.png){ width="400"} 24 | 25 | ### tgbn-genre 26 | ![image](degree/tgbn-genre_ave_degree_per_ts.png){ width="400"} 27 | 28 | ### tgbn-reddit 29 | ![image](degree/tgbn-reddit_ave_degree_per_ts.png){ width="400"} 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/gallery/0-tea-builtin.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### Reddit 7 | ![image](TEA/Reddit.png){ width="400"} 8 | 9 | ### MOOC 10 | ![image](TEA/MOOC.png){ width="400"} 11 | 12 | ### LastFM 13 | ![image](TEA/LastFM.png){ width="400"} 14 | 15 | ### Enron 16 | ![image](TEA/Enron.png){ width="400"} 17 | 18 | ### Social Evo 19 | ![image](TEA/SocialEvo.png){ width="400"} 20 | 21 | 22 | ### UCI 23 | ![UCI](TEA/UCI.png){ width="400"} 24 | 25 | ### Flights 26 | ![image](TEA/Flights.png){ width="400"} 27 | 28 | ### Can. Parl. 29 | ![Canadian Parliment](TEA/CanParl.png){ width="400"} 30 | 31 | ### US Legis 32 | ![image](TEA/USLegis.png){ width="400"} 33 | 34 | ### UN Vote 35 | ![image](TEA/UNVote.png){ width="400"} 36 | 37 | ### Contacts 38 | ![image](TEA/Contacts.png){ width="400"} 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | permissions: 8 | contents: write 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v4 15 | with: 16 | python-version: 3.x 17 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 18 | - uses: actions/cache@v3 19 | with: 20 | key: mkdocs-material-${{ env.cache_id }} 21 | path: .cache 22 | restore-keys: | 23 | mkdocs-material- 24 | - run: pip install mkdocs-material 25 | - run: pip install mkdocs-glightbox 26 | - run: pip install mkdocstrings-python 27 | - run: pip install mkdocs-jupyter 28 | - run: mkdocs gh-deploy --force 29 | -------------------------------------------------------------------------------- /docs/gallery/0-node_edge-tgb.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | ### tgbl-wiki 6 | ![image](node_edge/tgbl-wiki_node&edge_per_ts.png){ width="400"} 7 | 8 | ### tgbl-review 9 | ![image](node_edge/tgbl-review_node&edge_per_ts.png){ width="400"} 10 | 11 | ### tgbl-coin 12 | ![image](node_edge/tgbl-coin_node&edge_per_ts.png){ width="400"} 13 | 14 | 15 | ### tgbl-comment 16 | ![image](node_edge/tgbl-comment_node&edge_per_ts.png){ width="400"} 17 | 18 | ### tgbl-flight 19 | ![image](node_edge/tgbl-flight_node&edge_per_ts.png){ width="400"} 20 | 21 | ### tgbn-trade 22 | ![image](node_edge/tgbn-trade_node&edge_per_ts.png){ width="400"} 23 | 24 | ### tgbn-genre 25 | ![image](node_edge/tgbn-genre_node&edge_per_ts.png){ width="400"} 26 | 27 | ### tgbn-reddit 28 | ![image](node_edge/tgbn-reddit_node&edge_per_ts.png){ width="400"} 29 | 30 | 31 | -------------------------------------------------------------------------------- /tgx/io/write.py: -------------------------------------------------------------------------------- 1 | # # a = {1:[(1,3), (4,5), (5,6)], 2 | # # 2:[(2,5)]} 3 | # # print(a.items()) 4 | 5 | 6 | # Details = {"Destination": "China", 7 | # "Nationality": "Italian", "Age": []} 8 | 9 | # print("Original:", Details) 10 | 11 | # # appending the list 12 | # Details["Age"] += [20, "Twenty"] 13 | # print("Modified:", Details) 14 | 15 | 16 | # a1 = [(1,2,3), (1,2,3), (2,3,4)] 17 | # d={} 18 | # lis = [] 19 | # t = 1 20 | # for i in a1: 21 | # q1=i[0] 22 | # q2=i[1] 23 | # q3=i[2] 24 | # if q1 not in d: 25 | # d[q1] = [] 26 | # print(d) 27 | # d[q1].append((q2,q3)) 28 | # if q1 != t: 29 | # d[t] = lis 30 | # lis=[] 31 | # t = q1 32 | # lis.append((q2,q3)) 33 | # d[t] = lis 34 | # print(d) 35 | 36 | 37 | # for i, l in a.items(): 38 | # for s in l: 39 | # print (i, s[0], s[1]) 40 | 41 | def write_csv(): 42 | pass -------------------------------------------------------------------------------- /full_requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.3 2 | aiosignal==1.3.1 3 | args==0.1.0 4 | async-timeout==4.0.3 5 | attrs==23.2.0 6 | certifi==2024.2.2 7 | charset-normalizer==3.3.2 8 | clint==0.5.1 9 | contourpy==1.2.0 10 | cycler==0.12.1 11 | fonttools==4.49.0 12 | frozenlist==1.4.1 13 | fsspec==2024.2.0 14 | idna==3.6 15 | Jinja2==3.1.3 16 | joblib==1.3.2 17 | kiwisolver==1.4.5 18 | MarkupSafe==2.1.5 19 | matplotlib==3.8.3 20 | multidict==6.0.5 21 | networkx==3.2.1 22 | numpy==1.26.4 23 | packaging==23.2 24 | pandas==1.5.3 25 | pillow==10.2.0 26 | psutil==5.9.8 27 | py-tgb==0.9.2 28 | -e git+ssh://git@github.com/ComplexData-MILA/TGX.git@c872f31f3f062fcb837d8555081fc104064976cf#egg=py_tgx 29 | pyparsing==3.1.1 30 | python-dateutil==2.8.2 31 | pytz==2024.1 32 | requests==2.31.0 33 | scikit-learn==1.4.1.post1 34 | scipy==1.12.0 35 | seaborn==0.13.2 36 | six==1.16.0 37 | threadpoolctl==3.3.0 38 | torch_geometric==2.5.0 39 | tqdm==4.66.2 40 | urllib3==2.2.1 41 | yarl==1.9.4 42 | -------------------------------------------------------------------------------- /docs/gallery/0-degree-builtin.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### Reddit 7 | ![image](degree/reddit_ave_degree_per_ts.png){ width="400"} 8 | 9 | ### MOOC 10 | ![image](degree/mooc_ave_degree_per_ts.png){ width="400"} 11 | 12 | ### LastFM 13 | ![image](degree/lastfm_ave_degree_per_ts.png){ width="400"} 14 | 15 | ### Enron 16 | ![image](degree/enron_ave_degree_per_ts.png){ width="400"} 17 | 18 | ### Social Evo 19 | ![image](degree/SocialEvo_ave_degree_per_ts.png){ width="400"} 20 | 21 | 22 | ### UCI 23 | ![UCI](degree/uci_ave_degree_per_ts.png){ width="400"} 24 | 25 | ### Flights 26 | ![image](degree/Flights_ave_degree_per_ts.png){ width="400"} 27 | 28 | ### Can. Parl. 29 | ![Canadian Parliment](degree/CanParl_ave_degree_per_ts.png){ width="400"} 30 | 31 | ### US Legis 32 | ![image](degree/USLegis_ave_degree_per_ts.png){ width="400"} 33 | 34 | ### UN Vote 35 | ![image](degree/UNvote_ave_degree_per_ts.png){ width="400"} 36 | 37 | ### Contacts 38 | ![image](degree/Contacts_ave_degree_per_ts.png){ width="400"} 39 | -------------------------------------------------------------------------------- /docs/gallery/0-node_edge-builtin.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### Reddit 7 | ![image](node_edge/reddit_node&edge_per_ts.png){ width="400"} 8 | 9 | ### MOOC 10 | ![image](node_edge/mooc_node&edge_per_ts.png){ width="400"} 11 | 12 | ### LastFM 13 | ![image](node_edge/lastfm_node&edge_per_ts.png){ width="400"} 14 | 15 | ### Enron 16 | ![image](node_edge/enron_node&edge_per_ts.png){ width="400"} 17 | 18 | ### Social Evo 19 | ![image](node_edge/SocialEvo_node&edge_per_ts.png){ width="400"} 20 | 21 | 22 | ### UCI 23 | ![UCI](node_edge/uci_node&edge_per_ts.png){ width="400"} 24 | 25 | ### Flights 26 | ![image](node_edge/Flights_node&edge_per_ts.png){ width="400"} 27 | 28 | ### Can. Parl. 29 | ![Canadian Parliment](node_edge/CanParl_node&edge_per_ts.png){ width="400"} 30 | 31 | ### US Legis 32 | ![image](node_edge/USLegis_node&edge_per_ts.png){ width="400"} 33 | 34 | ### UN Vote 35 | ![image](node_edge/UNvote_node&edge_per_ts.png){ width="400"} 36 | 37 | ### Contacts 38 | ![image](node_edge/Contacts_node&edge_per_ts.png){ width="400"} 39 | 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ![TGX logo](docs/2023_TGX_logo.png) 3 | 4 | # Temporal Graph Analysis with TGX 5 |

6 | 7 | 8 | 9 |

10 | 11 | This repository contains the code for the paper "Temporal Graph Analysis with TGX" (WSDM 2024, Demo Track). 12 | 13 | TGX overview: 14 | - TGX supports all datasets from [TGB](https://tgb.complexdatalab.com/) and [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg) as well as any custom dataset in `.csv` format. 15 | - TGX provides numerous temporal graph visualization plots and statistics out of the box. 16 | 17 | 18 | ## Dependecies 19 | TGX implementation works with `python >= 3.9` and can be installed as follows. 20 | 21 | 1. Set up virtual environment (conda should work as well). 22 | ``` 23 | python -m venv tgx_env/ 24 | source tgx_env/bin/activate 25 | ``` 26 | 27 | 2. Upgrade pip (Optional) 28 | ``` 29 | pip install --upgrade pip 30 | ``` 31 | 32 | 3. Install external packages 33 | ``` 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | 4. Install local dependencies under root directory `/TGX`. 38 | ``` 39 | pip install -e . 40 | ``` 41 | 42 | 5. [Aternative] Install TGX from [`PyPi`](https://pypi.org/project/py-tgx/): 43 | 44 | ``` 45 | pip install py-tgx 46 | ``` 47 | 48 | 6. [optional] Install `mkdocs` dependencies to serve the documentation locally. 49 | ``` 50 | pip install mkdocs mkdocs-material mkdocstrings-python mkdocs-glightbox mkdocs-jupyter ipython_genutils 51 | ``` 52 | 53 | 54 | For tutorials on how to use TGX to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb) 55 | 56 | 57 | ### Citation 58 | If TGX is useful for your work, please consider citing it: 59 | ```bibtex 60 | @article{shirzadkhani2024temporal, 61 | title={Temporal Graph Analysis with TGX}, 62 | author={Shirzadkhani, Razieh and Huang, Shenyang and Kooshafar, Elahe and Rabbany, Reihaneh and Poursafaei, Farimah}, 63 | journal={arXiv preprint arXiv:2402.03651}, 64 | year={2024} 65 | } 66 | ``` 67 | -------------------------------------------------------------------------------- /examples/data_viz.py: -------------------------------------------------------------------------------- 1 | import tgx 2 | from tgx.utils.plotting_utils import plot_for_snapshots 3 | from tgx.utils.graph_utils import subsampling 4 | 5 | """ 6 | A master example to show all visualization in TGX 7 | """ 8 | 9 | # === load built in datasets === 10 | dataset = tgx.builtin.uci() 11 | 12 | # === load the tgb datasets === 13 | # data_name = "tgbl-wiki" #"tgbl-review" 14 | # dataset = tgx.tgb_data(data_name) #tgb datasets 15 | 16 | # initialize a Graph object from the loaded dataset 17 | # & discretize its timestamps... 18 | ctdg = tgx.Graph(dataset) 19 | time_scale = "weekly" # other choices: "daily", "hourly", ... 20 | dtdg = ctdg.discretize(time_scale=time_scale)[0] 21 | 22 | # === example for subsampling 23 | sub_edges = subsampling(ctdg, selection_strategy="random", N=1000) 24 | subgraph = tgx.Graph(edgelist=sub_edges) 25 | 26 | 27 | # === plot the statistics 28 | tgx.degree_over_time(dtdg, network_name=dataset.name) 29 | tgx.nodes_over_time(dtdg, network_name=dataset.name) 30 | tgx.edges_over_time(dtdg, network_name=dataset.name) 31 | tgx.nodes_and_edges_over_time(dtdg, network_name=dataset.name) 32 | 33 | # Number of Connected Components 34 | tgx.connected_components_per_ts(dtdg, network_name=dataset.name) 35 | 36 | # Degree Density 37 | tgx.degree_density(dtdg, k=3, network_name=dataset.name) 38 | 39 | tgx.TET(dtdg, 40 | network_name=dataset.name) 41 | 42 | # tgx.TET(dtdg, 43 | # network_name=dataset.name, 44 | # figsize = (9, 5), 45 | # axis_title_font_size = 24, 46 | # ticks_font_size = 24) 47 | 48 | # tgx.TEA(dtdg, 49 | # network_name=dataset.name) 50 | 51 | 52 | 53 | # === compute statistics 54 | test_ratio = 0.15 55 | tgx.get_reoccurrence(ctdg, test_ratio=test_ratio) 56 | tgx.get_surprise(ctdg, test_ratio=test_ratio) 57 | tgx.get_novelty(dtdg) 58 | tgx.get_avg_node_activity(dtdg) 59 | 60 | # Size of Largest Connected Component 61 | component_sizes = tgx.size_connected_components(dtdg) 62 | largest_component_sizes = [max(inner_list) if inner_list else 0 for inner_list in component_sizes] 63 | filename = f"{dataset.name}_largest_connected_component_size" 64 | plot_for_snapshots(largest_component_sizes, y_title="Size of Largest Connected Component", filename="./"+filename) 65 | 66 | # Average Node Engagement 67 | engagements = tgx.get_avg_node_engagement(dtdg) 68 | filename = f"{dataset.name}_average_node_engagement" 69 | plot_for_snapshots(engagements, y_title="Average Engagement", filename="./"+filename) 70 | 71 | -------------------------------------------------------------------------------- /tgx/utils/.ipynb_checkpoints/edgelist-checkpoint.py: -------------------------------------------------------------------------------- 1 | 2 | def edgelist_discritizer(edgelist, 3 | unique_ts, 4 | time_interval = None, 5 | max_intervals = 200): 6 | 7 | total_time = unique_ts[-1] - unique_ts[0] 8 | if time_interval is not None: 9 | if isinstance(time_interval, str): 10 | if time_interval == "daily": 11 | interval_size = 86400 12 | elif time_interval == "weekly": 13 | interval_size = 86400 * 7 14 | elif time_interval == "monthly": 15 | interval_size = 86400 * 30 16 | elif time_interval == "yearly": 17 | interval_size = 86400* 365 18 | if int(total_time / interval_size) > max_intervals: 19 | user_input = input("Too many timestamps, discretizing data to 200 timestamps, do you want to proceed?(y/n): ") 20 | if user_input.lower() == 'n': 21 | print('Cannot proceed to TEA and TET plot') 22 | exit() 23 | else: 24 | interval_size = max_intervals 25 | elif isinstance(time_interval, int): 26 | if time_interval > max_intervals: 27 | raise ValueError(f"The maximum number of time intervals is {max_intervals}.") 28 | else: 29 | interval_size = int(total_time / (time_interval)) 30 | 31 | else: 32 | raise TypeError("Invalid time interval") 33 | else: 34 | user_input = input(f"discretizing data to {max_intervals} timestamps, do you want to proceed?(y/n): ") 35 | if user_input.lower() == 'n': 36 | print('Cannot proceed to TEA and TET plot') 37 | exit() 38 | else: 39 | interval_size = int(total_time / 100) 40 | num_intervals = int(total_time/interval_size) 41 | print(f'Discretizing data to {num_intervals} timestamps...') 42 | if num_intervals == 0: 43 | print("Warning! Only one timestamp exist in the data.") 44 | updated_edgelist = {} 45 | new_ts = {} 46 | curr_t = 0 47 | for ts, edge_data in edgelist.items(): 48 | bin_ts = int(ts / interval_size) 49 | if bin_ts >= num_intervals: 50 | bin_ts -= 1 51 | 52 | if bin_ts not in new_ts: 53 | new_ts[bin_ts] = curr_t 54 | curr_t += 1 55 | 56 | if new_ts[bin_ts] not in updated_edgelist: 57 | updated_edgelist[new_ts[bin_ts]] = {} 58 | 59 | for (u,v), n in edge_data.items(): 60 | if (u, v) not in updated_edgelist[new_ts[bin_ts]]: 61 | updated_edgelist[new_ts[bin_ts]][(u, v)] = n 62 | else: 63 | updated_edgelist[new_ts[bin_ts]][(u, v)] += n 64 | return updated_edgelist -------------------------------------------------------------------------------- /examples/starting_example.py: -------------------------------------------------------------------------------- 1 | import tgx 2 | from tgx.utils.plotting_utils import plot_for_snapshots 3 | import argparse 4 | import sys 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser('*** discretizing time steps of a TG dataset ***') 8 | parser.add_argument('-d', '--data', type=str, help='dataset name', default='tgbl-wiki') 9 | parser.add_argument('-t', '--time', type=str, help='time granularity', default='daily') 10 | 11 | try: 12 | args = parser.parse_args() 13 | except: 14 | parser.print_help() 15 | sys.exit(0) 16 | return args, sys.argv 17 | 18 | args, _ = get_args() 19 | 20 | 21 | # === load the datasets from tgb or built-in === 22 | 23 | # load a built-in dataset 24 | # dataset = tgx.builtin.uci() 25 | 26 | # load a TGB dataset 27 | data_name = args.data # args.name can be supported TGB datasets, such as: "tgbl-coin", "tgbl-review", "tgbl-wiki" 28 | dataset = tgx.tgb_data(data_name) 29 | 30 | # make a Graph object from loaded dataset 31 | ctdg = tgx.Graph(dataset) 32 | # ctdg.save2csv("ctdg") #! save the graph to csv files 33 | 34 | time_scale = args.time #choices are: "minutely", "monthly", "weekly", "daily", "hourly" 35 | dtdg = ctdg.discretize(time_scale=time_scale)[0] 36 | print(f"INFO: Discretize {data_name} to `{time_scale}`") 37 | 38 | 39 | 40 | # === plotting the statistics === 41 | tgx.degree_over_time(dtdg, network_name=dataset.name) 42 | tgx.nodes_over_time(dtdg, network_name=dataset.name) 43 | tgx.edges_over_time(dtdg, network_name=dataset.name) 44 | tgx.nodes_and_edges_over_time(dtdg, network_name=dataset.name) 45 | 46 | tgx.TET(dtdg, 47 | network_name=dataset.name, 48 | figsize = (9, 5), 49 | axis_title_font_size = 24, 50 | ticks_font_size = 24) 51 | 52 | 53 | tgx.TEA(dtdg, 54 | network_name=dataset.name) 55 | 56 | 57 | 58 | # === compute statistics === 59 | test_ratio = 0.15 60 | tgx.get_reoccurrence(ctdg, test_ratio=test_ratio) 61 | tgx.get_surprise(ctdg, test_ratio=test_ratio) 62 | tgx.get_novelty(dtdg) 63 | 64 | 65 | # Number of Connected Components 66 | tgx.connected_components_per_ts(dtdg, network_name=dataset.name) 67 | 68 | # Degree Density 69 | tgx.degree_density(dtdg, k=3, network_name=dataset.name) 70 | 71 | # Size of Largest Connected Component 72 | component_sizes = tgx.size_connected_components(dtdg) 73 | largest_component_sizes = [max(inner_list) if inner_list else 0 for inner_list in component_sizes] 74 | filename = f"{dataset.name}_largest_connected_component_size" 75 | plot_for_snapshots(largest_component_sizes, y_title="Size of Largest Connected Component", filename="./"+filename) 76 | 77 | # Average Node Engagement 78 | engagements = tgx.get_avg_node_engagement(dtdg) 79 | filename = f"{dataset.name}_average_node_engagement" 80 | plot_for_snapshots(engagements, y_title="Average Engagement", filename="./"+filename) 81 | 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | !requirements*.txt 2 | !full_requirements*.txt 3 | #dataset 4 | *.png 5 | *.pdf 6 | *.cpython-39.pyc 7 | *.pyc 8 | *.xz 9 | *.parquet 10 | *.gz 11 | *.tar 12 | *.pdf 13 | *.csv 14 | *.zip 15 | *.json 16 | *.npy 17 | *.pt 18 | *.out 19 | *.pkl 20 | *.txt 21 | *.csv 22 | *.npz 23 | __pycache__/ 24 | */.ipynb_checkpoints/ 25 | # Byte-compiled / optimized / DLL files 26 | 27 | raw/ 28 | books/ 29 | electronics/ 30 | software/ 31 | *.py[cod] 32 | *$py.class 33 | saved_models/ 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | wheels/ 52 | pip-wheel-metadata/ 53 | share/python-wheels/ 54 | *.egg-info/ 55 | .installed.cfg 56 | *.egg 57 | MANIFEST 58 | 59 | # PyInstaller 60 | # Usually these files are written by a python script from a template 61 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 62 | *.manifest 63 | *.spec 64 | 65 | # Installer logs 66 | pip-log.txt 67 | pip-delete-this-directory.txt 68 | 69 | # Unit test / coverage reports 70 | htmlcov/ 71 | .tox/ 72 | .nox/ 73 | .coverage 74 | .coverage.* 75 | .cache 76 | nosetests.xml 77 | coverage.xml 78 | *.cover 79 | *.py,cover 80 | .hypothesis/ 81 | .pytest_cache/ 82 | 83 | # Translations 84 | *.mo 85 | *.pot 86 | 87 | # Django stuff: 88 | *.log 89 | local_settings.py 90 | db.sqlite3 91 | db.sqlite3-journal 92 | 93 | # Flask stuff: 94 | instance/ 95 | .webassets-cache 96 | 97 | # Scrapy stuff: 98 | .scrapy 99 | 100 | # Sphinx documentation 101 | docs/_build/ 102 | 103 | # PyBuilder 104 | target/ 105 | 106 | # Jupyter Notebook 107 | .ipynb_checkpoints 108 | 109 | # IPython 110 | profile_default/ 111 | ipython_config.py 112 | 113 | # pyenv 114 | .python-version 115 | 116 | # pipenv 117 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 118 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 119 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 120 | # install all needed dependencies. 121 | #Pipfile.lock 122 | 123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 124 | __pypackages__/ 125 | __pycache__/ 126 | 127 | # Celery stuff 128 | celerybeat-schedule 129 | celerybeat.pid 130 | 131 | # SageMath parsed files 132 | *.sage.py 133 | 134 | # Environments 135 | .env 136 | .venv 137 | env/ 138 | venv/ 139 | ENV/ 140 | ENV_tgx/ 141 | env.bak/ 142 | venv.bak/ 143 | cc_env.sh 144 | .github/ 145 | 146 | # Spyder project settings 147 | .spyderproject 148 | .spyproject 149 | 150 | # Rope project settings 151 | .ropeproject 152 | 153 | # mkdocs documentation 154 | /site 155 | 156 | # mypy 157 | .mypy_cache/ 158 | .dmypy.json 159 | dmypy.json 160 | 161 | # Pyre type checker 162 | .pyre/ 163 | 164 | # PyCharm 165 | .idea -------------------------------------------------------------------------------- /tgx/utils/.ipynb_checkpoints/graph_stat-checkpoint.py: -------------------------------------------------------------------------------- 1 | from tgx.utils.plotting_utils import plot_for_snapshots, plot_nodes_edges_per_ts 2 | 3 | __all__ = ["average_degree_per_ts", 4 | "nodes_per_ts", 5 | "edges_per_ts", 6 | "nodes_and_edges_per_ts"] 7 | 8 | 9 | def average_degree_per_ts(graph: list, 10 | total_nodes: int, 11 | network_name: str, 12 | plot_path: str = None) -> None: 13 | ''' 14 | input: a list containing graph snapshots 15 | ''' 16 | print("Plotting average degree per timestamp") 17 | ave_degree = _calculate_average_degree_per_ts(graph, total_nodes) 18 | filename = f"{network_name}_ave_degree_per_ts" 19 | plot_for_snapshots(ave_degree, filename, "Average degree", plot_path = plot_path) 20 | print("Plotting Done!") 21 | return 22 | 23 | 24 | def nodes_per_ts(graph: list, 25 | network_name: str, 26 | plot_path: str = None) -> None: 27 | ''' 28 | input: a list containing graph snapshots 29 | ''' 30 | print("Plotting number of nodes per timestamp") 31 | active_nodes = _calculate_node_per_ts(graph) 32 | filename = f"{network_name}_nodes_per_ts" 33 | plot_for_snapshots(active_nodes, filename, "Number of nodes", plot_path = plot_path) 34 | print("Plotting Done!") 35 | return 36 | 37 | def edges_per_ts(graph: list, 38 | plot_path: str, 39 | network_name: str) -> None: 40 | ''' 41 | input: a list containing graph snapshots 42 | ''' 43 | print("Plotting number of edges per timestamp") 44 | active_edges = _calculate_edge_per_ts(graph) 45 | filename = f"{network_name}_edges_per_ts" 46 | plot_for_snapshots(active_edges, plot_path, filename, "Number of edges") 47 | print("Plotting Done!") 48 | return 49 | 50 | def nodes_and_edges_per_ts(graph: list, 51 | network_name: str, 52 | plot_path: str = None): 53 | 54 | edges = _calculate_edge_per_ts(graph) 55 | nodes = _calculate_node_per_ts(graph) 56 | ts = list(range(0, len(graph))) 57 | 58 | return plot_nodes_edges_per_ts(edges, nodes, ts, network_name, plot_path = plot_path) 59 | 60 | 61 | def _calculate_average_degree_per_ts(graph, total_nodes): 62 | total_ts = len(graph) 63 | ave_degree = [] 64 | for t1 in range(total_ts): 65 | num_edges = graph[t1].number_of_edges() 66 | ave_degree.append(num_edges*2/ total_nodes) 67 | return ave_degree 68 | 69 | 70 | def _calculate_node_per_ts(graph): 71 | active_nodes = [] 72 | for ts in range(len(graph)): 73 | active_nodes.append(graph[ts].number_of_nodes()) 74 | return active_nodes 75 | 76 | def _calculate_edge_per_ts(graph): 77 | active_edges = [] 78 | for ts in range(len(graph)): 79 | active_edges.append(graph[ts].number_of_edges()) 80 | return active_edges 81 | 82 | 83 | -------------------------------------------------------------------------------- /docs/gallery/dataset.md: -------------------------------------------------------------------------------- 1 | # Plot by dataset 2 | 3 | 4 | ## Reddit 5 | #### TEA Plot 6 | ![image](TEA/Reddit.png){ width="400"} 7 | 8 | 9 | #### TET Plot 10 | ![image](TET/Reddit.png){ width="400"} 11 | 12 | 13 | ## MOOC 14 | #### TEA Plot 15 | ![image](TEA/MOOC.png){ width="400"} 16 | #### TET Plot 17 | ![image](TET/MOOC.png){ width="400"} 18 | 19 | ## LastFM 20 | #### TEA Plot 21 | ![image](TEA/LastFM.png){ width="400"} 22 | #### TET Plot 23 | ![image](TEA/LastFM.png){ width="400"} 24 | 25 | ## Enron 26 | #### TEA Plot 27 | ![image](TEA/Enron.png){ width="400"} 28 | ### TET Plot 29 | ![image](TET/Enron.png){ width="400"} 30 | 31 | ## Social Evo 32 | #### TEA Plot 33 | ![image](TEA/SocialEvo.png){ width="400"} 34 | #### TET Plot 35 | ![image](TET/SocialEvo.png){ width="400"} 36 | 37 | ## UCI 38 | #### TEA Plot 39 | ![UCI](TEA/UCI.png){ width="400"} 40 | #### TET Plot 41 | ![UCI](TET/UCI.png){ width="400"} 42 | 43 | 44 | ## Flights 45 | #### TEA Plot 46 | ![image](TEA/Flights.png){ width="400"} 47 | #### TET Plot 48 | ![image](TET/Flights.png){ width="400"} 49 | 50 | 51 | ## Can. Parl. 52 | #### TEA Plot 53 | ![Canadian Parliment](TEA/CanParl.png){ width="400"} 54 | #### TET Plot 55 | ![Canadian Parliment](TET/CanParl.png){ width="400"} 56 | 57 | ## US Legis 58 | #### TEA Plot 59 | ![image](TEA/USLegis.png){ width="400"} 60 | #### TET Plot 61 | ![image](TET/USLegis.png){ width="400"} 62 | 63 | ## UN Vote 64 | #### TEA Plot 65 | ![image](TEA/UNVote.png){ width="400"} 66 | #### TET Plot 67 | ![image](TET/UNVote.png){ width="400"} 68 | 69 | ## Contacts 70 | #### TEA Plot 71 | ![image](TEA/Contacts.png){ width="400"} 72 | #### TET Plot 73 | ![image](TET/Contacts.png){ width="400"} 74 | 75 | ## tgbl-wiki 76 | #### TEA Plot 77 | ![image](TEA/tgbl-wiki.png){ width="400"} 78 | #### TET Plot 79 | ![image](TET/tgbl-wiki.png){ width="400"} 80 | 81 | ## tgbl-review 82 | #### TEA Plot 83 | ![image](TEA/tgbl-review.png){ width="400"} 84 | #### TET Plot 85 | ![image](TET/tgbl-review.png){ width="400"} 86 | 87 | ## tgbl-coin 88 | #### TEA Plot 89 | ![image](TEA/tgbl-coin.png){ width="400"} 90 | #### TET Plot 91 | ![image](TET/tgbl-coin.png){ width="400"} 92 | 93 | ## tgbl-comment 94 | #### TEA Plot 95 | ![image](TEA/tgbl-comment.png){ width="400"} 96 | #### TET Plot 97 | ![image](TET/tgbl-comment.png){ width="400"} 98 | 99 | ## tgbl-flight 100 | #### TEA Plot 101 | ![image](TEA/tgbl-flight.png){ width="400"} 102 | #### TET Plot 103 | ![image](TET/tgbl-flight.png){ width="400"} 104 | 105 | ## tgbn-trade 106 | #### TEA Plot 107 | ![image](TEA/tgbn-trade.png){ width="400"} 108 | #### TET Plot 109 | ![image](TET/tgbn-trade.png){ width="400"} 110 | 111 | ## tgbn-genre 112 | #### TEA Plot 113 | ![image](TEA/tgbn-genre.png){ width="400"} 114 | #### TET Plot 115 | ![image](TET/tgbn-genre.png){ width="400"} 116 | 117 | ## tgbn-reddit 118 | #### TEA Plot 119 | ![image](TEA/tgbn-reddit.png){ width="400"} 120 | #### TET Plot 121 | ![image](TET/tgbn-reddit.png){ width="400"} 122 | -------------------------------------------------------------------------------- /py_tgx.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: py-tgx 3 | Version: 0.4.0 4 | Summary: Temporal Graph Visualization with TGX 5 | Home-page: https://github.com/ComplexData-MILA/TGX 6 | Author: ["Razieh Shirzadkhani ", "shenyang Huang ", "Elahe Kooshafar", "Farimah Poursafaei"] 7 | Author-email: razieh.shirzadkhani@gmail.com 8 | License: MIT 9 | Project-URL: Bug Tracker, https://github.com/fpour/TGX/issues 10 | Keywords: Temporal Graph Visualization 11 | Classifier: Programming Language :: Python :: 3.9 12 | Requires-Python: >=3.6 13 | Description-Content-Type: text/markdown 14 | License-File: LICENSE 15 | 16 | 17 | ![TGX logo](docs/2023_TGX_logo.png) 18 | 19 | # Temporal Graph Analysis with TGX 20 |

21 | 22 | 23 | 24 |

25 | 26 | This repository contains the code for the paper "Temporal Graph Analysis with TGX" (WSDM 2024, Demo Track). 27 | 28 | TGX overview: 29 | - TGX supports all datasets from [TGB](https://tgb.complexdatalab.com/) and [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg) as well as any custom dataset in `.csv` format. 30 | - TGX provides numerous temporal graph visualization plots and statistics out of the box. 31 | 32 | 33 | ## Dependecies 34 | TGX implementation works with `python >= 3.9` and can be installed as follows. 35 | 36 | 1. Set up virtual environment (conda should work as well). 37 | ``` 38 | python -m venv tgx_env/ 39 | source tgx_env/bin/activate 40 | ``` 41 | 42 | 2. Upgrade pip (Optional) 43 | ``` 44 | pip install --upgrade pip 45 | ``` 46 | 47 | 3. Install external packages 48 | ``` 49 | pip install -r requirements.txt 50 | ``` 51 | 52 | 4. Install local dependencies under root directory `/TGX`. 53 | ``` 54 | pip install -e . 55 | ``` 56 | 57 | 5. [Aternative] Install TGX from [`PyPi`](https://pypi.org/project/py-tgx/): 58 | 59 | ``` 60 | pip install py-tgx 61 | ``` 62 | 63 | 6. [optional] Install `mkdocs` dependencies to serve the documentation locally. 64 | ``` 65 | pip install mkdocs mkdocs-material mkdocstrings-python mkdocs-glightbox mkdocs-jupyter ipython_genutils 66 | ``` 67 | 68 | 69 | For tutorials on how to use TGX to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb) 70 | 71 | 72 | ### Citation 73 | If TGX is useful for your work, please consider citing it: 74 | ```bibtex 75 | @article{shirzadkhani2024temporal, 76 | title={Temporal Graph Analysis with TGX}, 77 | author={Shirzadkhani, Razieh and Huang, Shenyang and Kooshafar, Elahe and Rabbany, Reihaneh and Poursafaei, Farimah}, 78 | journal={arXiv preprint arXiv:2402.03651}, 79 | year={2024} 80 | } 81 | ``` 82 | -------------------------------------------------------------------------------- /tgx/classes/.ipynb_checkpoints/graph-checkpoint.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from typing import Optional 3 | 4 | 5 | class Graph(): 6 | def __init__(self, 7 | edgelist: Optional[dict] = None, 8 | discretized: Optional[bool] = True): 9 | """ 10 | Create a Graph object with specific characteristics 11 | Args: 12 | edgelist: a dictionary of temporal edges in the form of {t: {(u, v), freq}} 13 | discretized: whether the given edgelist was discretized or not 14 | """ 15 | 16 | self.edgelist = edgelist 17 | self.subsampled_graph = None 18 | if discretized: 19 | self.discrite_graph = self._generate_graph() 20 | self.discrite_edgelist = edgelist 21 | else: 22 | self.continuous_edgelist = edgelist 23 | 24 | 25 | def number_of_nodes(self, edgelist: dict = None) -> int: 26 | """ 27 | Calculate total number of nodes present in an edgelist 28 | """ 29 | if self.edgelist is None: 30 | return [] 31 | elif edgelist is None: 32 | edgelist = self.edgelist 33 | node_list = {} 34 | for _, edge_data in edgelist.items(): 35 | for (u,v), _ in edge_data.items(): 36 | if u not in node_list: 37 | node_list[u] = 1 38 | if v not in node_list: 39 | node_list[v] = 1 40 | return len(node_list.keys()) 41 | 42 | def nodes(self) -> list: 43 | """ 44 | Return a list of nodes present in an edgelist 45 | """ 46 | node_list = {} 47 | for _, edge_data in self.edgelist.items(): 48 | for (u,v), _ in edge_data.items(): 49 | if u not in node_list: 50 | node_list[u] = 1 51 | if v not in node_list: 52 | node_list[v] = 1 53 | 54 | self.node_list = list(node_list.keys()) 55 | return list(node_list.keys()) 56 | 57 | def _generate_graph(self, 58 | edgelist: Optional[dict] = None 59 | ) -> list: 60 | ''' 61 | Generate a list of graph snapshots. Each snapshot is a 62 | Networkx graph object. 63 | Parameters: 64 | edgelist: a dictionary containing in the form of {t: {(u, v), freq}} 65 | Returns: 66 | G_times: a list of networkx graphs 67 | ''' 68 | if self.edgelist is None: 69 | return [] 70 | elif edgelist is None: 71 | edgelist = self.edgelist 72 | G_times = [] 73 | G = nx.Graph() 74 | cur_t = 0 75 | for ts, edge_data in edgelist.items(): 76 | for (u,v), n in edge_data.items(): 77 | if (ts != cur_t): 78 | G_times.append(G) 79 | G = nx.Graph() 80 | cur_t = ts 81 | G.add_edge(u, v, freq=n) 82 | G_times.append(G) 83 | return G_times 84 | 85 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | 2 | ![TGX logo](2023_TGX_logo.png) 3 | 4 | # Temporal Graph Analysis with TGX (WSDM 2024 Demo Track) 5 |

6 | 7 | 8 | 9 |

10 | 11 | TGX supports all datasets from [TGB](https://tgb.complexdatalab.com/) and [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg) as well as any custom dataset in `.csv` format. 12 | TGX provides numerous temporal graph visualization plots and statistics out of the box. 13 | 14 | 15 | ### Data Loading ### 16 | For detailed tutorial on how to load the datasets into `tgx.Graph`, see [`docs/tutorials/data_loader.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_loader.ipynb) 17 | 18 | 1. Load TGB datasets 19 | ``` 20 | import tgx 21 | dataset = tgx.tgb_data("tgbl-wiki") 22 | ctdg = tgx.Graph(dataset) 23 | ``` 24 | 25 | 2. Load built-in datasets 26 | ``` 27 | dataset = tgx.builtin.uci() 28 | ctdg = tgx.Graph(dataset) 29 | ``` 30 | 31 | 3. Load custom datasets from `.csv` 32 | ``` 33 | from tgx.io.read import read_csv 34 | toy_fname = "docs/tutorials/toy_data.csv" 35 | edgelist = read_csv(toy_fname, header=True,index=False, t_col=0,) 36 | tgx.Graph(edgelist=edgelist) 37 | ``` 38 | 39 | ### Visualization and Statistics ### 40 | For detailed tutorial on how to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb) 41 | 42 | 1. Discretize the network (required for viz) 43 | 44 | ``` 45 | dataset = tgx.builtin.uci() 46 | ctdg = tgx.Graph(dataset) 47 | time_scale = "weekly" 48 | dtdg, ts_list = ctdg.discretize(time_scale=time_scale, store_unix=True) 49 | ``` 50 | 51 | 2. Plot the number of nodes over time 52 | 53 | ``` 54 | tgx.degree_over_time(dtdg, network_name="uci") 55 | ``` 56 | 57 | 3. Compute novelty index 58 | ``` 59 | tgx.get_novelty(dtdg) 60 | ``` 61 | 62 | 63 | ### Install dependency 64 | Our implementation works with python >= 3.9 and can be installed as follows 65 | 66 | 1. set up virtual environment (conda should work as well) 67 | ``` 68 | python -m venv ~/tgx_env/ 69 | source ~/tgx_env/bin/activate 70 | ``` 71 | 72 | 2. install external packages 73 | ``` 74 | pip install -r requirements.txt 75 | ``` 76 | 77 | 3. install local dependencies under root directory `/TGX` 78 | 81 | ``` 82 | pip install -e . 83 | ``` 84 | 85 | 86 | 87 | 3. [alternatively] install from test-pypi 88 | 89 | ``` 90 | pip install -i https://test.pypi.org/simple/ py-tgx 91 | ``` 92 | You can specify the version with `==`, note that the pypi version might not always be the most updated version 93 | 94 | 95 | 4. [optional] install mkdocs dependencies to serve the documentation locally 96 | ``` 97 | pip install mkdocs-glightbox 98 | ``` 99 | 100 | ### Creating new branch ### 101 | 102 | first create the branch on github 103 | ``` 104 | git fetch origin 105 | 106 | git checkout -b test origin/test 107 | ``` 108 | -------------------------------------------------------------------------------- /examples/.ipynb_checkpoints/test-checkpoint.py: -------------------------------------------------------------------------------- 1 | import tgx 2 | import time 3 | import numpy as np 4 | # from tgx.utils.graph_utils import subsampling, train_test_split, edgelist_discritizer 5 | from tgx.utils.graph_stat import get_novelty, get_avg_node_activity, get_reoccurrence, get_surprise 6 | 7 | data_path = '/network/scratch/r/razieh.shirzadkhani/' 8 | # dataset = tgx.data.reddit(root=data_path) 9 | 10 | 11 | dataset = tgx.data.uci(root=data_path) 12 | data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 13 | # get_avg_node_activity(data) 14 | # print(dataset.name) 15 | # dataset = tgx.data.lastfm(root=data_path) 16 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 17 | # get_avg_node_activity(data) 18 | # print(dataset.name) 19 | # dataset = tgx.data.mooc(root=data_path) 20 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 21 | # get_avg_node_activity(data) 22 | # print(dataset.name) 23 | # dataset = tgx.data.canparl(root=data_path) 24 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 25 | # get_avg_node_activity(data) 26 | # print(dataset.name) 27 | # dataset = tgx.data.contacts(root=data_path) 28 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 29 | # get_avg_node_activity(data) 30 | # print(dataset.name) 31 | # dataset= tgx.data.enron(root=data_path) 32 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 33 | # get_avg_node_activity(data) 34 | # print(dataset.name) 35 | # dataset = tgx.data.flights(root=data_path) 36 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 37 | # get_avg_node_activity(data) 38 | # print(dataset.name) 39 | 40 | # dataset = tgx.data.unvote(root=data_path) 41 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 42 | # get_avg_node_activity(data) 43 | # print(dataset.name) 44 | # dataset = tgx.data.uslegis(root=data_path) 45 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 46 | # get_avg_node_activity(data) 47 | # print(dataset.name) 48 | # dataset = tgx.data.social_evo(root=data_path) 49 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 50 | # get_avg_node_activity(data) 51 | # print(dataset.name) 52 | # print(dataset.name) 53 | # data = tgx.read_edgelist(data=dataset, discretize=True, intervals=dataset.intervals) 54 | # get_novelty(data) 55 | 56 | # total_nodes = G.number_of_nodes() 57 | # plot_path = "./examples/plots/" 58 | # n_sampling = 1000 59 | # new_edges = subsampling(G, random_selection=True, N=n_sampling) 60 | # new_edges = edgelist_discritizer(new_edges, time_interval=50) 61 | # G.subsampled_graph = G._generate_graph(new_edges) 62 | # print(G.number_of_nodes(G.subsampled_graph)) 63 | # tgx.nodes_and_edges_per_ts(G.subsampled_graph, plot_path=plot_path, network_name=dataset.name) 64 | # tgx.average_degree_per_ts(G.subsampled_graph, n_sampling, plot_path=plot_path, network_name=dataset.name) 65 | 66 | 67 | # TEA_path = "./examples/plots/TEA/1" 68 | 69 | # tgx.TEA(data, filepath = TEA_path, network_name=dataset.name+'disc') 70 | # TET_path = "./examples/plots/TET/" 71 | # tgx.TET(data, filepath = TET_path, network_name=dataset.name) 72 | 73 | # print("--- Total elapsed time: %s seconds ---" % (time.time() - start_time)) 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/contribute.md: -------------------------------------------------------------------------------- 1 | 2 | ![TGX logo](2023_TGX_logo.png) 3 | 4 | # TGX Community Contribution Guidelines 5 | 6 | TGX is a community driven project and we hope to continue to add new features to it so that it is useful for a wide range of analysis and scenarios. This guide documents the best way to make various types of contribution to TGX, including what is required before submitting a code change. Note that as the package evolves, this guideline will be improved as well, so feel free to check back here for more information in the near future too. 7 | 8 | Contributing to TGX is more than submitting code changes, we also welcome new research suggestions, inviting new users, testing releases and improving the documentation. Raising issues on Github to point out any issues or directions of improvement are also welcome. 9 | 10 | 11 | ## Reporting issues on Github 12 | 13 | Creating issues on Github is a useful way to manage problems and identify priorities. When possible, please add appropriate tags to the issue you are creating. For example, if it is related to the documentation website or error in documentation, you can include the `documentation` tag. If it is related to package installation, you can add the `install` tag. Overall, combining multiple issues that are related into a single one to report is also helpful to avoid trackig too many issues. 14 | 15 | ### Bugs 16 | 17 | Bug reports are useful when they are accompanied by ways to understand and reproduce the bug. When reporting a bug, try to include detailed steps on how you encountered the bug and how to reproduce it. If you would like to propose a fix to the bug, feel free to link it to a pull request on the issue as well. 18 | 19 | ### Feedbacks and Improvements 20 | 21 | Feedbacks and improvements on TGX are welcome. If you would like to see new temporal graph statistics or visualization added, feel free to reach out directly by [email](mailto:shenyang.huang@mail.mcgill.ca) or create an issue on Github. You are of course more than welcome to add visualization and statistics from your own paper as well. 22 | 23 | ## Preparing code changes 24 | 25 | If you are interested in adding new features or fixing bugs in TGX, thanks for your help. This section walks you through on how to propose code changes to TGX. 26 | 27 | ### Deciding what to work on 28 | 29 | The first step is to decide on which aspects you want to improve for TGX. The best way is to look at currently [active issues](https://github.com/ComplexData-MILA/TGX/issues) on Github. You can find the future improvements we are planning in the [Roadmap](https://github.com/ComplexData-MILA/TGX/issues/43), from there you can find detailed instructions on what each task means. You are also very welcome to fix any bugs that you encounters or someone else encounters by proposing pull request on the github. Once you have decided on what you want to fix, reach out to us on TG slack (check the most recent link to join on the [TG website](https://www.cs.mcgill.ca/~shuang43/rg.html)) or by [email](mailto:shenyang.huang@mail.mcgill.ca) and let's work on it together. 30 | 31 | ### setting up dev environment 32 | 33 | First, identify the issue you want to solve and create a new branch linked to the issue. (see create a branch button on right side of the issue.) Install TGX to the latest version as instructed below. 34 | 35 | 1. Set up virtual environment (conda should work as well). 36 | 37 | ``` 38 | python -m venv tgx_env/ 39 | source tgx_env/bin/activate 40 | ``` 41 | 42 | 2. Upgrade pip (Optional) 43 | 44 | ``` 45 | pip install --upgrade pip 46 | ``` 47 | 48 | 3. Install external packages 49 | 50 | ``` 51 | pip install -r requirements.txt 52 | ``` 53 | 54 | 4. Install local dependencies under root directory `/TGX`. 55 | 56 | ``` 57 | pip install -e . 58 | ``` 59 | 60 | 5. Install `mkdocs` dependencies to serve the documentation locally. 61 | 62 | ``` 63 | pip install mkdocs mkdocs-material mkdocstrings-python mkdocs-glightbox mkdocs-jupyter ipython_genutils 64 | ``` 65 | 66 | 6. Switch to the branch you created from the issue (swap out `test` with name of your branch) 67 | ``` 68 | git fetch origin 69 | 70 | git checkout -b test origin/test 71 | ``` 72 | 73 | ### Creating Pull Request 74 | 75 | please make sure you have tested your code before creating a pull request, also created documentation for any new functions that you have added. 76 | Once you created the pull request, you can reach out for a code review on slack or by email. -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Temporal Graph Analysis with TGX 2 | site_url: https://shenyanghuang.github.io/TGX 3 | nav: 4 | - Home: index.md 5 | - API: 6 | - ReadWrite: 7 | - Read Data: io/io.md 8 | - Utils: 9 | - Graph stats: utils/graph_stats.md 10 | - Graph Utils: utils/graph_utils.md 11 | - Plotting Utils: utils/plotting_utils.md 12 | - Visualization: 13 | - TEA Plot: viz/vis_tea.md 14 | - TET Plot: viz/vis_tet.md 15 | - Classes: 16 | - Graph: classes/graph.md 17 | - Data: 18 | - Builtin: data/builtin.md 19 | - TGB: data/tgb.md 20 | - Tutorials: 21 | - Load data: tutorials/data_loader.ipynb 22 | - Visualization and Statistics: tutorials/data_viz_stats.ipynb 23 | - Gallery: 24 | - Plots by type: 25 | - TEA Plots: 26 | - Builtin: gallery/0-tea-builtin.md 27 | - TGB: gallery/0-tea-tgb.md 28 | - TET Plots: 29 | - Builtin: gallery/0-tet-builtin.md 30 | - TGB: gallery/0-tet-tgb.md 31 | - Average degree over time: 32 | - Builtin: gallery/0-degree-builtin.md 33 | - TGB: gallery/0-degree-tgb.md 34 | - Node and Edge over time: 35 | - Builtin: gallery/0-node_edge-builtin.md 36 | - TGB: gallery/0-node_edge-tgb.md 37 | - Plots by dataset: 38 | - Builtin: 39 | - Reddit: gallery/reddit.md 40 | - MOOC: gallery/mooc.md 41 | - LastFM: gallery/lastfm.md 42 | - Enron: gallery/enron.md 43 | - Social Evo: gallery/socialevo.md 44 | - UCI: gallery/uci.md 45 | - Flights: gallery/flight.md 46 | - Can Parl: gallery/canparl.md 47 | - US Legis: gallery/uslegis.md 48 | - UN Vote: gallery/unvote.md 49 | - Contacts: gallery/contact.md 50 | - TGB: 51 | - tgbl-wiki: gallery/tgbl-wiki.md 52 | - tgbl-review: gallery/tgbl-review.md 53 | - tgbl-coin: gallery/tgbl-coin.md 54 | - tgbl-comment: gallery/tgbl-comment.md 55 | - tgbl-flight: gallery/tgbl-flight.md 56 | - tgbn-trade: gallery/tgbn-trade.md 57 | - tgbn-genre: gallery/tgbn-genre.md 58 | - tgbn-reddit: gallery/tgbn-reddit.md 59 | 60 | - Contribute: contribute.md 61 | - Github: 'https://github.com/ComplexData-MILA/TGX' 62 | 63 | 64 | theme: 65 | logo: tgx_logo_no_txt.png 66 | name: material 67 | features: 68 | - navigation.tabs 69 | - toc.integrate 70 | - navigation.footer 71 | - navigation.indexes 72 | - navigation.sections 73 | - navigation.top 74 | - navigation.tracking 75 | - navigation.tabs.sticky 76 | - navigation.expand 77 | - search.suggest 78 | - search.highlight 79 | - content.tabs.link 80 | - content.code.annotation 81 | - content.code.copy 82 | language: en 83 | palette: 84 | - scheme: default 85 | toggle: 86 | icon: material/toggle-switch-off-outline 87 | name: Switch to dark mode 88 | primary: navy 89 | accent: red 90 | - scheme: slate 91 | toggle: 92 | icon: material/toggle-switch 93 | name: Switch to light mode 94 | primary: blue 95 | accent: lime 96 | 97 | 98 | plugins: 99 | - search 100 | 101 | - glightbox: 102 | touchNavigation: true 103 | loop: false 104 | effect: zoom 105 | slide_effect: slide 106 | width: 100% 107 | height: auto 108 | zoomable: true 109 | draggable: true 110 | skip_classes: 111 | - custom-skip-class-name 112 | auto_caption: false 113 | caption_position: bottom 114 | - mkdocstrings: 115 | watch: 116 | - tgx/ 117 | handlers: 118 | python: 119 | setup_commands: 120 | - import sys 121 | - sys.path.append("docs") 122 | - sys.path.append("tgx") 123 | selection: 124 | new_path_syntax: true 125 | rendering: 126 | show_root_heading: false 127 | heading_level: 3 128 | show_root_full_path: false 129 | 130 | 131 | - mkdocs-jupyter: 132 | execute: false 133 | 134 | markdown_extensions: 135 | - pymdownx.arithmatex: 136 | generic: true 137 | - attr_list 138 | - md_in_html 139 | -------------------------------------------------------------------------------- /tgx/utils/.ipynb_checkpoints/plotting_utils-checkpoint.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | def create_ts_list(start, end, metric=None, interval=None): 7 | if metric == "Unix" or metric == "unix" or metric == "UNIX": 8 | start = datetime.datetime.fromtimestamp(start).date() 9 | end = datetime.datetime.fromtimestamp(end).date() 10 | if interval == 'daily': 11 | date_list = pd.date_range(start = start, end = end, freq="D") 12 | elif interval == "month": 13 | date_list = pd.date_range(start = start, end = end, freq="M") 14 | elif interval == "year": 15 | date_list = pd.date_range(start = start, end = end, freq="Y") 16 | timelist = [] 17 | for dates in date_list: 18 | timelist.append(dates.strftime("%Y/%m/%d")) 19 | else: 20 | timelist = list(range(start, end, interval)) 21 | # print(timelist) 22 | return timelist 23 | 24 | 25 | 26 | def plot_nodes_edges_per_ts(edges: list, 27 | nodes: list, 28 | ts: list, 29 | network_name: str, 30 | plot_path: str = None, 31 | ylabel_1: str = 'Edges per Timestamp', 32 | ylabel_2: str = 'Nodes per Timestamp'): 33 | """ 34 | Plot nodes and edges per timestamp in one figure 35 | Parameters: 36 | edges: A list containing number of edges per timestamp 37 | nodes: A list containing number of nodes per timestamp 38 | ts: list of timestamps 39 | network_name: Name of the network to be used in the output file name 40 | plot_path: Path to save the output figure 41 | ylabel_1: Label for the edges per timestamp line 42 | ylabel_2: Label for the nodes per timestamp line 43 | """ 44 | fig = plt.figure(facecolor='w', figsize=(11, 6)) 45 | ax1 = fig.add_subplot(111) 46 | ax2 = ax1.twinx() 47 | 48 | c1, = ax1.plot(ts, edges, color='black', lw=3, label=ylabel_1) 49 | c2, = ax2.plot(ts, nodes, color='gray', linestyle='dashed', lw=3, label=ylabel_2) 50 | curves = [c1, c2] 51 | ax1.legend(curves, [curve.get_label() for curve in curves], fontsize = 18) 52 | ax1.set_xlabel('Time', fontsize=20) 53 | ax1.set_ylabel(ylabel_1, fontsize=20) 54 | ax2.set_ylabel(ylabel_2, fontsize=20) 55 | ax1.tick_params(labelsize=20) 56 | ax2.tick_params(labelsize=20) 57 | ax1.set_ylim(0) 58 | ax2.set_ylim(0) 59 | ax1.set_xlim(0, len(ts)-1) 60 | if plot_path is not None: 61 | filename = f"{network_name}_node&edge_per_ts" 62 | plt.savefig(f'{plot_path}/{filename}') 63 | plt.show() 64 | 65 | def plot_for_snapshots(data: list, 66 | filename: str, 67 | y_title: str, 68 | show_ave: bool=True, 69 | plot_path:str = None, 70 | plot_title:str = None): 71 | ''' 72 | Plot a variable for different timestamps 73 | Parameters: 74 | data: A list of desired variable to be plotted 75 | filename: Name of the output file name 76 | y_title: Title of the y axis 77 | show_ave: Whether to plot a line showing the average of the variable over all timestamps 78 | plot_path: The path to save the output file 79 | ''' 80 | ts = list(range(0, len(data))) 81 | # plt.rcParams["font.family"] = "Times New Roman" 82 | fig = plt.figure(facecolor='w', figsize=(9,6)) 83 | ax = fig.add_subplot(111) 84 | ax.plot(ts, data, color='black', lw=3) 85 | 86 | ax.set_xlabel('Time', fontsize=20) 87 | ax.set_ylabel(y_title, fontsize=20) 88 | ax.tick_params(labelsize=20) 89 | # ax.set_ylim(0, 7.5) 90 | ax.set_xlim(0, len(ts)-1) 91 | ax.set_title(plot_title, fontsize=20) 92 | if show_ave: 93 | ave_deg = [np.average(data) for i in range(len(ts))] 94 | ax.plot(ts, ave_deg, color='#ca0020', linestyle='dashed', lw=3) 95 | if plot_path is not None: 96 | plt.savefig(f'{plot_path}/{filename}') 97 | plt.show() 98 | 99 | if __name__ == "__main__": 100 | create_ts_list(86400, 86400*365, "unix", "month") 101 | create_ts_list(2015, 2022, interval=2) -------------------------------------------------------------------------------- /tgx/data/tgb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | Data_specifications = { 4 | 'tgbl-wiki' : {'discretize' : True, 'time_scale': 'daily'}, 5 | 'tgbl-review' : {'discretize' : True, 'time_scale': 'yearly'}, 6 | 'tgbl-coin' : {'discretize' : True, 'time_scale': 'weekly'}, 7 | 'tgbl-comment' : {'discretize' : True, 'time_scale': 'monthly'}, 8 | 'tgbl-flight' : {'discretize' : True, 'time_scale': 'monthly'}, 9 | 'tgbn-trade' : {'discretize' : False, 'time_scale': None}, 10 | 'tgbn-genre' : {'discretize' : True, 'time_scale': 'monthly'}, 11 | 'tgbn-reddit' : {'discretize' : True, 'time_scale': 'monthly'}, 12 | 'tgbn-token' : {'discretize' : True, 'time_scale': 'weekly'} 13 | } 14 | 15 | class tgb_data(object): 16 | def __init__(self, dname: str, 17 | edge_feat: bool = False, 18 | w: bool = False, 19 | edge_label: bool = False, 20 | edge_idxs: bool = False): 21 | """ 22 | Data class for loading default (in-package) temporal datasets 23 | 24 | In order to use "tgb" datasets install tgb package 25 | for more detals visit here: https://tgb.complexdatalab.com/ 26 | 27 | In order to use dgb datasets download and extract dataset file 28 | from here: https://zenodo.org/record/7213796#.Y1cO6y8r30o 29 | and locate them in ./data/ directory. 30 | """ 31 | self.tgb(dname, 32 | edge_feat = edge_feat, 33 | w = w, 34 | edge_label = edge_label, 35 | edge_idxs = edge_idxs) 36 | 37 | return 38 | 39 | @classmethod 40 | def tgb(self, dname: str, 41 | edge_feat: bool = False, 42 | w: bool = False, 43 | edge_label: bool = False, 44 | edge_idxs: bool = False): 45 | """ 46 | Load datasets from "tgb" package. To load these datasets you need to install tgb package. 47 | Parameters: 48 | dname: str, name of the dataset from the list: 49 | ["tgbl-wiki", "tgbl-review", 50 | "tgbl-coin", "tgbl-comment", 51 | "tgbl-flight","tgbn-trade", 52 | "tgbn-genre", "tgbn-reddit"] 53 | edge_feat: list of edge features 54 | w: edge weights 55 | edge_label: edge labels 56 | edge_idxs: edge indexes 57 | 58 | """ 59 | try: 60 | from tgb.linkproppred.dataset import LinkPropPredDataset 61 | from tgb.nodeproppred.dataset import NodePropPredDataset 62 | except: 63 | print("First install TGB package using 'pip install py-tgb'") 64 | 65 | if "tgbl" in dname: 66 | dataset = LinkPropPredDataset(name=dname, root="datasets", preprocess=True) 67 | elif "tgbn" in dname: 68 | dataset = NodePropPredDataset(name=dname, root="datasets", preprocess=True) 69 | else: 70 | raise ValueError("Invalid tgb dataset name") 71 | 72 | data = dataset.full_data 73 | data = np.array([data['sources'], data["destinations"], data["timestamps"]]) 74 | self.data = np.transpose(data) 75 | 76 | if edge_feat: 77 | self.edge_feat = data['edge_feat'] 78 | if w: 79 | self.w = data['w'] 80 | if edge_label: 81 | self.edge_label = data['edge_label'] 82 | if edge_idxs: 83 | self.edge_idxs = data['edge_idxs'] 84 | 85 | if (dname in Data_specifications): 86 | self.discretize = Data_specifications[dname]['discretize'] 87 | self.time_scale = Data_specifications[dname]['time_scale'] 88 | else: 89 | self.discretize = False 90 | self.time_scale = None 91 | self.train_mask = dataset.train_mask 92 | self.val_mask = dataset.val_mask 93 | self.test_mask = dataset.test_mask 94 | self.name = dname 95 | 96 | return self 97 | 98 | 99 | def read_specifications(self, 100 | data: type): 101 | """ 102 | Load dataset specifications for dgb datasets 103 | Parameters: 104 | data: str, name of the dataset 105 | """ 106 | self.name = data 107 | self.discretize = Data_specifications[data]['discretize'] 108 | self.time_scale = Data_specifications[data]['time_scale'] 109 | return self -------------------------------------------------------------------------------- /tgx/utils/plotting_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import matplotlib.colors as mcolors 6 | from matplotlib.ticker import MaxNLocator 7 | 8 | def create_ts_list(start, end, metric=None, interval=None): 9 | if metric == "Unix" or metric == "unix" or metric == "UNIX": 10 | start = datetime.datetime.fromtimestamp(start).date() 11 | end = datetime.datetime.fromtimestamp(end).date() 12 | if interval == 'daily': 13 | date_list = pd.date_range(start = start, end = end, freq="D") 14 | elif interval == "month": 15 | date_list = pd.date_range(start = start, end = end, freq="M") 16 | elif interval == "year": 17 | date_list = pd.date_range(start = start, end = end, freq="Y") 18 | timelist = [] 19 | for dates in date_list: 20 | timelist.append(dates.strftime("%Y/%m/%d")) 21 | else: 22 | timelist = list(range(start, end, interval)) 23 | # print(timelist) 24 | return timelist 25 | 26 | 27 | 28 | def plot_nodes_edges_per_ts(edges: list, 29 | nodes: list, 30 | ts: list, 31 | filename: str = None, 32 | ylabel_1: str = 'Edges per Timestamp', 33 | ylabel_2: str = 'Nodes per Timestamp'): 34 | """ 35 | Plot nodes and edges per timestamp in one figure 36 | Parameters: 37 | edges: A list containing number of edges per timestamp 38 | nodes: A list containing number of nodes per timestamp 39 | ts: list of timestamps 40 | filename: Name of the output file name, containing the path 41 | ylabel_1: Label for the edges per timestamp line 42 | ylabel_2: Label for the nodes per timestamp line 43 | """ 44 | fig = plt.figure(facecolor='w', figsize=(11, 6)) 45 | ax1 = fig.add_subplot(111) 46 | ax2 = ax1.twinx() 47 | 48 | c1, = ax1.plot(ts, edges, color='black', lw=3, label=ylabel_1) 49 | c2, = ax2.plot(ts, nodes, color='gray', linestyle='dashed', lw=3, label=ylabel_2) 50 | curves = [c1, c2] 51 | ax1.legend(curves, [curve.get_label() for curve in curves], fontsize = 18) 52 | ax1.set_xlabel('Time', fontsize=20) 53 | ax1.set_ylabel(ylabel_1, fontsize=20) 54 | ax2.set_ylabel(ylabel_2, fontsize=20) 55 | ax1.tick_params(labelsize=20) 56 | ax2.tick_params(labelsize=20) 57 | ax1.set_ylim(0) 58 | ax2.set_ylim(0) 59 | ax1.set_xlim(0, len(ts)-1) 60 | if filename is not None: 61 | plt.savefig(f'{filename}') 62 | else: 63 | plt.show() 64 | 65 | def plot_for_snapshots(data: list, 66 | y_title: str, 67 | filename: str = None, 68 | show_ave: bool=True, ): 69 | ''' 70 | Plot a variable for different timestamps 71 | Parameters: 72 | data: A list of desired variable to be plotted 73 | y_title: Title of the y axis 74 | filename: Name of the output file name, containing the path 75 | show_ave: Whether to plot a line showing the average of the variable over all timestamps 76 | ''' 77 | ts = list(range(0, len(data))) 78 | # plt.rcParams["font.family"] = "Times New Roman" 79 | fig = plt.figure(facecolor='w', figsize=(9,6)) 80 | ax = fig.add_subplot(111) 81 | ax.plot(ts, data, color='black', lw=3) 82 | 83 | ax.set_xlabel('Time', fontsize=20) 84 | ax.set_ylabel(y_title, fontsize=20) 85 | ax.tick_params(labelsize=20) 86 | ax.set_xlim(0, len(ts)-1) 87 | if show_ave: 88 | ave_deg = [np.average(data) for i in range(len(ts))] 89 | ax.plot(ts, ave_deg, color='#ca0020', linestyle='dashed', lw=3) 90 | if filename is not None: 91 | plt.savefig(f'{filename}') 92 | else: 93 | plt.show() 94 | 95 | 96 | def plot_density_map(data: list, 97 | y_title: str, 98 | filename: str = None,): 99 | ''' 100 | Plot a density map using fig and ax 101 | Parameters: 102 | data: A list of desired variable to be plotted 103 | y_title: Title of the y axis 104 | filename: Name of the output file name, containing the path 105 | ''' 106 | max_value = max(max(inner) for inner in data if inner) 107 | c = np.zeros((max_value, len(data))) 108 | 109 | for i, row in enumerate(data): 110 | for value in row: 111 | c[value - 1][i] += 1 112 | 113 | # Plot 114 | fig = plt.figure(facecolor='w', figsize=(9, 6)) 115 | ax = fig.add_subplot(111) 116 | 117 | norm = mcolors.Normalize(vmin=0, vmax=1) 118 | cax = ax.imshow(c, cmap='viridis', interpolation='nearest', norm=norm) 119 | cbar = fig.colorbar(cax) 120 | cbar.set_label('Frequency') 121 | 122 | ax.set_title("Heatmap of Node Degrees Over Time") 123 | ax.set_xlabel('Time', fontsize=20) 124 | ax.set_ylabel(y_title, fontsize=20) 125 | ax.tick_params(labelsize=20) 126 | ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 127 | 128 | # Adjust the aspect ratio of the plot 129 | ax.set_aspect('auto') 130 | 131 | if filename is not None: 132 | plt.savefig(f'{filename}') 133 | else: 134 | plt.show() 135 | 136 | if __name__ == "__main__": 137 | create_ts_list(86400, 86400*365, "unix", "month") 138 | create_ts_list(2015, 2022, interval=2) -------------------------------------------------------------------------------- /tgx/data/builtin.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import zipfile 3 | import requests 4 | import os 5 | 6 | 7 | 8 | __all__ = ["data"] 9 | 10 | root_path = "." 11 | 12 | 13 | DataPath={ 14 | 'USLegis' : "/data/USLegis/ml_USLegis.csv", 15 | 'CanParl' : "/data/CanParl/ml_CanParl.csv", 16 | 'UNtrade' : "/data/UNtrade/ml_UNtrade.csv", 17 | 'UNvote' : "/data/UNvote/ml_UNvote.csv", 18 | 'reddit' : "/data/reddit/ml_reddit.csv", 19 | 'Wikipedia' : "/data/wikipedia/ml_wikipedia.csv", 20 | 'enron' : "/data/enron/ml_enron.csv", 21 | 'mooc' : "/data/mooc/ml_mooc.csv", 22 | 'uci' : "/data/uci/ml_uci.csv", 23 | 'SocialEvo' : "/data/SocialEvo/ml_SocialEvo.csv", 24 | 'Flights' : "/data/Flights/ml_Flights.csv", 25 | 'lastfm' : "/data/lastfm/ml_lastfm.csv", 26 | 'Contacts' : "/data/Contacts/ml_Contacts.csv" 27 | } 28 | 29 | Data_specifications = { 30 | 'USLegis' : {'discretize' : False, 'time_scale': None}, 31 | 'CanParl' : {'discretize' : False, 'time_scale': None}, 32 | 'UNvote' : {'discretize' : False, 'time_scale': None}, 33 | 'reddit' : {'discretize' : True, 'time_scale': 'daily'}, 34 | 'enron' : {'discretize' : True, 'time_scale': 'monthly'}, 35 | 'mooc' : {'discretize' : True, 'time_scale': 'daily'}, 36 | 'uci' : {'discretize' : True, 'time_scale': 'weekly'}, 37 | 'SocialEvo' : {'discretize' : True, 'time_scale': 'weekly'}, 38 | 'Flights' : {'discretize' : False, 'time_scale': 121}, 39 | 'Contacts' : {'discretize' : True, 'time_scale': 'daily'}, 40 | 'lastfm' : {'discretize' : True, 'time_scale': 'monthly'} 41 | } 42 | 43 | def download(url: str, output_path: str): 44 | get_response = requests.get(url,stream=True) 45 | file_name = url.split("/")[-1] 46 | fpath = output_path + "/" + file_name 47 | with open(fpath, 'wb') as f: 48 | for chunk in get_response.iter_content(chunk_size=1024): 49 | if chunk: # filter out keep-alive new chunks 50 | f.write(chunk) 51 | return fpath 52 | 53 | 54 | 55 | class builtin(object): 56 | def __init__(self): 57 | """ 58 | Data class for loading default (in-package) temporal datasets 59 | 60 | In order to use "tgb" datasets install tgb package 61 | for more detals visit here: https://tgb.complexdatalab.com/ 62 | 63 | In order to use dgb datasets download and extract dataset file 64 | from here: https://zenodo.org/record/7213796#.Y1cO6y8r30o 65 | and locate them in ./data/ directory. 66 | """ 67 | pass 68 | 69 | 70 | def read_specifications(self, 71 | data: type): 72 | """ 73 | Load dataset specifications for dgb datasets 74 | Parameters: 75 | data: str, name of the dataset 76 | """ 77 | self.name = data 78 | self.path = DataPath[data] 79 | # self.header = Data_specifications[data]['header'] 80 | # self.index = Data_specifications[data]['index'] 81 | self.discretize = Data_specifications[data]['discretize'] 82 | self.time_scale = Data_specifications[data]['time_scale'] 83 | return self 84 | 85 | def load_dgb_data(self): 86 | try: 87 | data = pd.read_csv(f"{self.root}{self.path}", index_col=0) 88 | except: 89 | self.download_file(self) 90 | data = pd.read_csv(f"{self.root}{self.path}", index_col=0) 91 | 92 | self.data = data.iloc[:, 0:3].to_numpy() 93 | return self 94 | 95 | def download_file(self): 96 | 97 | print("Data missing, download recommended!") 98 | inp = input('Will you download the dataset(s) now? (y/N)\n').lower() 99 | url = f"https://zenodo.org/record/7213796/files/{self.name}.zip" 100 | path_download = f"./data" 101 | print(path_download) 102 | print(url) 103 | if inp == 'y': 104 | if not os.path.exists(path_download): 105 | os.mkdir(path_download) 106 | print("Folder %s created!" % path_download) 107 | 108 | print(f"Downloading {self.name} dataset . . .") 109 | zip_path = download(url, path_download) 110 | with zipfile.ZipFile(zip_path, "r") as f: 111 | f.extractall(path_download) 112 | print("Download completed") 113 | 114 | else: 115 | print("Download cancelled") 116 | 117 | 118 | @classmethod 119 | def mooc(self, root=root_path): 120 | data = "mooc" 121 | self.root = root 122 | self.read_specifications(self, data) 123 | self.load_dgb_data(self) 124 | return self 125 | 126 | @classmethod 127 | def uci(self, root=root_path): 128 | data = "uci" 129 | self.root = root 130 | self.read_specifications(self, data) 131 | self.load_dgb_data(self) 132 | return self 133 | 134 | @classmethod 135 | def uslegis(self, root=root_path): 136 | data = "USLegis" 137 | self.root = root 138 | self.read_specifications(self, data) 139 | self.load_dgb_data(self) 140 | return self 141 | 142 | @classmethod 143 | def canparl(self, root=root_path): 144 | data = "CanParl" 145 | self.root = root 146 | self.read_specifications(self, data) 147 | self.load_dgb_data(self) 148 | return self 149 | 150 | @classmethod 151 | def untrade(self, root=root_path): 152 | data = "UNtrade" 153 | self.root = root 154 | self.read_specifications(self, data) 155 | self.load_dgb_data(self) 156 | return self 157 | 158 | @classmethod 159 | def unvote(self, root=root_path): 160 | data = "UNvote" 161 | self.root = root 162 | self.read_specifications(self, data) 163 | self.load_dgb_data(self) 164 | return self 165 | 166 | @classmethod 167 | def reddit(self, root=root_path): 168 | data = "reddit" 169 | self.root = root 170 | self.read_specifications(self, data) 171 | self.load_dgb_data(self) 172 | return self 173 | 174 | @classmethod 175 | def wikipedia(self, root=root_path): 176 | data = "Wikipedia" 177 | self.root = root 178 | self.read_specifications(self, data) 179 | self.load_dgb_data(self) 180 | return self 181 | 182 | @classmethod 183 | def enron(self, root=root_path): 184 | data = "enron" 185 | self.root = root 186 | self.read_specifications(self, data) 187 | self.load_dgb_data(self) 188 | return self 189 | 190 | @classmethod 191 | def social_evo(self, root=root_path): 192 | data = "SocialEvo" 193 | self.root = root 194 | self.read_specifications(self, data) 195 | self.load_dgb_data(self) 196 | return self 197 | 198 | @classmethod 199 | def flights(self, root=root_path): 200 | data = "Flights" 201 | self.root = root 202 | self.read_specifications(self, data) 203 | self.load_dgb_data(self) 204 | return self 205 | 206 | @classmethod 207 | def lastfm(self, root=root_path): 208 | data = "lastfm" 209 | self.root = root 210 | self.read_specifications(self, data) 211 | self.load_dgb_data(self) 212 | return self 213 | 214 | @classmethod 215 | def contacts(self, root=root_path): 216 | data = "Contacts" 217 | self.root = root 218 | self.read_specifications(self, data) 219 | self.load_dgb_data(self) 220 | return self 221 | -------------------------------------------------------------------------------- /tgx/io/read.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import csv 3 | import numpy as np 4 | from typing import Optional, Union 5 | # from tgx.datasets.data_loader import read_dataset 6 | 7 | 8 | # data: Optional[object] = None, 9 | # is_discretized: bool = False, 10 | # discretize: bool = False, 11 | # time_scale: Union[str, int, None] = None, 12 | 13 | def read_csv(fname: Union[str, object] = None, 14 | header: bool = False, 15 | index: bool = False, 16 | t_col: int = 2,) -> dict: 17 | 18 | """ 19 | Read temporal edgelist and store it in a dictionary. 20 | Parameters: 21 | fname: directory of a dataset in .csv format or data object created from loading dgb/tgb datasets 22 | header: whether first line of data file is header 23 | index: whether the first column is row indices 24 | t_col: column indext for timestamps (0 or 2) 25 | ts_sorted: if data are sorted based on timestamp 26 | 27 | Returns: 28 | temp_edgelist: A dictionary of edges and their frequency at each time interval 29 | """ 30 | 31 | start_col = 0 32 | if index: 33 | start_col = 1 34 | t_col += 1 35 | 36 | if t_col < 2: 37 | u_col = t_col + 1 38 | else: 39 | u_col = start_col 40 | v_col = u_col + 1 41 | 42 | cols_to_read = [u_col, v_col, t_col] 43 | 44 | if (isinstance(fname, str)): 45 | return _load_edgelist(fname, cols_to_read, header=header) 46 | elif isinstance(fname, type) or isinstance(fname, object): 47 | return _datasets_edgelist_loader(fname.data) 48 | else: 49 | raise TypeError("Invalid input") 50 | 51 | 52 | def _load_edgelist(fname, columns, header): 53 | """ 54 | read edges from the file and store them in a dictionary 55 | Parameters: 56 | fname: file address 57 | columns: order of the nodes and timestamp 58 | header: Whether the data file contains header 59 | """ 60 | try: 61 | edgelist = open(fname, "r") 62 | except: 63 | raise FileNotFoundError("No such file or directory.") 64 | edgelist.readline() 65 | lines = list(edgelist.readlines()) 66 | edgelist.close() 67 | 68 | u_idx, v_idx, ts_idx = columns 69 | temp_edgelist = {} 70 | unique_edges = {} 71 | edges_list = [] 72 | total_edges = 0 73 | sorted = True 74 | previous_t = 0 75 | if header: 76 | first_line = 1 77 | else: 78 | first_line = 0 79 | for i in range(first_line, len(lines)): 80 | line = lines[i] 81 | values = line.split(',') 82 | t = int(float(values[ts_idx])) 83 | u = values[u_idx].strip() 84 | v = values[v_idx].strip() 85 | 86 | if i == first_line: 87 | curr_t = t 88 | 89 | # Check if the dataset is sorted 90 | if t < previous_t: 91 | sorted = False 92 | previous_t = t 93 | 94 | if t not in temp_edgelist: 95 | temp_edgelist[t] = {} 96 | if (u, v) not in temp_edgelist[t]: 97 | temp_edgelist[t][(u, v)] = 1 98 | else: 99 | temp_edgelist[t][(u, v)] += 1 100 | 101 | # temp_edgelist[t].append((u, v)) 102 | if (u,v) not in unique_edges: 103 | unique_edges[(u, v)] = 1 104 | total_edges += 1 105 | # temp_edgelist[curr_t] = edges_list 106 | 107 | if sorted is False: 108 | print("edgelist not sorted, sorting dataset...") 109 | myKeys = list(temp_edgelist.keys()) 110 | myKeys.sort() 111 | temp_edgelist = {i: temp_edgelist[i] for i in myKeys} 112 | 113 | print("Number of loaded edges: " + str(total_edges)) 114 | print("Number of unique edges:" , len(unique_edges.keys())) 115 | print("Available timestamps: ", len(temp_edgelist.keys())) 116 | return temp_edgelist 117 | 118 | def _datasets_edgelist_loader(data) -> dict: 119 | """ 120 | load built-in datasets and tgb datasets 121 | """ 122 | temp_edgelist = {} 123 | total_edges = 0 124 | unique_edges = {} 125 | first_line = 0 126 | previous_t = 0 127 | edges_list = [] 128 | sorted = True 129 | for line in data: 130 | u = line[0] 131 | v = line[1] 132 | t = int(float(line[2])) 133 | if first_line == 0: 134 | curr_t = t 135 | first_line += 1 136 | 137 | # Check if the dataset is sorted 138 | if t < previous_t: 139 | sorted = False 140 | previous_t = t 141 | 142 | if t != curr_t: 143 | temp_edgelist[curr_t] = edges_list 144 | edges_list = [] 145 | curr_t = t 146 | 147 | edges_list.append((u, v)) 148 | if (u,v) not in unique_edges: 149 | unique_edges[(u, v)] = 1 150 | total_edges += 1 151 | temp_edgelist[curr_t] = edges_list 152 | 153 | if sorted is False: 154 | print("Sorting dataset...") 155 | myKeys = list(temp_edgelist.keys()) 156 | myKeys.sort() 157 | temp_edgelist = {i: temp_edgelist[i] for i in myKeys} 158 | 159 | print("Number of loaded edges: " + str(total_edges)) 160 | print("Number of unique edges:" + str(len(unique_edges.keys()))) 161 | print("Available timestamps: ", len(temp_edgelist.keys())) 162 | 163 | return temp_edgelist 164 | 165 | 166 | def _load_edgelist_with_discretizer( 167 | fname : str, 168 | columns : list, 169 | time_scale : Union[str , int] = 86400, 170 | header : Optional[bool] = True) -> dict: 171 | """ 172 | load temporal edgelist into a dictionary 173 | assumption: the edges are ordered in increasing order of their timestamp 174 | ''' 175 | the timestamp in the edgelist is based cardinal 176 | more detail see here: https://github.com/srijankr/jodie 177 | need to merge edges in a period of time into an interval 178 | 86400 is # of secs in a day, good interval size 179 | ''' 180 | """ 181 | # print("Info: Interval size:", interval_size) 182 | edgelist = open(fname, "r") 183 | edgelist.readline() 184 | lines = list(edgelist.readlines()) 185 | edgelist.close() 186 | 187 | 188 | u_idx, v_idx, ts_idx = columns 189 | 190 | if isinstance(time_scale, str): 191 | if time_scale == "daily": 192 | interval_size = 86400 193 | elif time_scale == "weekly": 194 | interval_size = 86400 * 7 195 | elif time_scale == "monthly": 196 | interval_size = 86400 * 30 197 | elif time_scale == "yearly": 198 | interval_size = 86400* 365 199 | elif isinstance(time_scale, int): 200 | last_line = lines[-1] 201 | values = last_line.split(',') 202 | total_time = float(values[ts_idx]) 203 | interval_size = int(total_time / (time_scale-1)) 204 | else: 205 | raise TypeError("Invalid time interval") 206 | 207 | temporal_edgelist = {} 208 | total_n_edges = 0 209 | 210 | if header: 211 | first_line = 1 212 | else: 213 | first_line = 0 214 | 215 | 216 | for i in range(first_line, len(lines)): 217 | line = lines[i] 218 | values = line.split(',') 219 | 220 | total_n_edges += 1 221 | # values = line.strip().split(',') 222 | u = values[u_idx] # source node 223 | v = values[v_idx] # destination node 224 | ts = float(values[ts_idx]) # timestamp 225 | ts_bin_id = int(ts / interval_size) 226 | if ts_bin_id not in temporal_edgelist: 227 | temporal_edgelist[ts_bin_id] = {} 228 | temporal_edgelist[ts_bin_id][(u, v)] = 1 229 | else: 230 | if (u, v) not in temporal_edgelist[ts_bin_id]: 231 | temporal_edgelist[ts_bin_id][(u, v)] = 1 232 | else: 233 | temporal_edgelist[ts_bin_id][(u, v)] += 1 234 | 235 | print("Loading edge-list: Maximum timestamp is ", ts) 236 | print("Loading edge-list: Maximum timestamp-bin-id is", ts_bin_id) 237 | print("Loading edge-list: Total number of edges:", total_n_edges) 238 | return temporal_edgelist 239 | 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /docs/tutorials/data_loader.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load built-in and ported datasets from TGB\n", 8 | "This tutorial shows you how to load built-in datasets\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import tgx" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### Access TGB datasets\n", 25 | "In order to load TGB datasets you should first install the TGB package:\n", 26 | "\n", 27 | "`pip install py-tgb`\n", 28 | "\n", 29 | "Then write name of the dataset in the parantheses:\n", 30 | "\n", 31 | "`tgx.data.tgb(\"name\")`\n", 32 | "\n", 33 | "The dataset names are as follow\n", 34 | "\n", 35 | "`tgbl-wiki`, `tgbl-review`, `tgbl-coin`, `tgbl-comment`, `tgbl-flight`\n", 36 | "\n", 37 | "`tgbn-trade`, `tgbn-genre`, `tgbn-reddit`" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "raw file found, skipping download\n", 50 | "Dataset directory is /mnt/f/code/TGB/tgb/datasets/tgbl_wiki\n", 51 | "loading processed file\n", 52 | "Number of loaded edges: 157474\n", 53 | "Number of unique edges:18257\n", 54 | "Available timestamps: 152757\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "data_name = \"tgbl-wiki\" \n", 60 | "dataset = tgx.tgb_data(data_name) #tgb datasets\n", 61 | "ctdg = tgx.Graph(dataset)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Access other datasets\n", 69 | "\n", 70 | "To load built-in TGX datasets (from [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg)). You can write the name of the dataset instead of `datasest_name`:\n", 71 | "\n", 72 | "`tgx.data.dataset_name`\n", 73 | "\n", 74 | "The dataset names are as:\n", 75 | "\n", 76 | "`mooc`, `uci`, `uslegis`, `unvote`, `untrade`, `flight`, `wikipedia`, `reddit`, `lastfm`, `contact`, `canparl`, `socialevo`, `enron`" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "Number of loaded edges: 59835\n", 89 | "Number of unique edges:20296\n", 90 | "Available timestamps: 58911\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "dataset = tgx.builtin.uci()\n", 96 | "ctdg = tgx.Graph(dataset)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Custom Datasets" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "You can load your own custom dataset from `.csv` files and read it into a `tgx.Graph` object\n", 111 | "\n", 112 | "Let's start by loading a toy dataset into pandas and then visualize the rows" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 4, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/html": [ 123 | "
\n", 124 | "\n", 137 | "\n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | "
timesourcedestination
0012
1021
2031
3122
4112
5131
\n", 185 | "
" 186 | ], 187 | "text/plain": [ 188 | " time source destination\n", 189 | "0 0 1 2\n", 190 | "1 0 2 1\n", 191 | "2 0 3 1\n", 192 | "3 1 2 2\n", 193 | "4 1 1 2\n", 194 | "5 1 3 1" 195 | ] 196 | }, 197 | "execution_count": 4, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "import pandas as pd\n", 204 | "toy_fname = 'toy_data.csv'\n", 205 | "df = pd.read_csv(toy_fname)\n", 206 | "df" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "Number of loaded edges: 5\n", 219 | "Number of unique edges: 4\n", 220 | "Available timestamps: 2\n" 221 | ] 222 | }, 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "" 227 | ] 228 | }, 229 | "execution_count": 5, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "from tgx.io.read import read_csv\n", 236 | "# header indicates if there is a header row at the top\n", 237 | "# index whether the first column is row indices\n", 238 | "# t_col indicates which column corresponds to timestamps\n", 239 | "edgelist = read_csv(toy_fname, \n", 240 | " header=True,\n", 241 | " index=False,\n", 242 | " t_col=0,)\n", 243 | "tgx.Graph(edgelist=edgelist)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### Subsampling graphs\n", 251 | "\n", 252 | "To perform subsmpling graphs you should follow these steps:\n", 253 | "\n", 254 | "1. descritize the data\n", 255 | "\n", 256 | "2. create a graph object of data (G)\n", 257 | "\n", 258 | "3. subsample the graph by `tgx.utils.graph_utils.subsampling`\n", 259 | "\n", 260 | "4. create a new graph from the subsampled subgraph" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 6, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "Generate graph subsample...\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "from tgx.utils.graph_utils import subsampling\n", 278 | "\n", 279 | "sub_edges = subsampling(ctdg, selection_strategy=\"random\", N=1000) #N is # of nodes to be sampled \n", 280 | "subgraph = tgx.Graph(edgelist=sub_edges)" 281 | ] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3 (ipykernel)", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.9.6" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 4 305 | } 306 | -------------------------------------------------------------------------------- /tgx/viz/TEA.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from typing import Union, Optional 4 | from tgx.utils.graph_utils import discretize_edges 5 | from tgx.utils.plotting_utils import create_ts_list 6 | __all__ = ["TEA"] 7 | 8 | def TEA( 9 | temp_edgelist : Union[object, dict], 10 | filepath : Optional[str] = ".", 11 | fig_size : tuple = (7,5), 12 | font_size : int = 20, 13 | network_name : str = None, 14 | time_scale : Union[str, int] = None, 15 | real_dates : bool = None, 16 | test_split : bool = False, 17 | density : bool = False 18 | ): 19 | r""" 20 | generating TEA plot 21 | 22 | Parameters: 23 | temp_edgelist: a dictionary of temporal edges or a dataset object. 24 | filepath: Path to save the TEA Plot. 25 | fig_size: Size of the figure to save. 26 | font_size: Size of the text in the figure. 27 | network_name: Name of the dataset to be used in the TEA plot file. 28 | time_scale: time_scale for discretizing data if already not done. 29 | real_dates: Whether to use the real dates from dataset. 30 | test_split: Whether show the test split on the plot. 31 | density: Whether to return edge density and edge frequency dictioneries. 32 | """ 33 | if isinstance(temp_edgelist, object): 34 | if temp_edgelist.freq_data is None: 35 | temp_edgelist.count_freq() 36 | temp_edgelist = temp_edgelist.freq_data 37 | 38 | # check number of unique timestamps: 39 | unique_ts = list(temp_edgelist.keys()) 40 | # if len(unique_ts) > max_time_scale: 41 | # inp = input(f"There are {unique_ts} timestamps in the data.\nDo you want to discretize the data to 1000 timestamps?(y/n)").lower() 42 | # if inp == "y": 43 | # temp_edgelist = edgelist_discritizer(temp_edgelist, 44 | # unique_ts, 45 | # time_scale = max_time_scale) 46 | if time_scale is not None: 47 | temp_edgelist = discretize_edges(temp_edgelist, 48 | time_scale = time_scale) 49 | 50 | 51 | ts_edges_dist, ts_edges_dist_density, edge_frequency_dict = TEA_process_edgelist_per_timestamp(temp_edgelist) 52 | 53 | TEA_plot_edges_bar(ts_edges_dist, 54 | filepath = filepath, 55 | fig_size = fig_size, 56 | font_size = font_size, 57 | network_name=network_name, 58 | real_dates = real_dates, 59 | test_split = test_split) 60 | 61 | if density: 62 | return ts_edges_dist_density, edge_frequency_dict 63 | 64 | 65 | 66 | def TEA_process_edgelist_per_timestamp(temp_edgelist): 67 | # generate distribution of the edges history 68 | unique_ts = list(temp_edgelist.keys()) 69 | # unique_ts.sort() 70 | # print(f"There are {len(unique_ts)} timestamps.") 71 | 72 | # get node set & total number of nodes 73 | node_dict = {} 74 | for t, e_dict in temp_edgelist.items(): 75 | for e, exist in e_dict.items(): 76 | if e[0] not in node_dict: 77 | node_dict[e[0]] = 1 78 | if e[1] not in node_dict: 79 | node_dict[e[1]] = 1 80 | num_nodes = len(node_dict) 81 | num_e_fully_connected = num_nodes * (num_nodes - 1) 82 | 83 | edge_frequency_dict = {} # how many times an edge is seen 84 | ts_edges_dist = [] # contains different features specifying the characteristics of the edge distribution over time 85 | ts_edges_dist_density = [] 86 | for curr_t in unique_ts: 87 | 88 | # if curr_t < 2: 89 | # print("curr_t", curr_t) 90 | prev_ts = [ts for ts in unique_ts if ts < curr_t] 91 | edges_in_prev_ts = {} 92 | for bts in prev_ts: 93 | edges_in_prev_ts.update(temp_edgelist[bts]) 94 | 95 | curr_ts_edge_list = temp_edgelist[curr_t] 96 | for e in curr_ts_edge_list: 97 | if e not in edge_frequency_dict: 98 | edge_frequency_dict[e] = 1 99 | else: 100 | edge_frequency_dict[e] += 1 101 | 102 | if len(curr_ts_edge_list) > 0: 103 | curr_ts_edges_dist = {'ts': curr_t, 104 | 'new': len([e for e in curr_ts_edge_list if e not in edges_in_prev_ts]), 105 | 'repeated': len([e for e in curr_ts_edge_list if e in edges_in_prev_ts]), 106 | 'not_repeated': len([e for e in edges_in_prev_ts if e not in curr_ts_edge_list]), 107 | 'total_curr_ts': len(curr_ts_edge_list), 108 | 'total_seen_until_curr_ts': len(edges_in_prev_ts) + len(curr_ts_edge_list) 109 | } 110 | curr_ts_edges_dist_density = {'ts': curr_t, 111 | 'new': (curr_ts_edges_dist['new'] * 1.0) / num_e_fully_connected, 112 | 'repeated': (curr_ts_edges_dist['repeated'] * 1.0) / num_e_fully_connected, 113 | 'not_repeated': (curr_ts_edges_dist[ 114 | 'not_repeated'] * 1.0) / num_e_fully_connected, 115 | 'total_curr_ts': (curr_ts_edges_dist[ 116 | 'total_curr_ts'] * 1.0) / num_e_fully_connected, 117 | 'total_seen_until_curr_ts': (curr_ts_edges_dist[ 118 | 'total_seen_until_curr_ts'] * 1.0) / num_e_fully_connected, 119 | } 120 | else: 121 | curr_ts_edges_dist = {'ts': curr_t, 122 | 'new': 0, 123 | 'repeated': 0, 124 | 'not_repeated': 0, 125 | 'total_curr_ts': 0, 126 | 'total_seen_until_curr_ts': len(edges_in_prev_ts) + len(curr_ts_edge_list) 127 | } 128 | curr_ts_edges_dist_density = {'ts': curr_t, 129 | 'new': 0, 130 | 'repeated': 0, 131 | 'not_repeated': 0, 132 | 'total_curr_ts': 0, 133 | 'total_seen_until_curr_ts': 0, 134 | } 135 | ts_edges_dist.append(curr_ts_edges_dist) 136 | ts_edges_dist_density.append(curr_ts_edges_dist_density) 137 | # print(len(edges_in_prev_ts)) 138 | # print(len(ts_edges_dist)) 139 | # print(edge_frequency_dict) 140 | # break 141 | return ts_edges_dist, ts_edges_dist_density, edge_frequency_dict 142 | 143 | 144 | def TEA_plot_edges_bar(ts_edges_dist: list, 145 | filepath: str = ".", 146 | fig_size: list = (9,5), 147 | font_size: int = 20, 148 | network_name: str = None, 149 | real_dates: list = None, 150 | time_scale: list = None, 151 | test_split: bool = False, 152 | show: bool =False): 153 | r""" 154 | Making TEA plot and save into pdf file. 155 | Args: 156 | ts_edges_dist: list of dictionaries containing the edge distribution over time. 157 | filepath: Path to save the TEA Plot. 158 | fig_size: Size of the figure to save. 159 | font_size: Size of the text in the figure. 160 | network_name: Name of the dataset to be used in the TEA plot file. 161 | real_dates: list of real dates as ticks 162 | time_scale: time_scale for discretizing data if already not done. 163 | test_split: Whether show the test split on the plot. 164 | show: Whether to show the plot. 165 | """ 166 | 167 | 168 | ts_edges_dist_df = pd.DataFrame(ts_edges_dist, columns=['ts', 'new', 'repeated', 169 | 'not_repeated', 170 | 'total_curr_ts', 171 | 'total_seen_until_curr_ts']) 172 | 173 | 174 | ### Additional Stats ### 175 | mean = ts_edges_dist_df.mean(axis=0) 176 | # print("INFO: Network Name:", network_name) 177 | # print("INFO: AVG. stats. over all timestamps: ", mean) 178 | # print("INFO: ratio of avg.(new)/avg.(total_curr_ts): {:.2f}".format(mean['new'] / mean['total_curr_ts'])) 179 | ### 180 | 181 | fig, ax = plt.subplots(figsize=fig_size) # lastfm, mooc, reddit, UNtrade, UNvote 182 | plt.subplots_adjust(bottom=0.2, left=0.2) 183 | font_size = font_size 184 | ticks_font_size = 15 185 | plt.yticks(fontsize=ticks_font_size) 186 | plt.xticks(fontsize=ticks_font_size) 187 | if real_dates is not None: 188 | start = real_dates[0] 189 | end = real_dates[1] 190 | metric = real_dates[2] 191 | create_ts_list(start, end, metric=metric, interval=time_scale) 192 | else: 193 | duration = ts_edges_dist_df['ts'].tolist() 194 | timestamps = [i for i in range(len(duration))] 195 | 196 | new = ts_edges_dist_df['new'].tolist() 197 | repeated = ts_edges_dist_df['repeated'].tolist() 198 | # print(len(timestamps), repeated, new) 199 | # plotting stuffs 200 | # bar plot 201 | plt.bar(timestamps, repeated, label='Repeated', color='#404040', alpha=0.4) 202 | plt.bar(timestamps, new, label='New', bottom=repeated, color='#ca0020', alpha=0.8, hatch='//') 203 | # test split line 204 | if test_split: 205 | plt.axvline(x=(timestamps[int(0.85 * len(timestamps))]), color="blue", linestyle="--", linewidth=2) 206 | plt.text((timestamps[int(0.85 * len(timestamps))]), 0, 207 | 'x', va='center', ha='center', fontsize=font_size, fontweight='heavy', color='blue') 208 | 209 | plt.margins(x=0) 210 | plt.xlabel("Timestamp", fontsize=font_size) 211 | plt.ylabel("Number of edges", fontsize=font_size) 212 | plt.legend(fontsize = 13) 213 | if filepath is not None: 214 | plt.savefig(f"{filepath}/{network_name}_TEA.pdf") 215 | print("plot saved as " + f"{filepath}/{network_name}_TEA.pdf") 216 | if (show): 217 | plt.show() 218 | 219 | 220 | -------------------------------------------------------------------------------- /tgx/classes/graph.py: -------------------------------------------------------------------------------- 1 | # import networkx as nx 2 | import copy 3 | import csv 4 | import numpy as np 5 | from typing import Optional, Union 6 | from tgx.utils.graph_utils import discretize_edges, frequency_count, subsampling 7 | from tgx.io.read import read_csv 8 | 9 | #TODO should contain a new property tracking the number of timestamps#TODO should contain a new property tracking the number of timestamps 10 | class Graph(object): 11 | def __init__(self, 12 | dataset: Optional[object] = None, 13 | fname: Optional[str] = None, 14 | edgelist: Optional[dict] = None): 15 | """ 16 | Create a Graph object with specific characteristics 17 | Args: 18 | dataset: a dataset object 19 | edgelist: a dictionary of temporal edges in the form of {t: {(u, v), freq}} 20 | """ 21 | 22 | if dataset is not None: 23 | if isinstance(dataset, type) or isinstance(dataset,object): 24 | data = read_csv(dataset) 25 | elif fname is not None and isinstance(fname, str): 26 | data = read_csv(fname) 27 | elif edgelist is not None and isinstance(edgelist, dict): 28 | data = edgelist 29 | else: 30 | raise TypeError("Please enter valid input.") 31 | 32 | init_key = list(data.keys())[0] 33 | if isinstance(data[init_key], list): 34 | data = self._list2dict(data) 35 | self.data = data 36 | self.subsampled_graph = None 37 | self.freq_data = None 38 | self.id_map = None #a map from original node id to new node id based on their order of appearance 39 | 40 | def _list2dict(self, data) -> dict: 41 | r""" 42 | convert data into a dictionary of dictionary of temporal edges 43 | """ 44 | new_data = {} 45 | for t in data.keys(): 46 | edgelist = {} 47 | for u,v in data[t]: 48 | edgelist[(u,v)] = 1 49 | new_data[t] = edgelist 50 | return new_data 51 | 52 | #TODO support edge features, edge weights, node features and more, currently supports, timestamp, source, destination 53 | def export_full_data(self): 54 | """ 55 | convert self.data inot a dictionary of numpy arrays similar to TGB LinkPropPredDataset 56 | """ 57 | num_edge = self.number_of_edges() 58 | sources = np.zeros(num_edge, dtype=np.int64) 59 | destinations = np.zeros(num_edge, dtype=np.int64) 60 | timestamps = np.zeros(num_edge, dtype=np.int64) 61 | idx = 0 62 | edgelist = self.data 63 | 64 | for ts, edge_data in edgelist.items(): 65 | for u,v in edge_data.keys(): 66 | sources[idx] = u 67 | destinations[idx] = v 68 | timestamps[idx] = ts 69 | idx += 1 70 | full_data = { 71 | "sources": sources, 72 | "destinations": destinations, 73 | "timestamps": timestamps, 74 | } 75 | return full_data 76 | 77 | def shift_time_to_zero(self) -> None: 78 | r""" 79 | shift all edges in the dataset to start with timestamp 0 80 | """ 81 | min_t = list(self.data.keys())[0] 82 | new_data = {} 83 | for ts in self.data.keys(): 84 | new_data[ts - min_t] = self.data[ts] 85 | self.data = new_data 86 | 87 | def discretize(self, 88 | time_scale: Union[str, int], 89 | store_unix: bool = True, 90 | freq_weight: bool = False) -> object: 91 | """ 92 | discretize the graph object based on the given time interval 93 | Args: 94 | time_scale: time interval to discretize the graph 95 | store_unix: whether to store converted unix time in a list 96 | freq_weight: whether to weight the edges by frequency in the new graph object 97 | """ 98 | new_G = copy.deepcopy(self) 99 | # discretie differently based on # of intervals of time granularity 100 | output = discretize_edges(self.data, 101 | time_scale = time_scale, 102 | store_unix = store_unix, 103 | freq_weight = freq_weight) 104 | disc_G = output[0] 105 | new_G.data = disc_G 106 | if (store_unix): 107 | return new_G, output[1] 108 | else: 109 | return (new_G, None) 110 | 111 | def count_freq(self): 112 | self.freq_data = frequency_count(self.data) 113 | return self 114 | 115 | def subsampling(self, 116 | node_list: Optional[list] = [], 117 | random_selection: Optional[bool] = True, 118 | N: Optional[int] = None) -> object: 119 | new_G = copy.deepcopy(self) 120 | new_G.data = subsampling(new_G, node_list = node_list, random_selection=random_selection, N=N) 121 | return new_G 122 | 123 | def number_of_edges(self) -> int: 124 | r""" 125 | Calculate total number of nodes present in an edgelist 126 | """ 127 | edgelist = self.data 128 | e_num = 0 129 | for _, edges in edgelist.items(): 130 | e_num += len(edges) 131 | 132 | return e_num 133 | 134 | def unique_edges(self) -> int: 135 | r""" 136 | Calculate the number of unique edges 137 | Parameters: 138 | graph_edgelist: Dictionary containing graph data 139 | """ 140 | unique_edges = {} 141 | for _, e_list in self.data.items(): 142 | for e in e_list: 143 | if e not in unique_edges: 144 | unique_edges[e] = 1 145 | return len(unique_edges) 146 | 147 | 148 | def total_nodes(self) -> int: 149 | r""" 150 | Calculate total number of unique nodes present in an edgelist 151 | """ 152 | edgelist = self.data 153 | node_list = {} 154 | for _, edge_data in edgelist.items(): 155 | for u,v in edge_data.keys(): 156 | if u not in node_list: 157 | node_list[u] = 1 158 | if v not in node_list: 159 | node_list[v] = 1 160 | return len(node_list) 161 | 162 | 163 | def max_nid(self) -> int: 164 | r""" 165 | find the largest node ID in the dataset 166 | """ 167 | edgelist = self.data 168 | max_id = 0 169 | for _, edge_data in edgelist.items(): 170 | for u,v in edge_data.keys(): 171 | if u > max_id: 172 | max_id = u 173 | if v > max_id: 174 | max_id = v 175 | return max_id #offset by 1 176 | 177 | def min_nid(self) -> int: 178 | r""" 179 | find the smallest node ID in the dataset 180 | """ 181 | edgelist = self.data 182 | min_id = 1000000000 183 | for _, edge_data in edgelist.items(): 184 | for u,v in edge_data.keys(): 185 | if u < min_id: 186 | min_id = u 187 | if v < min_id: 188 | min_id = v 189 | return min_id #offset by 1 190 | 191 | 192 | def map_nid(self) -> dict: 193 | r""" 194 | remap all node ids in the dataset to start from 0 and based on node order of appearance. Also updates self.data 195 | Output: 196 | id_map: a dictionary mapping original node id to new node id 197 | """ 198 | edgelist = self.data 199 | id_map = {} 200 | nid = 0 201 | new_edgelist = {} 202 | for ts, edge_data in edgelist.items(): 203 | new_edgelist[ts] = {} 204 | for u,v in edge_data.keys(): 205 | if u not in id_map: 206 | id_map[u] = nid 207 | nid += 1 208 | if v not in id_map: 209 | id_map[v] = nid 210 | nid += 1 211 | new_edgelist[ts][(id_map[u],id_map[v])] = edge_data[(u,v)] 212 | self.data = new_edgelist 213 | return id_map 214 | 215 | 216 | def node_per_ts(self): 217 | active_nodes = {} 218 | for ts in range(len(self.data)): 219 | edgelist_t = self.data[ts] 220 | active_nodes.append(self.edgelist_node_count(edgelist_t)) 221 | return active_nodes 222 | 223 | def edgelist_node_count(self, edge_data: list): 224 | node_list = {} 225 | for edge in edge_data: 226 | (u, v) = edge 227 | if u not in node_list: 228 | node_list[u] = 1 229 | if v not in node_list: 230 | node_list[v] = 1 231 | return len(node_list.keys()) 232 | 233 | def edgelist_node_list(self, edge_data: list): 234 | node_list = {} 235 | for edge in edge_data: 236 | (u, v) = edge 237 | if u not in node_list: 238 | node_list[u] = 1 239 | if v not in node_list: 240 | node_list[v] = 1 241 | return list(node_list.keys()) 242 | 243 | def nodes_list(self) -> list: 244 | r""" 245 | Return a list of nodes present in an edgelist 246 | """ 247 | node_list = {} 248 | edgelist = self.data 249 | for _, edge_data in edgelist.items(): 250 | for u,v in edge_data.keys(): 251 | if u not in node_list: 252 | node_list[u] = 1 253 | if v not in node_list: 254 | node_list[v] = 1 255 | self.node_list = list(node_list.keys()) 256 | return list(node_list.keys()) 257 | 258 | def check_time_gap(self) -> bool: 259 | r""" 260 | Check whether the edgelist timestamps have gaps or not (increments bigger than 1) 261 | Returns: 262 | time_gap: a boolean indicating whether there is a time gap or not 263 | """ 264 | time_gap = False 265 | ts = list(self.data.keys()) 266 | for i in range(1, len(ts)): 267 | if ts[i] - ts[i-1] > 1: 268 | time_gap = True 269 | return time_gap 270 | return time_gap 271 | 272 | def save2csv(self, 273 | fname:str = "output") -> None: 274 | r""" 275 | Save the graph object in an edgelist format to a csv file 276 | Args: 277 | fname: name of the csv file to save the graph, no csv suffix needed 278 | """ 279 | outname = fname + ".csv" 280 | #iterate through all edges 281 | with open(outname, 'w') as csvfile: 282 | print ("saving to ", outname) 283 | csvwriter = csv.writer(csvfile, delimiter=',') 284 | csvwriter.writerow(['timestamp'] + ['source'] + ['destination']) 285 | for t, edges_list in self.data.items(): 286 | for edge in edges_list: 287 | (u, v) = edge 288 | csvwriter.writerow([t] + [u] + [v]) 289 | 290 | 291 | -------------------------------------------------------------------------------- /tgx/utils/graph_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Union, Optional 3 | 4 | __all__ = ["train_test_split", 5 | "discretize_edges", 6 | "subsampling", 7 | "node_list", 8 | "is_discretized", 9 | "frequency_count"] 10 | 11 | SEC_IN_MIN = 60 12 | SEC_IN_HOUR = 3600 13 | SEC_IN_DAY = 86400 14 | SEC_IN_WEEK = 86400 * 7 15 | SEC_IN_MONTH = 86400 * 30 16 | SEC_IN_YEAR = 86400 * 365 17 | SEC_IN_BIYEARLY = 86400 * 365 * 2 18 | 19 | # helper function to do ceiling divison, i.e. 5/2 = 3 20 | def ceiling_division(n, d): 21 | q, r = divmod(n, d) 22 | return q + bool(r) 23 | 24 | 25 | 26 | def discretize_edges(edgelist: dict, 27 | time_scale: Union[int,str], 28 | store_unix: Optional[bool] = False, 29 | freq_weight: Optional[bool] = False) -> list: 30 | """ 31 | util function for discretizing edgelist, expected timestamp on edges are unixtimestamp 32 | this func supports discretization of edge timestamp 33 | 1. by providing the number of intervals (int), it will equally divide the data into that number of intervals. Note that the last bin can have less duration than others. 34 | 2. by providing a time granularity (str), it will divide the data into intervals based on the given granularity, i.e. "hourly", "daily", "weekly", "monthly", "yearly", the starting time of the dataset is consider the start of the first interval 35 | Parameters: 36 | edgelist: dict, dictionary of edges 37 | time_scale: int or str, time interval to discretize the graph 38 | store_unix: bool, whether to return the converted timestamps in unix format 39 | freq_weight: bool, whether to weight the edges based on their frequency 40 | Returns: 41 | output list: the first item in the list is always the updated edgelist (dict, dictionary of edges with discretized timestamps) and the second item is the converted timestamps in unix format (list) if store_unix is True 42 | """ 43 | unique_ts = list(edgelist.keys()) 44 | total_time = unique_ts[-1] - unique_ts[0] 45 | 46 | #! adding intermediate hour and days, to remove 47 | 48 | if time_scale is not None: 49 | if isinstance(time_scale, int): 50 | interval_size = total_time // time_scale #integer timestamp of the bin, discounting any bin that has a smaller duration than others 51 | elif isinstance(time_scale, str): 52 | if time_scale == "minutely": 53 | interval_size = SEC_IN_MIN 54 | elif time_scale == "hourly": 55 | interval_size = SEC_IN_HOUR 56 | elif time_scale == "2hourly": 57 | interval_size = 2*SEC_IN_HOUR 58 | elif time_scale == "4hourly": 59 | interval_size = 4*SEC_IN_HOUR 60 | elif time_scale == "6hourly": 61 | interval_size = 6*SEC_IN_HOUR 62 | elif time_scale == "12hourly": 63 | interval_size = 12*SEC_IN_HOUR 64 | elif time_scale == "daily": 65 | interval_size = SEC_IN_DAY 66 | elif time_scale == "2daily": 67 | interval_size = 2*SEC_IN_DAY 68 | elif time_scale == "4daily": 69 | interval_size = 4*SEC_IN_DAY 70 | elif time_scale == "weekly": 71 | interval_size = SEC_IN_WEEK 72 | elif time_scale == "monthly": 73 | interval_size = SEC_IN_MONTH 74 | elif time_scale == "yearly": 75 | interval_size = SEC_IN_YEAR 76 | elif time_scale == "biyearly": 77 | interval_size = SEC_IN_BIYEARLY 78 | else: 79 | raise TypeError("Invalid time interval") 80 | else: 81 | raise TypeError("Please provide a time interval") 82 | 83 | num_time_scale = ceiling_division(total_time, interval_size) 84 | print(f'Discretizing data to {num_time_scale} timestamps...') 85 | 86 | updated_edgelist = {} 87 | 88 | if (store_unix): 89 | unix_dict = [] 90 | start_time = int(unique_ts[0]) 91 | 92 | for ts, edges_list in edgelist.items(): 93 | #? no longer assume ts start with 0 94 | bin_ts = ceiling_division(ts-start_time, interval_size) #will correctly put edges into the last bin 95 | 96 | for edge in edges_list: 97 | if bin_ts not in updated_edgelist: 98 | updated_edgelist[bin_ts] = {edge: 1} 99 | else: 100 | if (not freq_weight): 101 | updated_edgelist[bin_ts][edge] = 1 102 | else: 103 | if (edge in updated_edgelist[bin_ts]): 104 | updated_edgelist[bin_ts][edge] += 1 105 | else: 106 | updated_edgelist[bin_ts][edge] = 1 107 | 108 | if (store_unix): 109 | #! should use bin_ts here 110 | unix_ts = start_time + bin_ts * interval_size 111 | 112 | # unix_ts = start_time + int(ts // interval_size) * interval_size #round to the nearest start time 113 | unix_ts = int(unix_ts) 114 | unix_dict.extend([unix_ts] * len(edges_list)) 115 | 116 | output = [updated_edgelist] 117 | if (store_unix): 118 | output.append(unix_dict) 119 | return output 120 | 121 | def subsampling(graph: object, 122 | node_list: Optional[list] = [], 123 | selection_strategy: str = "random", 124 | N: Optional[int] = 100 125 | ) -> dict: 126 | """ 127 | Subsampling a part of graph by only monitoring the contacts from specific nodes' list 128 | 129 | Parameters: 130 | graph: graph object 131 | node_list: list, a set of nodes to extract their contacts from the graph 132 | selection_strategy: str, currently supports random sampling 133 | N: int, number of nodes to be randomly sampled from graph 134 | 135 | Returns: 136 | new_edgelist: dict, a dictionary of edges corresponding to nodes in the node_list 137 | """ 138 | print("Generate graph subsample...") 139 | edgelist = graph.data 140 | nodes = graph.nodes_list() 141 | 142 | if (len(node_list) == 0): #decide on selection strategy if nodelist not provided 143 | if (selection_strategy == "random"): 144 | node_list = list(np.random.choice(nodes, size = N, replace = False)) 145 | else: 146 | raise ValueError("Selection strategy not supported", selection_strategy) 147 | 148 | new_edgelist = {} 149 | for t, edge_data in edgelist.items(): 150 | for (u,v), f in edge_data.items(): 151 | if u in node_list or v in node_list: 152 | if t not in new_edgelist: 153 | new_edgelist[t] = {} 154 | new_edgelist[t][(u, v)] = f 155 | else: 156 | new_edgelist[t][(u, v)] = f 157 | return new_edgelist 158 | 159 | def frequency_count(edgelist: dict): 160 | new_edgelist = {} 161 | 162 | for t, edges_list in edgelist.items(): 163 | for edge in edges_list: 164 | (u, v) = edge 165 | 166 | # Check if this is the first edge occurning in this timestamp 167 | if t not in new_edgelist: 168 | new_edgelist[t] = {} 169 | new_edgelist[t][(u, v)] = 1 170 | 171 | else: 172 | if (u, v) not in new_edgelist[t]: 173 | new_edgelist[t][(u, v)] = 1 # If the edge was not occured in this timestamp before 174 | else: 175 | new_edgelist[t][(u, v)] += 1 176 | 177 | return new_edgelist 178 | 179 | def node_list(dict_edgelist: dict) -> list: 180 | 181 | """ 182 | create a list of nodes from edgelist dictionary 183 | """ 184 | node_list = {} 185 | for _, edge_data in dict_edgelist.items(): 186 | for (u,v), _ in edge_data.items(): 187 | if u not in node_list: 188 | node_list[u] = 1 189 | if v not in node_list: 190 | node_list[v] = 1 191 | return list(node_list.keys()) 192 | 193 | 194 | def train_test_split(data : dict, 195 | val : bool = False, 196 | ratio : list = [85, 15]) -> dict: 197 | """ 198 | Generate train/test split for the data 199 | 200 | Parameters: 201 | data:dictionary of data 202 | val: whether we want to have a validation split as well 203 | ratio: list indication the ratio of the data in split. Sum of the list components should be 100. 204 | 205 | Returns: 206 | two (train/test) or three (train/val/test) data dictionaries 207 | """ 208 | sum = 0 209 | for i in ratio: 210 | sum += i 211 | if sum != 100: 212 | raise ValueError("invalid train/test split ratio. Sum of the ratios should be 100.") 213 | 214 | if val and len(ratio) != 3: 215 | raise Exception("Provide train/val/test ratio") 216 | elif not val and len(ratio) == 3: 217 | print("Warning! Data is being splitted to train and test only!") 218 | 219 | data_len = len(data) 220 | train_split = int(data_len * ratio[0] / 100) 221 | train_data = {k: v for k, v in data.items() if k < train_split} 222 | if val: 223 | val_split = int(data_len * ratio[1] / 100) + train_split 224 | val_data = {k: v for k, v in data.items() if train_split <= k < val_split} 225 | test_data = {k: v for k, v in data.items() if val_split <= k <= data_len} 226 | return train_data, val_data, test_data 227 | 228 | else: 229 | test_data = {k: v for k, v in data.items() if train_split <= k <= data_len} 230 | return train_data, test_data 231 | 232 | 233 | def is_discretized(edgelist: Optional[dict], 234 | max_timestamps: Optional[int] = 10000) -> bool: 235 | r""" 236 | Check if an edgelist is discretized or not. 237 | """ 238 | timestamps = list(edgelist.keys()) 239 | discretized = True 240 | if len(timestamps) > max_timestamps: 241 | discretized = False 242 | 243 | return discretized 244 | 245 | def list2csv(lst: list, 246 | fname: str, 247 | delimiter: str = ",", 248 | fmt: str = '%i'): 249 | out_list = np.array(lst) 250 | np.savetxt(fname, out_list, delimiter=delimiter, fmt=fmt) 251 | 252 | 253 | 254 | 255 | # def edgelist_discritizer(edgelist: dict, 256 | # time_scale: Union[str, int]): 257 | # """ 258 | # util function for discretizing edgelist, expected timestamp on edges are unixtimestamp 259 | # this func supports discretization in two different ways 260 | # 1. by providing the number of intervals (int), it will equally divide the data into that number of intervals. Note that the last bin can have less duration than others. 261 | # 2. by providing a time granularity (str), it will divide the data into intervals based on the given granularity, i.e. "hourly", "daily", "weekly", "monthly", "yearly" 262 | # In the second way however, the intervals will be based on utc timezone (dividing into days, hours this way) thus both first bin and last bin can have last duration than others. 263 | 264 | # Parameters: 265 | # edgelist: dict, dictionary of edges 266 | # time_scale: str or int, time interval to discretize the graph 267 | # Returns: 268 | # updated_edgelist: dict, dictionary of edges with discretized timestamps 269 | # """ 270 | 271 | # unique_ts = list(edgelist.keys()) 272 | 273 | # total_time = unique_ts[-1] - unique_ts[0] 274 | # if time_scale is not None: 275 | # if isinstance(time_scale, str): 276 | # if time_scale == "hourly": 277 | # interval_size = SEC_IN_HOUR 278 | # elif time_scale == "daily": 279 | # interval_size = SEC_IN_DAY 280 | # elif time_scale == "weekly": 281 | # interval_size = SEC_IN_WEEK 282 | # elif time_scale == "monthly": 283 | # interval_size = SEC_IN_MONTH 284 | # elif time_scale == "yearly": 285 | # interval_size = SEC_IN_YEAR 286 | # elif isinstance(time_scale, int): 287 | # interval_size = int(total_time / (time_scale)) 288 | # else: 289 | # raise TypeError("Invalid time interval") 290 | # else: 291 | # raise TypeError("Please provide a time interval") 292 | # num_time_scale = int(total_time/interval_size) 293 | # print(f'Discretizing data to {num_time_scale} timestamps...') 294 | # # if num_time_scale == 0: 295 | # # print("Warning! Only one timestamp exist in the data.") 296 | 297 | # updated_edgelist = {} 298 | # for ts, edges_list in edgelist.items(): 299 | # bin_ts = int(ts / interval_size) 300 | # if bin_ts >= num_time_scale: 301 | # bin_ts -= 1 302 | 303 | # for edge in edges_list: 304 | # if bin_ts not in updated_edgelist: 305 | # updated_edgelist[bin_ts] = [] 306 | # updated_edgelist[bin_ts].append(edge) 307 | # print("Discretization Done..!") 308 | # return updated_edgelist 309 | -------------------------------------------------------------------------------- /tgx/viz/TET.py: -------------------------------------------------------------------------------- 1 | # TET Plot 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | from tqdm import tqdm 6 | from typing import Union, Optional 7 | import matplotlib.pyplot as plt 8 | from tgx.utils.graph_utils import discretize_edges 9 | 10 | 11 | # some parameters to be used for drawing 12 | E_ABSENT = 0 13 | E_PRESENCE_GENERAL = 1 14 | E_SEEN_IN_TRAIN = 2 15 | E_IN_TEST = 3 16 | E_NOT_IN_TEST = 4 17 | 18 | TEST_RATIO = 0.15 19 | 20 | # new color controlling parameters; Date: Dec. 22, 2021 21 | E_ONLY_TRAIN = 10 22 | E_TRAIN_AND_TEST = 20 23 | E_TRANSDUCTIVE = 30 24 | E_INDUCTIVE = 40 25 | 26 | 27 | #! should be merged graph class? 28 | def TET(temp_edgelist : Union[object, dict], 29 | filepath: Optional[str] = ".", 30 | time_scale : Union[str, int] = None, 31 | network_name : str = None, 32 | add_frame : bool = True, 33 | test_split : bool = False, 34 | figsize : tuple = (9, 5), 35 | axis_title_font_size : int = 20, 36 | ticks_font_size : int = 20, 37 | show: bool = True): 38 | r""" 39 | Generate TET plots 40 | Args: 41 | temp_edgelist: a dictionary of temporal edges or a dataset object. 42 | filepath: Path to save the TEA Plot. 43 | figsize: Size of the figure to save. 44 | axis_title_font_size: The font size of xis titles. 45 | ticks_font_size: Size of the text in the figure. 46 | add_frame: Add the frame to the plot. 47 | network_name: Name of the dataset to be used in the TEA plot file. 48 | time_scale: time_scale for discretizing data if already not done. 49 | test_split: Whether show the test split on the plot. 50 | max_time_scale: Maximum number of time_scale to discretize data. 51 | show: Whether to show the plot. 52 | """ 53 | if isinstance(temp_edgelist, object): 54 | if temp_edgelist.freq_data is None: 55 | temp_edgelist.count_freq() 56 | temp_edgelist = temp_edgelist.freq_data 57 | 58 | # check number of unique timestamps: 59 | unique_ts = list(temp_edgelist.keys()) 60 | # if len(unique_ts) > max_time_scale: 61 | # inp = input(f"There are {unique_ts} timestamps in the data.\nDo you want to discretize the data to 1000 timestamps?(y/n)").lower() 62 | # if inp == "y": 63 | # temp_edgelist = edgelist_discritizer(temp_edgelist, 64 | # unique_ts, 65 | # time_scale = max_time_scale) 66 | if time_scale is not None: 67 | temp_edgelist = discretize_edges(temp_edgelist, 68 | time_scale = time_scale) 69 | 70 | edge_last_ts = generate_edge_last_timestamp(temp_edgelist) 71 | edge_idx_map = generate_edge_idx_map(temp_edgelist, edge_last_ts) 72 | idx_edge_map = {v: k for k, v in edge_idx_map.items()} # key: edge index; value: actual edge (source, destination) 73 | print("Info: Number of distinct edges (from index-edge map): {}".format(len(idx_edge_map))) 74 | 75 | unique_ts_list = list(temp_edgelist.keys()) 76 | e_presence_mat = generate_edge_presence_matrix(unique_ts_list, idx_edge_map, edge_idx_map, temp_edgelist) 77 | print("Info: edge-presence-matrix shape: {}".format(e_presence_mat.shape)) 78 | # print(np.unique(e_presence_mat, return_counts=True)) 79 | e_presence_mat, test_split_ts_value = process_presence_matrix(e_presence_mat, test_ratio_p=0.85) 80 | print("Info: edge-presence-matrix shape: {}".format(e_presence_mat.shape)) 81 | # print(np.unique(e_presence_mat, return_counts=True)) 82 | fig_param = set_fig_param(network_name, 83 | fig_name = filepath, 84 | figsize = figsize, 85 | axis_title_font_size = axis_title_font_size, 86 | ticks_font_size = ticks_font_size) 87 | 88 | plot_edge_presence_matrix(e_presence_mat, test_split_ts_value, unique_ts_list, list(idx_edge_map.keys()), 89 | fig_param, test_split = test_split, add_frames=add_frame, show=show) 90 | return 91 | 92 | 93 | def generate_edge_last_timestamp(edges_per_ts): 94 | """generates a dictionary containing the last timestamp of each edge""" 95 | edge_last_ts = {} 96 | for ts, e_list in edges_per_ts.items(): 97 | for e in e_list: 98 | if e not in edge_last_ts: 99 | edge_last_ts[e] = ts 100 | else: 101 | edge_last_ts[e] = max(ts, edge_last_ts[e]) 102 | return edge_last_ts 103 | 104 | 105 | def generate_edge_idx_map(edges_per_ts, edge_last_ts): 106 | """ 107 | generates index for edges according to two-level sorting policy: 108 | 1. the first level is based on their first appearance timestamp 109 | 2. the second level is based on their last appearance timestamp 110 | """ 111 | edge_idx_map = {} # key: actual edge (source, destination), value: edge index 112 | distinct_edge_idx = 0 113 | for ts, ts_e_list in edges_per_ts.items(): 114 | e_last_ts_this_timestamp = {} 115 | for e in ts_e_list: 116 | e_last_ts_this_timestamp[e] = edge_last_ts[e] 117 | e_last_ts_this_timestamp = dict(sorted(e_last_ts_this_timestamp.items(), key=lambda item: item[1])) 118 | for e in e_last_ts_this_timestamp: 119 | if e not in edge_idx_map: 120 | edge_idx_map[e] = distinct_edge_idx 121 | distinct_edge_idx += 1 122 | 123 | return edge_idx_map 124 | 125 | 126 | def generate_edge_presence_matrix(unique_ts_list, idx_edge_map, edge_idx_map, edges_per_ts): 127 | ''' 128 | Returns presence matrix with values 0 and 1 which indicate: 129 | value = 0 : edge is not present in this timestamp 130 | value = 1 : edge is present in this timestamp 131 | 132 | shape: (ts, total number of edges) 133 | ''' 134 | num_unique_ts = len(unique_ts_list) 135 | num_unique_edge = len(idx_edge_map) 136 | e_presence_mat = np.zeros([num_unique_ts, num_unique_edge], dtype=np.int8) 137 | unique_ts_list = np.sort(unique_ts_list) 138 | 139 | for x, ts in tqdm(enumerate(unique_ts_list)): 140 | es_ts = edges_per_ts[ts] 141 | for e in es_ts: 142 | e_presence_mat[num_unique_ts - x - 1, edge_idx_map[e]] = E_PRESENCE_GENERAL 143 | 144 | return e_presence_mat 145 | 146 | def process_presence_matrix(e_presence_matrix, test_ratio_p): 147 | """ 148 | there are 4 types of edge presence: 149 | 1. only in train 150 | 2. in train and in test 151 | 3. in test and train (which is the number 2 but in later timestamps) 152 | 4. only in test 153 | X: timestamp 154 | Y: edge index 155 | """ 156 | num_unique_ts = e_presence_matrix.shape[0] 157 | num_unique_edges = e_presence_matrix.shape[1] 158 | ts_idx_list = [i for i in range(num_unique_ts)] 159 | 160 | # generating timestamp list for train and test: 161 | test_split_ts_value = int(np.quantile(ts_idx_list, test_ratio_p)) 162 | train_ts_list = [ts for ts in ts_idx_list if ts <= test_split_ts_value] # any timestamp in train/validation split 163 | test_ts_list = [ts for ts in ts_idx_list if ts > test_split_ts_value] # test_split_ts_value is in train 164 | 165 | # first level processing: differentiate train set edges: 1) Only in train set, 2) in train & test set 166 | print("First level processing: ") 167 | print("Detecting edges present in train & test sets") 168 | for tr_ts in tqdm(train_ts_list): 169 | for eidx in range(num_unique_edges): 170 | if e_presence_matrix[num_unique_ts - tr_ts - 1, eidx] == E_PRESENCE_GENERAL: 171 | for test_ts_idx in range(test_split_ts_value + 1, num_unique_ts): 172 | if e_presence_matrix[num_unique_ts - test_ts_idx - 1, eidx] == E_PRESENCE_GENERAL: # if seen in 173 | # the test set 174 | e_presence_matrix[num_unique_ts - tr_ts - 1, eidx] = E_TRAIN_AND_TEST 175 | break 176 | 177 | # differentiate test set edges: 1) transductive (seen in train, repeating in test), 2) inductive (only in test) 178 | print("Detecting transductive edges (seen in train, repeating in test)") 179 | for ts in tqdm(test_ts_list): 180 | for eidx in range(num_unique_edges): 181 | if e_presence_matrix[num_unique_ts - ts - 1, eidx] == E_PRESENCE_GENERAL: 182 | for prev_ts_idx in range(test_split_ts_value, -1, -1): 183 | if e_presence_matrix[num_unique_ts - prev_ts_idx - 1, eidx] == E_TRAIN_AND_TEST: # if seen in 184 | # the training set 185 | e_presence_matrix[num_unique_ts - ts - 1, eidx] = E_TRANSDUCTIVE 186 | break 187 | 188 | # second level processing 189 | print("Second level processing:") 190 | print("Detecting edges 1) Only in train set, 2) only in test (inductive)") 191 | for ts in tqdm(range(num_unique_ts)): 192 | for eidx in range(num_unique_edges): 193 | if ts <= test_split_ts_value: 194 | if e_presence_matrix[num_unique_ts - ts - 1, eidx] == E_PRESENCE_GENERAL: 195 | e_presence_matrix[num_unique_ts - ts - 1, eidx] = E_ONLY_TRAIN 196 | else: 197 | if e_presence_matrix[num_unique_ts - ts - 1, eidx] == E_PRESENCE_GENERAL: 198 | e_presence_matrix[num_unique_ts - ts - 1, eidx] = E_INDUCTIVE 199 | 200 | return e_presence_matrix, test_split_ts_value 201 | 202 | 203 | def plot_edge_presence_matrix(e_presence_mat, 204 | test_split_ts_value, 205 | unique_ts_list, 206 | idx_edge_list, 207 | fig_param, 208 | test_split = False, 209 | add_frames=True, 210 | show=False): 211 | print("Info: plotting edge presence heatmap for {} ...".format(fig_param.fig_name)) 212 | 213 | fig, ax = plt.subplots(figsize=fig_param.figsize) 214 | plt.subplots_adjust(bottom=0.3, left=0.2) 215 | 216 | # colors = ['white', # E_ABSENCE 217 | # '#67a9cf', # E_ONLY_TRAIN 218 | # '#ef8a62', # E_TRAIN_AND_TEST 219 | # '#ef8a62', # E_TRANSDUCTIVE 220 | # '#b2182b' # E_INDUCTIVE 221 | # ] 222 | if test_split: 223 | colors = ['white', # E_ABSENCE 224 | '#018571', # E_ONLY_TRAIN 2c7bb6 225 | '#fc8d59', # E_TRAIN_AND_TEST 226 | '#fc8d59', # E_TRANSDUCTIVE 227 | '#b2182b' # E_INDUCTIVE 228 | ] 229 | else: 230 | colors = ['white', 231 | '#ca0020', 232 | '#ca0020', 233 | '#ca0020', 234 | '#ca0020',] 235 | # print(sns.color_palette(colors, as_cmap=True)) 236 | frame_color = "grey" # "#bababa" 237 | time_split_color = "black" 238 | axis_title_font_size = fig_param.axis_title_font_size 239 | x_font_size = fig_param.ticks_font_size 240 | y_font_size = fig_param.ticks_font_size 241 | 242 | ax = sns.heatmap(e_presence_mat, cmap=sns.color_palette(colors, as_cmap=True), cbar=False) 243 | 244 | # processing x-axis 245 | x_gaps = np.linspace(0, len((idx_edge_list)), num=5) 246 | x_labels = x_gaps / len(idx_edge_list) 247 | x_labels = [int(100*x) for x in x_labels] 248 | plt.xticks(x_gaps, x_labels, rotation=0, fontsize=x_font_size) 249 | 250 | # processing y-axis 251 | t_gaps = np.linspace(0, len(unique_ts_list), num=5) 252 | t_labels = [int(len(unique_ts_list) - tidx) for tidx in t_gaps] 253 | plt.yticks(t_gaps, t_labels, rotation=90, fontsize=y_font_size) 254 | 255 | # axis & title 256 | # plt.margins(x=0) 257 | plt.xlabel("Percentage of observed edges", fontsize=axis_title_font_size) 258 | plt.ylabel("Timestamp", fontsize=axis_title_font_size) 259 | 260 | # requirements for additional features 261 | x_length = e_presence_mat.shape[1] - 1 262 | y_length = e_presence_mat.shape[0] - 1 263 | test_split_idx_value = y_length - test_split_ts_value 264 | e_border_idx = 0 265 | for e_idx in range(e_presence_mat.shape[1] - 1, -1, -1): 266 | if e_presence_mat[y_length - test_split_ts_value, e_idx] != E_ABSENT: 267 | e_border_idx = e_idx 268 | break 269 | 270 | # rectangle for different parts of the dataset 271 | if add_frames and test_split: 272 | print("Info: Border edge index:", e_border_idx) 273 | print("Info: Test split timestamp value:", test_split_ts_value) 274 | rect_train = plt.Rectangle((0, y_length - test_split_ts_value + 0.085), e_border_idx, test_split_ts_value + 0.9, 275 | fill=False, linewidth=2, edgecolor=frame_color) 276 | rect_test_mayseen = plt.Rectangle((0, 0), e_border_idx, y_length - test_split_ts_value - 0.1, 277 | fill=False, linewidth=2, edgecolor=frame_color) 278 | rect_test_new = plt.Rectangle((e_border_idx, 0), x_length - e_border_idx, 279 | y_length - test_split_ts_value - 0.1, 280 | fill=False, linewidth=2, edgecolor=frame_color) 281 | ax = ax or plt.gca() 282 | ax.add_patch(rect_train) 283 | ax.add_patch(rect_test_mayseen) 284 | ax.add_patch(rect_test_new) 285 | 286 | elif add_frames: 287 | ax.add_patch(plt.Rectangle((0, 0), x_length, y_length+1, 288 | fill=False, linewidth=2, edgecolor=frame_color)) 289 | # test split horizontal line 290 | if test_split: 291 | plt.axhline(y=test_split_idx_value, color=time_split_color, linestyle="--", linewidth=2, label='x') 292 | plt.text(x=0, y=test_split_idx_value, s='x', color=time_split_color, va='center', ha='center', 293 | fontsize=y_font_size, fontweight='heavy') 294 | 295 | if fig_param.fig_name is not None: 296 | # print("Info: file name: {}".format(fig_param.fig_name)) 297 | plt.savefig(f"{fig_param.fig_name}/{fig_param.network_name}_TET.pdf") 298 | plt.show() 299 | print("Info: plotting done!") 300 | 301 | def set_fig_param(network_name, fig_name = None, 302 | figsize = (9, 5), 303 | axis_title_font_size = 20, 304 | ticks_font_size = 22, 305 | axis_tick_gap = 20, 306 | timestamp_split_cross_mark_offset = 1): 307 | 308 | # if network_name in ['US Legislative', 'Canadian Vote', 'UN Trade', 'UN Vote']: 309 | # axis_tick_gap = axis_tick_gap * 0.35 310 | 311 | # elif network_name in ['Reddit', 'Wikipedia', 'UCI', 'Social Evo.', 'Flights', 'LastFM', 'MOOC']: 312 | # axis_tick_gap = axis_tick_gap * 0.5 313 | 314 | # elif network_name in ['Enron']: 315 | # axis_tick_gap = axis_tick_gap * 0.4 316 | 317 | fig_param = Fig_Param(network_name, 318 | fig_name, 319 | figsize, 320 | axis_title_font_size, 321 | ticks_font_size, 322 | axis_tick_gap, 323 | timestamp_split_cross_mark_offset) 324 | 325 | return fig_param 326 | 327 | class Fig_Param: 328 | def __init__(self, network_name, fig_name, figsize, axis_title_font_size, ticks_font_size, axis_tick_gap, 329 | timestamp_split_cross_mark_offset): 330 | self.network_name = network_name 331 | self.fig_name = fig_name 332 | self.figsize = figsize 333 | self.axis_title_font_size = axis_title_font_size 334 | self.ticks_font_size = ticks_font_size 335 | self.axis_tick_gap = axis_tick_gap 336 | self.timestamp_split_cross_mark_offset = timestamp_split_cross_mark_offset -------------------------------------------------------------------------------- /tgx/utils/stat.py: -------------------------------------------------------------------------------- 1 | from tgx.utils.plotting_utils import plot_for_snapshots, plot_nodes_edges_per_ts, plot_density_map 2 | import networkx as nx 3 | import numpy as np 4 | from typing import List 5 | 6 | __all__ = ["degree_over_time", 7 | "nodes_over_time", 8 | "edges_over_time", 9 | "nodes_and_edges_over_time", 10 | "get_avg_e_per_ts", 11 | "get_avg_degree", 12 | "get_num_timestamps", 13 | "get_num_unique_edges", 14 | "get_reoccurrence", 15 | "get_surprise", 16 | "get_novelty", 17 | "get_avg_node_activity", 18 | "connected_components_per_ts", 19 | "size_connected_components", 20 | "get_avg_node_engagement", 21 | "degree_density"] 22 | 23 | #* helper functions 24 | def _find(x, parent): 25 | if parent[x] == x: 26 | return x 27 | parent[x] = _find(parent[x], parent) 28 | return parent[x] 29 | 30 | 31 | def _merge(x, y, parent): 32 | root_x = _find(x, parent) 33 | root_y = _find(y, parent) 34 | 35 | if root_x != root_y: 36 | parent[root_x] = root_y 37 | 38 | 39 | def degree_over_time(graph: object, 40 | network_name: str, 41 | filepath: str = "./") -> None: 42 | r''' 43 | Plot average degree per timestamp. 44 | Parameters: 45 | graph: Graph object created by tgx.Graph containing edgelist 46 | network_name: name of the graph to be used in the output file name 47 | filepath: path to save the output figure 48 | ''' 49 | ave_degree = _calculate_average_degree_per_ts(graph) 50 | 51 | if network_name is not None: 52 | filename = f"{network_name}_ave_degree_per_ts" 53 | else: 54 | filename = "ave_degree_per_ts" 55 | plot_for_snapshots(ave_degree, y_title= "Average degree", filename=filepath+filename) 56 | return 57 | 58 | 59 | 60 | def nodes_over_time(graph: object, 61 | network_name: str, 62 | filepath: str = "./") -> None: 63 | 64 | r''' 65 | Plot number of active nodes per timestamp. 66 | Parameters: 67 | graph: Graph object created by tgx.Graph containing edgelist 68 | network_name: name of the graph to be used in the output file name 69 | filepath: path to save the output figure 70 | ''' 71 | active_nodes = _calculate_node_per_ts(graph) 72 | if network_name is not None: 73 | filename = f"{network_name}_nodes_per_ts" 74 | else: 75 | filename = "nodes_per_ts" 76 | plot_for_snapshots(active_nodes, y_title="Number of nodes", filename=filepath+filename) 77 | return 78 | 79 | def edges_over_time(graph: object, 80 | network_name: str = None, 81 | filepath: str = "./") -> None: 82 | r''' 83 | Plot number of edges per timestamp. 84 | Parameters: 85 | graph: Graph object created by tgx.Graph containing edgelist 86 | network_name: name of the graph to be used in the output file name 87 | filepath: path to save the output figure 88 | ''' 89 | active_edges = _calculate_edge_per_ts(graph) 90 | if network_name is not None: 91 | filename = f"{network_name}_edges_per_ts" 92 | else: 93 | filename = "_edges_per_ts" 94 | plot_for_snapshots(active_edges, y_title="Number of edges", filename=filepath+filename) 95 | return 96 | 97 | def nodes_and_edges_over_time(graph: object, 98 | network_name: str , 99 | filepath: str = "./"): 100 | r""" 101 | Plot number of nodes per timestamp and number of edges per timestamp in one fiugre. 102 | Parameters: 103 | graph: Graph object created by tgx.Graph containing edgelist 104 | network_name: name of the graph to be used in the output file name 105 | filepath: path to save the output figure 106 | """ 107 | print("Plotting number of nodes and edges per timestamp.") 108 | edges = _calculate_edge_per_ts(graph) 109 | nodes = _calculate_node_per_ts(graph) 110 | ts = list(range(0, len(graph.data))) 111 | if network_name is not None: 112 | filename = f"{network_name}_node_and_edges_per_ts" 113 | else: 114 | filename = "node_and_edges_per_ts" 115 | return plot_nodes_edges_per_ts(edges, nodes, ts, filename=filepath+filename) 116 | 117 | 118 | 119 | def _calculate_average_degree_per_ts(graph): 120 | total_nodes = graph.total_nodes() 121 | total_ts = len(graph.data) 122 | ave_degree = [] 123 | for ts in range(total_ts): 124 | num_edges = len(graph.data[ts]) 125 | ave_degree.append(num_edges*2/ total_nodes) 126 | return ave_degree 127 | 128 | 129 | def _calculate_node_per_ts(graph): 130 | active_nodes = [] 131 | for ts in range(len(graph.data)): 132 | active_nodes.append(graph.edgelist_node_count(graph.data[ts])) 133 | return active_nodes 134 | 135 | def _calculate_edge_per_ts(graph): 136 | active_edges = [] 137 | for ts in range(len(graph.data)): 138 | active_edges.append(len(graph.data[ts])) 139 | return active_edges 140 | 141 | def get_avg_e_per_ts(graph_edgelist: dict) -> float: 142 | r""" 143 | Calculate the average number of edges per timestamp 144 | 145 | Parameters: 146 | graph: Graph object created by tgx.Graph containing edgelist 147 | """ 148 | sum_num_e_per_ts = 0 149 | unique_ts = list(graph_edgelist.keys()) 150 | for ts in unique_ts: 151 | num_e_at_this_ts = 0 152 | edge_at_this_ts = graph_edgelist[ts] 153 | for e, repeat in edge_at_this_ts.items(): 154 | num_e_at_this_ts += repeat 155 | sum_num_e_per_ts += num_e_at_this_ts 156 | avg_num_e_per_ts = (sum_num_e_per_ts * 1.0) / len(unique_ts) 157 | 158 | print(f"INFO: avg_num_e_per_ts: {avg_num_e_per_ts}") 159 | return avg_num_e_per_ts 160 | 161 | 162 | def get_avg_degree(graph: object) -> float: 163 | r""" 164 | Calculate average degree over the timestamps 165 | Parameters: 166 | graph: Graph object created by tgx.Graph containing edgelist 167 | """ 168 | graph_edgelist = graph.data 169 | degree_avg_at_ts_list = [] 170 | unique_ts = list(graph_edgelist.keys()) 171 | for ts in unique_ts: 172 | e_at_this_ts = graph_edgelist[ts] 173 | G = nx.MultiGraph() 174 | for e, repeat in e_at_this_ts.items(): 175 | G.add_edge(e[0], e[1], weight=repeat) 176 | nodes = G.nodes() 177 | degrees = [G.degree[n] for n in nodes] 178 | degree_avg_at_ts_list.append(np.mean(degrees)) 179 | 180 | print(f"INFO: avg_degree: {np.mean(degree_avg_at_ts_list)}") 181 | return np.mean(degree_avg_at_ts_list) 182 | 183 | 184 | def get_num_timestamps(graph_edgelist:dict) -> int: 185 | r""" 186 | Calculate the number of timestamps 187 | Parameters: 188 | graph: Graph object created by tgx.Graph containing edgelist 189 | """ 190 | print(f"INFO: Number of timestamps: {len(graph_edgelist)}") 191 | return len(graph_edgelist) 192 | 193 | def get_num_unique_edges(graph: object) -> int: 194 | r""" 195 | Calculate the number of unique edges 196 | Parameters: 197 | graph: Graph object created by tgx.Graph containing edgelist 198 | """ 199 | graph_edgelist = graph.data 200 | unique_edges = {} 201 | for ts, e_list in graph_edgelist.items(): 202 | for e in e_list: 203 | if e not in unique_edges: 204 | unique_edges[e] = 1 205 | print(f"INFO: Number of unique edges: {len(unique_edges)}") 206 | return len(unique_edges) 207 | 208 | 209 | def _split_data_chronological(graph_edgelist: dict, test_ratio: int): 210 | r""" 211 | split the timestamped edge-list chronologically 212 | """ 213 | # split the temporal graph data chronologically 214 | unique_ts = np.sort(list(graph_edgelist.keys())) 215 | test_split_time = list(np.quantile(unique_ts, [1 - test_ratio]))[0] 216 | 217 | # make train-validation & test splits 218 | train_val_e_set, test_e_set = {}, {} 219 | for ts, e_list in graph_edgelist.items(): 220 | for (u,v) in e_list: 221 | 222 | if ts < test_split_time: 223 | if (u,v) not in train_val_e_set: 224 | train_val_e_set[(u,v)] = 1 225 | else: 226 | if (u,v) not in test_e_set: 227 | test_e_set[(u,v)] = 1 228 | return train_val_e_set, test_e_set 229 | 230 | def find(x, parent): 231 | if parent[x] == x: 232 | return x 233 | parent[x] = find(parent[x], parent) 234 | return parent[x] 235 | 236 | 237 | def merge(x, y, parent): 238 | root_x = find(x, parent) 239 | root_y = find(y, parent) 240 | 241 | if root_x != root_y: 242 | parent[root_x] = root_y 243 | 244 | def get_reoccurrence(graph:object, test_ratio: float=0.15) -> float: 245 | r""" 246 | Calculate the recurrence index 247 | Parameters: 248 | graph: Graph object created by tgx.Graph containing edgelist 249 | test_ratio: The ratio to split the data chronologically 250 | """ 251 | graph_edgelist = graph.data 252 | train_val_e_set, test_e_set = _split_data_chronological(graph_edgelist, test_ratio) 253 | train_val_size = len(train_val_e_set) 254 | # intersect = 0 255 | # total_train_freq = 0 256 | # for e, freq in train_val_e_set.items(): 257 | # if freq > 1: 258 | # print(e) 259 | # total_train_freq += freq 260 | # if e in test_e_set: 261 | # intersect += freq 262 | 263 | # print(total_train_freq, intersect) 264 | # reoccurrence = float(intersect * 1.0 / total_train_freq) 265 | intersect = 0 266 | for e in test_e_set: 267 | if e in train_val_e_set: 268 | intersect += 1 269 | reoccurrence = float(intersect * 1.0 / train_val_size) 270 | print(f"INFO: Reoccurrence: {reoccurrence}") 271 | return reoccurrence 272 | 273 | def get_surprise(graph, test_ratio: float = 0.15) -> float: 274 | r""" 275 | Calculate the surprise index 276 | Parameters: 277 | graph: Graph object created by tgx.Graph containing edgelist 278 | test_ratio: The ratio to split the data chronologically 279 | """ 280 | graph_edgelist = graph.data 281 | train_val_e_set, test_e_set = _split_data_chronological(graph_edgelist, test_ratio) 282 | test_size = len(test_e_set) 283 | 284 | difference = 0 285 | # total_test_freq = 0 286 | # for e, freq in test_e_set.items(): 287 | # total_test_freq += freq 288 | # if e not in train_val_e_set: 289 | # difference += freq 290 | # surprise = float(difference * 1.0 / total_test_freq) 291 | 292 | for e in test_e_set: 293 | if e not in train_val_e_set: 294 | difference += 1 295 | surprise = float(difference * 1.0 / test_size) 296 | print(f"INFO: Surprise: {surprise}") 297 | return surprise 298 | 299 | def get_novelty(graph : object) -> float: 300 | r""" 301 | Calculate the novelty index 302 | Parameters: 303 | graph: Graph object created by tgx.Graph containing edgelist 304 | """ 305 | graph_edgelist = graph.data 306 | unique_ts = np.sort(list(graph_edgelist.keys())) 307 | novelty_ts = [] 308 | for ts_idx, ts in enumerate(unique_ts): 309 | e_set_this_ts = set(list(graph_edgelist[ts])) 310 | e_set_seen = [] 311 | for idx in range(0, ts_idx): 312 | e_set_seen.append(list(graph_edgelist[unique_ts[idx]])) 313 | e_set_seen = set(item for sublist in e_set_seen for item in sublist) 314 | novelty_ts.append(float(len(e_set_this_ts - e_set_seen) * 1.0 / len(e_set_this_ts))) 315 | 316 | novelty = float(np.sum(novelty_ts) * 1.0 / len(unique_ts)) 317 | print(f"INFO: Novelty: {novelty}") 318 | return novelty 319 | 320 | 321 | def get_avg_node_activity(graph: object) -> float: 322 | r""" 323 | Calculate the average node activity, 324 | the proportion of time steps a node is present 325 | Parameters: 326 | graph: Graph object created by tgx.Graph containing edgelist 327 | """ 328 | graph_edgelist = graph.data 329 | num_unique_ts = len(graph_edgelist) 330 | node_ts = {} 331 | for ts, e_list in graph_edgelist.items(): 332 | for e in e_list: 333 | # source 334 | if e[0] not in node_ts: 335 | node_ts[e[0]] = {ts: True} 336 | else: 337 | if ts not in node_ts[e[0]]: 338 | node_ts[e[0]][ts] = True 339 | 340 | # destination 341 | if e[1] not in node_ts: 342 | node_ts[e[1]] = {ts: True} 343 | else: 344 | if ts not in node_ts[e[1]]: 345 | node_ts[e[1]][ts] = True 346 | 347 | node_activity_ratio = [] 348 | for n, ts_list in node_ts.items(): 349 | node_activity_ratio.append(float(len(ts_list) * 1.0 / num_unique_ts)) 350 | 351 | avg_node_activity = float(np.sum(node_activity_ratio) * 1.0 / len(node_activity_ratio)) 352 | print(f"INFO: Node activity ratio: {avg_node_activity}") 353 | return avg_node_activity 354 | 355 | 356 | def get_avg_node_engagement(graph: object): 357 | r""" 358 | get the average node engagement over time. 359 | node engagement represents the average number of distinct nodes that establish 360 | at least one new connection during each time step. 361 | """ 362 | graph_edgelist = graph.data 363 | engaging_nodes = [] 364 | previous_edges = set() 365 | for ts, e_list in graph_edgelist.items(): 366 | node_set = set() 367 | new_edges = {(u, v) for (u, v) in e_list if frozenset({u, v}) not in previous_edges} 368 | for u, v in new_edges: 369 | if u not in node_set: 370 | node_set.add(u) 371 | if v not in node_set: 372 | node_set.add(v) 373 | # engaging_nodes.append((ts, len(node_set))) 374 | engaging_nodes.append(len(node_set)) 375 | previous_edges = {frozenset({u, v}) for (u, v) in e_list} # Update the set of previous edges for the next timestamp 376 | return engaging_nodes 377 | 378 | def degree_density(graph: tuple, 379 | k: int = 10, 380 | network_name: str = None, 381 | plot_path: str = "./") -> None: 382 | r""" 383 | Plot density map of node degrees per time window 384 | Parameters: 385 | graph_edgelist: Dictionary containing graph data 386 | k: number of time windows 387 | network_name: name of the graph to be used in the output file name 388 | plot_path: path to save the output figure 389 | """ 390 | graph_edgelist = graph.data 391 | degrees_by_k_list = [] 392 | temp = [] 393 | temp_idx = 0 394 | unique_ts = list(graph_edgelist.keys()) 395 | 396 | for ts in unique_ts: 397 | e_at_this_ts = graph_edgelist[ts] 398 | G = nx.MultiGraph() 399 | 400 | for e in e_at_this_ts: 401 | G.add_edge(e[0], e[1]) 402 | 403 | nodes = G.nodes() 404 | degrees = [G.degree[n] for n in nodes] 405 | 406 | if temp_idx None: 429 | r""" 430 | Plot number of connected components per timestamp 431 | Parameters: 432 | graph: a list containing graph snapshots 433 | network_name: name of the graph to be used in the output file name 434 | plot_path: path to save the output figure 435 | """ 436 | num_components = [] 437 | for t in range(len(graph.data)): 438 | edgelist_t = graph.data[t] 439 | nodes_t = graph.edgelist_node_list(edgelist_t) 440 | parent = {node: node for node in nodes_t} 441 | 442 | for edge in edgelist_t: 443 | (u, v) = edge 444 | _merge(u, v, parent) 445 | 446 | num = 0 447 | for u in nodes_t: 448 | if parent[u] == u: 449 | num += 1 450 | num_components.append(num) 451 | 452 | if network_name is not None: 453 | filename = f"{network_name}_connected_components_per_ts" 454 | else: 455 | filename = "_connected_components_per_ts" 456 | 457 | plot_for_snapshots(num_components, y_title="Number of connected components", filename=plot_path+filename) 458 | return 459 | 460 | 461 | # TODO turn this into a plotting function as well, can return the computed stats 462 | def size_connected_components(graph: tuple) -> List[List]: 463 | r""" 464 | Calculate the sizes of connected components per timestamp 465 | Returns: 466 | list[list]: A list containing lists of sizes of connected components for each timestamp. 467 | """ 468 | component_sizes = [] 469 | for t in range(len(graph.data)): 470 | edgelist_t = graph.data[t] 471 | nodes_t = graph.edgelist_node_list(edgelist_t) 472 | parent = {node: node for node in nodes_t} 473 | 474 | for edge in edgelist_t: 475 | (u, v) = edge 476 | _merge(u, v, parent) 477 | 478 | component_sizes_t = {} 479 | for u in nodes_t: 480 | root = _find(u, parent) 481 | if root not in component_sizes_t: 482 | component_sizes_t[root] = 0 483 | component_sizes_t[root] += 1 484 | 485 | component_sizes_t_list = list(component_sizes_t.values()) 486 | component_sizes.append(component_sizes_t_list) 487 | 488 | return component_sizes 489 | 490 | # TODO turn this into a plotting function as well, can return the computed stats 491 | def get_avg_node_engagement(graph: tuple) -> List[int]: 492 | r""" 493 | Calculate the average node engagement per timestamp, 494 | the average number of distinct nodes that establish 495 | at least one new connection. 496 | Parameters: 497 | graph_edgelist: Dictionary containing graph data 498 | """ 499 | engaging_nodes = [] 500 | previous_edges = set() 501 | 502 | for ts in range(len(graph.data)): 503 | edgelist_t = graph.data[ts] 504 | new_nodes = set() 505 | 506 | for edge in edgelist_t: 507 | (u, v) = edge 508 | if frozenset({u, v}) not in previous_edges: 509 | if u not in new_nodes: 510 | new_nodes.add(u) 511 | if v not in new_nodes: 512 | new_nodes.add(v) 513 | 514 | engaging_nodes.append(len(new_nodes)) 515 | previous_edges = {frozenset({u, v}) for (u, v) in edgelist_t} # Update the set of previous edges for next timestamp 516 | 517 | return engaging_nodes --------------------------------------------------------------------------------