├── tgx ├── io │ ├── __init__.py │ ├── write.py │ └── read.py ├── classes │ ├── __init__.py │ ├── .ipynb_checkpoints │ │ └── graph-checkpoint.py │ └── graph.py ├── data │ ├── __init__.py │ ├── tgb.py │ └── builtin.py ├── viz │ ├── __init__.py │ ├── TEA.py │ └── TET.py ├── utils │ ├── __init__.py │ ├── .ipynb_checkpoints │ │ ├── edgelist-checkpoint.py │ │ ├── graph_stat-checkpoint.py │ │ └── plotting_utils-checkpoint.py │ ├── plotting_utils.py │ ├── graph_utils.py │ └── stat.py └── __init__.py ├── py_tgx.egg-info ├── top_level.txt ├── dependency_links.txt ├── SOURCES.txt └── PKG-INFO ├── docs ├── io │ └── io.md ├── data │ ├── tgb.md │ └── builtin.md ├── viz │ ├── vis_tet.md │ └── vis_tea.md ├── classes │ └── graph.md ├── utils │ ├── graph_stats.md │ ├── graph_utils.md │ └── plotting_utils.md ├── 2023_TGX_logo.png ├── gallery │ ├── TEA │ │ ├── Enron.png │ │ ├── MOOC.png │ │ ├── UCI.png │ │ ├── CanParl.png │ │ ├── Flights.png │ │ ├── LastFM.png │ │ ├── Reddit.png │ │ ├── UNVote.png │ │ ├── USLegis.png │ │ ├── Contacts.png │ │ ├── SocialEvo.png │ │ ├── tgbl-coin.png │ │ ├── tgbl-wiki.png │ │ ├── tgbn-genre.png │ │ ├── tgbn-trade.png │ │ ├── tgbl-comment.png │ │ ├── tgbl-flight.png │ │ ├── tgbl-review.png │ │ └── tgbn-reddit.png │ ├── TET │ │ ├── Enron.png │ │ ├── MOOC.png │ │ ├── UCI.png │ │ ├── CanParl.png │ │ ├── Flights.png │ │ ├── LastFM.png │ │ ├── Reddit.png │ │ ├── UNVote.png │ │ ├── USLegis.png │ │ ├── Contacts.png │ │ ├── SocialEvo.png │ │ ├── tgbl-coin.png │ │ ├── tgbl-wiki.png │ │ ├── tgbn-genre.png │ │ ├── tgbn-trade.png │ │ ├── tgbl-comment.png │ │ ├── tgbl-flight.png │ │ ├── tgbl-review.png │ │ └── tgbn-reddit.png │ ├── degree │ │ ├── mooc_ave_degree_per_ts.png │ │ ├── uci_ave_degree_per_ts.png │ │ ├── UNvote_ave_degree_per_ts.png │ │ ├── enron_ave_degree_per_ts.png │ │ ├── lastfm_ave_degree_per_ts.png │ │ ├── reddit_ave_degree_per_ts.png │ │ ├── CanParl_ave_degree_per_ts.png │ │ ├── Contacts_ave_degree_per_ts.png │ │ ├── Flights_ave_degree_per_ts.png │ │ ├── SocialEvo_ave_degree_per_ts.png │ │ ├── USLegis_ave_degree_per_ts.png │ │ ├── tgbl-coin_ave_degree_per_ts.png │ │ ├── tgbl-wiki_ave_degree_per_ts.png │ │ ├── tgbl-flight_ave_degree_per_ts.png │ │ ├── tgbl-review_ave_degree_per_ts.png │ │ ├── tgbn-genre_ave_degree_per_ts.png │ │ ├── tgbn-reddit_ave_degree_per_ts.png │ │ ├── tgbn-trade_ave_degree_per_ts.png │ │ └── tgbl-comment_ave_degree_per_ts.png │ ├── node_edge │ │ ├── reddit_nodes_per_ts.png │ │ ├── mooc_node&edge_per_ts.png │ │ ├── uci_node&edge_per_ts.png │ │ ├── CanParl_node&edge_per_ts.png │ │ ├── Flights_node&edge_per_ts.png │ │ ├── UNvote_node&edge_per_ts.png │ │ ├── USLegis_node&edge_per_ts.png │ │ ├── enron_node&edge_per_ts.png │ │ ├── lastfm_node&edge_per_ts.png │ │ ├── reddit_node&edge_per_ts.png │ │ ├── Contacts_node&edge_per_ts.png │ │ ├── SocialEvo_node&edge_per_ts.png │ │ ├── tgbl-coin_node&edge_per_ts.png │ │ ├── tgbl-wiki_node&edge_per_ts.png │ │ ├── tgbl-comment_node&edge_per_ts.png │ │ ├── tgbl-flight_node&edge_per_ts.png │ │ ├── tgbl-review_node&edge_per_ts.png │ │ ├── tgbn-genre_node&edge_per_ts.png │ │ ├── tgbn-reddit_node&edge_per_ts.png │ │ └── tgbn-trade_node&edge_per_ts.png │ ├── uci.md │ ├── mooc.md │ ├── enron.md │ ├── lastfm.md │ ├── unvote.md │ ├── flight.md │ ├── uslegis.md │ ├── contact.md │ ├── tgbl-coin.md │ ├── socialevo.md │ ├── tgbn-genre.md │ ├── tgbn-trade.md │ ├── tgbl-flight.md │ ├── tgbl-wiki.md │ ├── tgbn-reddit.md │ ├── reddit.md │ ├── tgbl-comment.md │ ├── tgbl-review.md │ ├── canparl.md │ ├── 0-tet-tgb.md │ ├── 0-tea-tgb.md │ 
├── 0-tet-builtin.md │ ├── 0-degree-tgb.md │ ├── 0-tea-builtin.md │ ├── 0-node_edge-tgb.md │ ├── 0-degree-builtin.md │ ├── 0-node_edge-builtin.md │ └── dataset.md ├── tutorials │ ├── toy_data.csv │ └── data_loader.ipynb ├── index.md └── contribute.md ├── imgs └── 2023_TGX_logo.png ├── pyproject.toml ├── requirements.txt ├── setup.py ├── setup.cfg ├── .github └── workflows │ └── ci.yml ├── full_requirements.txt ├── LICENSE ├── README.md ├── examples ├── data_viz.py ├── starting_example.py └── .ipynb_checkpoints │ └── test-checkpoint.py ├── .gitignore └── mkdocs.yml /tgx/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tgx/classes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tgx/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /py_tgx.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | tgx 2 | -------------------------------------------------------------------------------- /py_tgx.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/io/io.md: -------------------------------------------------------------------------------- 1 | # IO 2 | 3 | ::: tgx.io.read 4 | -------------------------------------------------------------------------------- /tgx/viz/__init__.py: -------------------------------------------------------------------------------- 1 | # from tgx.viz import TEA -------------------------------------------------------------------------------- /docs/data/tgb.md: -------------------------------------------------------------------------------- 1 | ## TGB Datasets 2 | ::: tgx.data.tgb -------------------------------------------------------------------------------- /docs/viz/vis_tet.md: -------------------------------------------------------------------------------- 1 | ## TET Plots 2 | 3 | ::: tgx.viz.TET -------------------------------------------------------------------------------- /docs/viz/vis_tea.md: -------------------------------------------------------------------------------- 1 | ## TEA Plots 2 | 3 | ::: tgx.viz.TEA 4 | -------------------------------------------------------------------------------- /docs/classes/graph.md: -------------------------------------------------------------------------------- 1 | ## Graph 2 | ::: tgx.classes.graph 3 | 4 | -------------------------------------------------------------------------------- /docs/data/builtin.md: -------------------------------------------------------------------------------- 1 | ## Builtin Datasets 2 | ::: tgx.data.builtin -------------------------------------------------------------------------------- /docs/utils/graph_stats.md: -------------------------------------------------------------------------------- 1 | ## Graph Stats 2 | ::: tgx.utils.stat 3 | 4 | 5 | -------------------------------------------------------------------------------- /docs/utils/graph_utils.md: -------------------------------------------------------------------------------- 1 | ## Graph Utils 2 | 3 | ::: tgx.utils.graph_utils 
-------------------------------------------------------------------------------- /docs/utils/plotting_utils.md: -------------------------------------------------------------------------------- 1 | ## Plotting Utils 2 | 3 | ::: tgx.utils.plotting_utils -------------------------------------------------------------------------------- /docs/2023_TGX_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/2023_TGX_logo.png -------------------------------------------------------------------------------- /imgs/2023_TGX_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/imgs/2023_TGX_logo.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Enron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Enron.png -------------------------------------------------------------------------------- /docs/gallery/TEA/MOOC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/MOOC.png -------------------------------------------------------------------------------- /docs/gallery/TEA/UCI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/UCI.png -------------------------------------------------------------------------------- /docs/gallery/TET/Enron.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Enron.png -------------------------------------------------------------------------------- /docs/gallery/TET/MOOC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/MOOC.png -------------------------------------------------------------------------------- /docs/gallery/TET/UCI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/UCI.png -------------------------------------------------------------------------------- /docs/gallery/TEA/CanParl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/CanParl.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Flights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Flights.png -------------------------------------------------------------------------------- /docs/gallery/TEA/LastFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/LastFM.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Reddit.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Reddit.png -------------------------------------------------------------------------------- /docs/gallery/TEA/UNVote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/UNVote.png -------------------------------------------------------------------------------- /docs/gallery/TEA/USLegis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/USLegis.png -------------------------------------------------------------------------------- /docs/gallery/TET/CanParl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/CanParl.png -------------------------------------------------------------------------------- /docs/gallery/TET/Flights.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Flights.png -------------------------------------------------------------------------------- /docs/gallery/TET/LastFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/LastFM.png -------------------------------------------------------------------------------- /docs/gallery/TET/Reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Reddit.png -------------------------------------------------------------------------------- /docs/gallery/TET/UNVote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/UNVote.png -------------------------------------------------------------------------------- /docs/gallery/TET/USLegis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/USLegis.png -------------------------------------------------------------------------------- /docs/gallery/TEA/Contacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/Contacts.png -------------------------------------------------------------------------------- /docs/gallery/TEA/SocialEvo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/SocialEvo.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-coin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-coin.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-wiki.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-wiki.png 
-------------------------------------------------------------------------------- /docs/gallery/TEA/tgbn-genre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbn-genre.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbn-trade.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbn-trade.png -------------------------------------------------------------------------------- /docs/gallery/TET/Contacts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/Contacts.png -------------------------------------------------------------------------------- /docs/gallery/TET/SocialEvo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/SocialEvo.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-coin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-coin.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-wiki.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-wiki.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbn-genre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbn-genre.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbn-trade.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbn-trade.png -------------------------------------------------------------------------------- /docs/tutorials/toy_data.csv: -------------------------------------------------------------------------------- 1 | time, source, destination 2 | 0,1,2 3 | 0,2,1 4 | 0,3,1 5 | 1,2,2 6 | 1,1,2 7 | 1,3,1 -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-comment.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-flight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-flight.png -------------------------------------------------------------------------------- /docs/gallery/TEA/tgbl-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbl-review.png -------------------------------------------------------------------------------- 
/docs/gallery/TEA/tgbn-reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TEA/tgbn-reddit.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-comment.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-flight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-flight.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbl-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbl-review.png -------------------------------------------------------------------------------- /docs/gallery/TET/tgbn-reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/TET/tgbn-reddit.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /docs/gallery/degree/mooc_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/mooc_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/uci_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/uci_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/reddit_nodes_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/reddit_nodes_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/UNvote_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/UNvote_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/enron_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/enron_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/lastfm_ave_degree_per_ts.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/lastfm_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/reddit_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/reddit_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/mooc_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/mooc_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/uci_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/uci_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/CanParl_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/CanParl_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/Contacts_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/Contacts_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/Flights_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/Flights_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/SocialEvo_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/SocialEvo_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/USLegis_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/USLegis_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-coin_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-coin_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-wiki_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-wiki_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/CanParl_node&edge_per_ts.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/CanParl_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/Flights_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/Flights_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/UNvote_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/UNvote_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/USLegis_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/USLegis_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/enron_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/enron_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/lastfm_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/lastfm_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/reddit_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/reddit_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-flight_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-flight_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-review_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-review_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbn-genre_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbn-genre_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbn-reddit_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbn-reddit_ave_degree_per_ts.png 
-------------------------------------------------------------------------------- /docs/gallery/degree/tgbn-trade_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbn-trade_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/Contacts_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/Contacts_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/SocialEvo_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/SocialEvo_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-coin_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-coin_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-wiki_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-wiki_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/degree/tgbl-comment_ave_degree_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/degree/tgbl-comment_ave_degree_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-comment_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-comment_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-flight_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-flight_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbl-review_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbl-review_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbn-genre_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbn-genre_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbn-reddit_node&edge_per_ts.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbn-reddit_node&edge_per_ts.png -------------------------------------------------------------------------------- /docs/gallery/node_edge/tgbn-trade_node&edge_per_ts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ComplexData-MILA/TGX/HEAD/docs/gallery/node_edge/tgbn-trade_node&edge_per_ts.png -------------------------------------------------------------------------------- /tgx/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from tgx.utils.graph_stat import * 2 | # from tgx.utils.graph_utils import * 3 | # from tgx.utils.plotting_utils import * 4 | 5 | # from . import * -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools>=69.1.0 2 | wheel>=0.42.0 3 | networkx>=3.2.1 4 | args>=0.1.0 5 | requests>=2.28.2 6 | matplotlib>=3.8.0 7 | pandas>=1.5.3 8 | numpy>=1.26.0 9 | seaborn>=0.13.0 10 | tqdm>=4.66.1 11 | scikit-learn>=1.3.1 12 | py-tgb>=0.9.2 13 | -------------------------------------------------------------------------------- /docs/gallery/uci.md: -------------------------------------------------------------------------------- 1 | ## UCI 2 | #### TEA Plot 3 | ![UCI](TEA/UCI.png){ width="400"} 4 | #### TET Plot 5 | ![UCI](TET/UCI.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![UCI](degree/uci_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![UCI](node_edge/uci_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/mooc.md: -------------------------------------------------------------------------------- 1 | 2 | ## MOOC 3 | #### TEA Plot 4 | ![image](TEA/MOOC.png){ width="400"} 5 | #### TET Plot 6 | ![image](TET/MOOC.png){ width="400"} 7 | 8 | #### Average degree over time 9 | ![image](degree/mooc_ave_degree_per_ts.png){ width="400"} 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/mooc_node&edge_per_ts.png){ width="400"} -------------------------------------------------------------------------------- /docs/gallery/enron.md: -------------------------------------------------------------------------------- 1 | ## Enron 2 | #### TEA Plot 3 | ![image](TEA/Enron.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/Enron.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/enron_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/enron_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/lastfm.md: -------------------------------------------------------------------------------- 1 | 2 | ## LastFM 3 | #### TEA Plot 4 | ![image](TEA/LastFM.png){ width="400"} 5 | #### TET Plot 6 | ![image](TET/LastFM.png){ width="400"} 7 | #### Average degree over time 8 | ![image](degree/lastfm_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/lastfm_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/unvote.md: -------------------------------------------------------------------------------- 1 | ## UN Vote 2 | #### TEA Plot 3 | 
![image](TEA/UNVote.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/UNVote.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/UNvote_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/UNvote_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/flight.md: -------------------------------------------------------------------------------- 1 | ## Flights 2 | #### TEA Plot 3 | ![image](TEA/Flights.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/Flights.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/Flights_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/Flights_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/uslegis.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## US Legis 4 | #### TEA Plot 5 | ![image](TEA/USLegis.png){ width="400"} 6 | #### TET Plot 7 | ![image](TET/USLegis.png){ width="400"} 8 | #### Average degree over time 9 | ![image](degree/USLegis_ave_degree_per_ts.png){ width="400"} 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/USLegis_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/contact.md: -------------------------------------------------------------------------------- 1 | ## Contacts 2 | #### TEA Plot 3 | ![image](TEA/Contacts.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/Contacts.png){ width="400"} 6 | 7 | 8 | #### Average degree over time 9 | ![image](degree/Contacts_ave_degree_per_ts.png){ width="400"} 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/Contacts_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-coin.md: -------------------------------------------------------------------------------- 1 | ## tgbl-coin 2 | #### TEA Plot 3 | ![image](TEA/tgbl-coin.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-coin.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbl-coin_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbl-coin_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /tgx/__init__.py: -------------------------------------------------------------------------------- 1 | from tgx.classes.graph import Graph 2 | 3 | from tgx.data.builtin import builtin 4 | from tgx.data.tgb import tgb_data 5 | 6 | from tgx.io.read import read_csv 7 | from tgx.io.write import write_csv 8 | 9 | from tgx.viz.TEA import TEA 10 | from tgx.viz.TET import TET 11 | 12 | from tgx.utils.stat import * 13 | from tgx.utils.graph_utils import * 14 | 15 | -------------------------------------------------------------------------------- /docs/gallery/socialevo.md: -------------------------------------------------------------------------------- 1 | ## Social Evo 2 | #### TEA Plot 3 | ![image](TEA/SocialEvo.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/SocialEvo.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/SocialEvo_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over 
time 12 | ![image](node_edge/SocialEvo_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbn-genre.md: -------------------------------------------------------------------------------- 1 | ## tgbn-genre 2 | #### TEA Plot 3 | ![image](TEA/tgbn-genre.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbn-genre.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbn-genre_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbn-genre_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbn-trade.md: -------------------------------------------------------------------------------- 1 | ## tgbn-trade 2 | #### TEA Plot 3 | ![image](TEA/tgbn-trade.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbn-trade.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbn-trade_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbn-trade_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-flight.md: -------------------------------------------------------------------------------- 1 | ## tgbl-flight 2 | #### TEA Plot 3 | ![image](TEA/tgbl-flight.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-flight.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbl-flight_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbl-flight_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-wiki.md: -------------------------------------------------------------------------------- 1 | ## tgbl-wiki 2 | #### TEA Plot 3 | ![image](TEA/tgbl-wiki.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-wiki.png){ width="400"} 6 | 7 | 8 | 9 | #### Average degree over time 10 | ![image](degree/tgbl-wiki_ave_degree_per_ts.png){ width="400"} 11 | 12 | 13 | #### Node and Edge over time 14 | ![image](node_edge/tgbl-wiki_node&edge_per_ts.png){ width="400"} 15 | -------------------------------------------------------------------------------- /docs/gallery/tgbn-reddit.md: -------------------------------------------------------------------------------- 1 | ## tgbn-reddit 2 | #### TEA Plot 3 | ![image](TEA/tgbn-reddit.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbn-reddit.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbn-reddit_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbn-reddit_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/reddit.md: -------------------------------------------------------------------------------- 1 | ## Reddit 2 | #### TEA Plot 3 | ![image](TEA/Reddit.png){ width="400"} 4 | 5 | 6 | #### TET Plot 7 | ![image](TET/Reddit.png){ width="400"} 8 | 9 | #### Average degree over time 10 | ![image](degree/reddit_ave_degree_per_ts.png){ width="400"} 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/reddit_node&edge_per_ts.png){ width="400"} -------------------------------------------------------------------------------- 
/docs/gallery/tgbl-comment.md: -------------------------------------------------------------------------------- 1 | ## tgbl-comment 2 | #### TEA Plot 3 | ![image](TEA/tgbl-comment.png){ width="400"} 4 | #### TET Plot 5 | ![image](TET/tgbl-comment.png){ width="400"} 6 | 7 | #### Average degree over time 8 | ![image](degree/tgbl-comment_ave_degree_per_ts.png){ width="400"} 9 | 10 | 11 | #### Node and Edge over time 12 | ![image](node_edge/tgbl-comment_node&edge_per_ts.png){ width="400"} 13 | -------------------------------------------------------------------------------- /docs/gallery/tgbl-review.md: -------------------------------------------------------------------------------- 1 | 2 | ## tgbl-review 3 | #### TEA Plot 4 | ![image](TEA/tgbl-review.png){ width="400"} 5 | #### TET Plot 6 | ![image](TET/tgbl-review.png){ width="400"} 7 | 8 | #### Average degree over time 9 | ![image](degree/tgbl-review_ave_degree_per_ts.png){ width="400"} 10 | 11 | 12 | #### Node and Edge over time 13 | ![image](node_edge/tgbl-review_node&edge_per_ts.png){ width="400"} 14 | -------------------------------------------------------------------------------- /docs/gallery/canparl.md: -------------------------------------------------------------------------------- 1 | 2 | ## Can. Parl. 3 | #### TEA Plot 4 | ![Canadian Parliment](TEA/CanParl.png){ width="400"} 5 | #### TET Plot 6 | ![Canadian Parliment](TET/CanParl.png){ width="400"} 7 | #### Average degree over time 8 | ![Canadian Parliment](degree/CanParl_ave_degree_per_ts.png){ width="400"} 9 | #### Node and Edge over time 10 | ![Canadian Parliment](node_edge/CanParl_node&edge_per_ts.png){ width="400"} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | def readme(): 5 | with open("README.md") as f: 6 | return f.read() 7 | 8 | 9 | setup( 10 | name="py-tgx", 11 | version="0.4.0", 12 | description="Temporal Graph Visualization with TGX", 13 | url="https://github.com/ComplexData-MILA/TGX", 14 | keywords="Temporal Graph Visualization", 15 | license="MIT", 16 | packages=find_packages(), 17 | ) -------------------------------------------------------------------------------- /docs/gallery/0-tet-tgb.md: -------------------------------------------------------------------------------- 1 | ## TET Plots 2 | 3 | 4 | 5 | ### tgbl-wiki 6 | ![image](TET/tgbl-wiki.png){ width="400"} 7 | 8 | ### tgbl-review 9 | ![image](TET/tgbl-review.png){ width="400"} 10 | 11 | ### tgbl-coin 12 | ![image](TET/tgbl-coin.png){ width="400"} 13 | 14 | ### tgbl-comment 15 | ![image](TET/tgbl-comment.png){ width="400"} 16 | 17 | ### tgbl-flight 18 | ![image](TET/tgbl-flight.png){ width="400"} 19 | 20 | ### tgbn-trade 21 | ![image](TET/tgbn-trade.png){ width="400"} 22 | 23 | ### tgbn-genre 24 | ![image](TET/tgbn-genre.png){ width="400"} 25 | 26 | ### tgbn-reddit 27 | ![image](TET/tgbn-reddit.png){ width="400"} 28 | -------------------------------------------------------------------------------- /docs/gallery/0-tea-tgb.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | 4 | 5 | ### tgbl-wiki 6 | ![image](TEA/tgbl-wiki.png){ width="400"} 7 | 8 | ### tgbl-review 9 | ![image](TEA/tgbl-review.png){ width="400"} 10 | 11 | ### tgbl-coin 12 | ![image](TEA/tgbl-coin.png){ width="400"} 13 | 14 | 15 | ### tgbl-comment 16 | ![image](TEA/tgbl-comment.png){ width="400"} 17 | 18 | 
### tgbl-flight 19 | ![image](TEA/tgbl-flight.png){ width="400"} 20 | 21 | ### tgbn-trade 22 | ![image](TEA/tgbn-trade.png){ width="400"} 23 | 24 | ### tgbn-genre 25 | ![image](TEA/tgbn-genre.png){ width="400"} 26 | 27 | ### tgbn-reddit 28 | ![image](TEA/tgbn-reddit.png){ width="400"} 29 | 30 | 31 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = py-tgx 3 | version = 0.2.2 4 | author = ["Razieh Shirzadkhani ", "shenyang Huang ", "Elahe Kooshafar", "Farimah Poursafaei"] 5 | author_email = razieh.shirzadkhani@gmail.com 6 | description = Temporal Graph Analysis project repo 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/fpour/TGX 10 | project_urls = 11 | Bug Tracker = https://github.com/fpour/TGX/issues 12 | classifiers = 13 | Programming Language :: Python :: 3.9 14 | 15 | [options] 16 | package_dir = 17 | = ./ 18 | packages = find: 19 | python_requires = >=3.6 20 | 21 | -------------------------------------------------------------------------------- /docs/gallery/0-tet-builtin.md: -------------------------------------------------------------------------------- 1 | ## TET Plots 2 | 3 | ### Reddit 4 | ![image](TET/Reddit.png){ width="400"} 5 | 6 | ### MOOC 7 | ![image](TET/MOOC.png){ width="400"} 8 | 9 | ### LastFM 10 | ![image](TET/LastFM.png){ width="400"} 11 | 12 | ### Enron 13 | ![image](TET/Enron.png){ width="400"} 14 | 15 | ### Social Evo 16 | ![image](TET/SocialEvo.png){ width="400"} 17 | 18 | ### UCI 19 | ![UCI](TET/UCI.png){ width="400"} 20 | 21 | ### Flights 22 | ![image](TET/Flights.png){ width="400"} 23 | 24 | ### Can. Parl. 
25 | ![Canadian Parliment](TET/CanParl.png){ width="400"} 26 | 27 | ### US Legis 28 | ![image](TET/USLegis.png){ width="400"} 29 | 30 | ### UN Vote 31 | ![image](TET/UNVote.png){ width="400"} 32 | 33 | ### Contacts 34 | ![image](TET/Contacts.png){ width="400"} 35 | 36 | 37 | -------------------------------------------------------------------------------- /py_tgx.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE 2 | README.md 3 | pyproject.toml 4 | setup.cfg 5 | setup.py 6 | ./py_tgx.egg-info/PKG-INFO 7 | ./py_tgx.egg-info/SOURCES.txt 8 | ./py_tgx.egg-info/dependency_links.txt 9 | ./py_tgx.egg-info/top_level.txt 10 | ./tgx/__init__.py 11 | ./tgx/classes/__init__.py 12 | ./tgx/classes/graph.py 13 | ./tgx/data/__init__.py 14 | ./tgx/data/builtin.py 15 | ./tgx/data/tgb.py 16 | ./tgx/io/__init__.py 17 | ./tgx/io/read.py 18 | ./tgx/io/write.py 19 | ./tgx/utils/__init__.py 20 | ./tgx/utils/graph_utils.py 21 | ./tgx/utils/plotting_utils.py 22 | ./tgx/utils/stat.py 23 | ./tgx/viz/TEA.py 24 | ./tgx/viz/TET.py 25 | ./tgx/viz/__init__.py 26 | py_tgx.egg-info/PKG-INFO 27 | py_tgx.egg-info/SOURCES.txt 28 | py_tgx.egg-info/dependency_links.txt 29 | py_tgx.egg-info/top_level.txt 30 | tgx/__init__.py -------------------------------------------------------------------------------- /docs/gallery/0-degree-tgb.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### tgbl-wiki 7 | ![image](degree/tgbl-wiki_ave_degree_per_ts.png){ width="400"} 8 | 9 | ### tgbl-review 10 | ![image](degree/tgbl-review_ave_degree_per_ts.png){ width="400"} 11 | 12 | ### tgbl-coin 13 | ![image](degree/tgbl-coin_ave_degree_per_ts.png){ width="400"} 14 | 15 | 16 | ### tgbl-comment 17 | ![image](degree/tgbl-comment_ave_degree_per_ts.png){ width="400"} 18 | 19 | ### tgbl-flight 20 | ![image](degree/tgbl-flight_ave_degree_per_ts.png){ width="400"} 21 | 22 | ### tgbn-trade 23 | ![image](degree/tgbn-trade_ave_degree_per_ts.png){ width="400"} 24 | 25 | ### tgbn-genre 26 | ![image](degree/tgbn-genre_ave_degree_per_ts.png){ width="400"} 27 | 28 | ### tgbn-reddit 29 | ![image](degree/tgbn-reddit_ave_degree_per_ts.png){ width="400"} 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/gallery/0-tea-builtin.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### Reddit 7 | ![image](TEA/Reddit.png){ width="400"} 8 | 9 | ### MOOC 10 | ![image](TEA/MOOC.png){ width="400"} 11 | 12 | ### LastFM 13 | ![image](TEA/LastFM.png){ width="400"} 14 | 15 | ### Enron 16 | ![image](TEA/Enron.png){ width="400"} 17 | 18 | ### Social Evo 19 | ![image](TEA/SocialEvo.png){ width="400"} 20 | 21 | 22 | ### UCI 23 | ![UCI](TEA/UCI.png){ width="400"} 24 | 25 | ### Flights 26 | ![image](TEA/Flights.png){ width="400"} 27 | 28 | ### Can. Parl. 
29 | ![Canadian Parliment](TEA/CanParl.png){ width="400"} 30 | 31 | ### US Legis 32 | ![image](TEA/USLegis.png){ width="400"} 33 | 34 | ### UN Vote 35 | ![image](TEA/UNVote.png){ width="400"} 36 | 37 | ### Contacts 38 | ![image](TEA/Contacts.png){ width="400"} 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | permissions: 8 | contents: write 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v4 15 | with: 16 | python-version: 3.x 17 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 18 | - uses: actions/cache@v3 19 | with: 20 | key: mkdocs-material-${{ env.cache_id }} 21 | path: .cache 22 | restore-keys: | 23 | mkdocs-material- 24 | - run: pip install mkdocs-material 25 | - run: pip install mkdocs-glightbox 26 | - run: pip install mkdocstrings-python 27 | - run: pip install mkdocs-jupyter 28 | - run: mkdocs gh-deploy --force 29 | -------------------------------------------------------------------------------- /docs/gallery/0-node_edge-tgb.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | ### tgbl-wiki 6 | ![image](node_edge/tgbl-wiki_node&edge_per_ts.png){ width="400"} 7 | 8 | ### tgbl-review 9 | ![image](node_edge/tgbl-review_node&edge_per_ts.png){ width="400"} 10 | 11 | ### tgbl-coin 12 | ![image](node_edge/tgbl-coin_node&edge_per_ts.png){ width="400"} 13 | 14 | 15 | ### tgbl-comment 16 | ![image](node_edge/tgbl-comment_node&edge_per_ts.png){ width="400"} 17 | 18 | ### tgbl-flight 19 | ![image](node_edge/tgbl-flight_node&edge_per_ts.png){ width="400"} 20 | 21 | ### tgbn-trade 22 | ![image](node_edge/tgbn-trade_node&edge_per_ts.png){ width="400"} 23 | 24 | ### tgbn-genre 25 | ![image](node_edge/tgbn-genre_node&edge_per_ts.png){ width="400"} 26 | 27 | ### tgbn-reddit 28 | ![image](node_edge/tgbn-reddit_node&edge_per_ts.png){ width="400"} 29 | 30 | 31 | -------------------------------------------------------------------------------- /tgx/io/write.py: -------------------------------------------------------------------------------- 1 | # # a = {1:[(1,3), (4,5), (5,6)], 2 | # # 2:[(2,5)]} 3 | # # print(a.items()) 4 | 5 | 6 | # Details = {"Destination": "China", 7 | # "Nationality": "Italian", "Age": []} 8 | 9 | # print("Original:", Details) 10 | 11 | # # appending the list 12 | # Details["Age"] += [20, "Twenty"] 13 | # print("Modified:", Details) 14 | 15 | 16 | # a1 = [(1,2,3), (1,2,3), (2,3,4)] 17 | # d={} 18 | # lis = [] 19 | # t = 1 20 | # for i in a1: 21 | # q1=i[0] 22 | # q2=i[1] 23 | # q3=i[2] 24 | # if q1 not in d: 25 | # d[q1] = [] 26 | # print(d) 27 | # d[q1].append((q2,q3)) 28 | # if q1 != t: 29 | # d[t] = lis 30 | # lis=[] 31 | # t = q1 32 | # lis.append((q2,q3)) 33 | # d[t] = lis 34 | # print(d) 35 | 36 | 37 | # for i, l in a.items(): 38 | # for s in l: 39 | # print (i, s[0], s[1]) 40 | 41 | def write_csv(): 42 | pass -------------------------------------------------------------------------------- /full_requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.3 2 | aiosignal==1.3.1 3 | args==0.1.0 4 | async-timeout==4.0.3 5 | attrs==23.2.0 6 | certifi==2024.2.2 7 | charset-normalizer==3.3.2 8 | clint==0.5.1 9 | 
contourpy==1.2.0 10 | cycler==0.12.1 11 | fonttools==4.49.0 12 | frozenlist==1.4.1 13 | fsspec==2024.2.0 14 | idna==3.6 15 | Jinja2==3.1.3 16 | joblib==1.3.2 17 | kiwisolver==1.4.5 18 | MarkupSafe==2.1.5 19 | matplotlib==3.8.3 20 | multidict==6.0.5 21 | networkx==3.2.1 22 | numpy==1.26.4 23 | packaging==23.2 24 | pandas==1.5.3 25 | pillow==10.2.0 26 | psutil==5.9.8 27 | py-tgb==0.9.2 28 | -e git+ssh://git@github.com/ComplexData-MILA/TGX.git@c872f31f3f062fcb837d8555081fc104064976cf#egg=py_tgx 29 | pyparsing==3.1.1 30 | python-dateutil==2.8.2 31 | pytz==2024.1 32 | requests==2.31.0 33 | scikit-learn==1.4.1.post1 34 | scipy==1.12.0 35 | seaborn==0.13.2 36 | six==1.16.0 37 | threadpoolctl==3.3.0 38 | torch_geometric==2.5.0 39 | tqdm==4.66.2 40 | urllib3==2.2.1 41 | yarl==1.9.4 42 | -------------------------------------------------------------------------------- /docs/gallery/0-degree-builtin.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### Reddit 7 | ![image](degree/reddit_ave_degree_per_ts.png){ width="400"} 8 | 9 | ### MOOC 10 | ![image](degree/mooc_ave_degree_per_ts.png){ width="400"} 11 | 12 | ### LastFM 13 | ![image](degree/lastfm_ave_degree_per_ts.png){ width="400"} 14 | 15 | ### Enron 16 | ![image](degree/enron_ave_degree_per_ts.png){ width="400"} 17 | 18 | ### Social Evo 19 | ![image](degree/SocialEvo_ave_degree_per_ts.png){ width="400"} 20 | 21 | 22 | ### UCI 23 | ![UCI](degree/uci_ave_degree_per_ts.png){ width="400"} 24 | 25 | ### Flights 26 | ![image](degree/Flights_ave_degree_per_ts.png){ width="400"} 27 | 28 | ### Can. Parl. 29 | ![Canadian Parliment](degree/CanParl_ave_degree_per_ts.png){ width="400"} 30 | 31 | ### US Legis 32 | ![image](degree/USLegis_ave_degree_per_ts.png){ width="400"} 33 | 34 | ### UN Vote 35 | ![image](degree/UNvote_ave_degree_per_ts.png){ width="400"} 36 | 37 | ### Contacts 38 | ![image](degree/Contacts_ave_degree_per_ts.png){ width="400"} 39 | -------------------------------------------------------------------------------- /docs/gallery/0-node_edge-builtin.md: -------------------------------------------------------------------------------- 1 | # Plot by type 2 | 3 | ## TEA plots 4 | 5 | 6 | ### Reddit 7 | ![image](node_edge/reddit_node&edge_per_ts.png){ width="400"} 8 | 9 | ### MOOC 10 | ![image](node_edge/mooc_node&edge_per_ts.png){ width="400"} 11 | 12 | ### LastFM 13 | ![image](node_edge/lastfm_node&edge_per_ts.png){ width="400"} 14 | 15 | ### Enron 16 | ![image](node_edge/enron_node&edge_per_ts.png){ width="400"} 17 | 18 | ### Social Evo 19 | ![image](node_edge/SocialEvo_node&edge_per_ts.png){ width="400"} 20 | 21 | 22 | ### UCI 23 | ![UCI](node_edge/uci_node&edge_per_ts.png){ width="400"} 24 | 25 | ### Flights 26 | ![image](node_edge/Flights_node&edge_per_ts.png){ width="400"} 27 | 28 | ### Can. Parl. 
29 | ![Canadian Parliment](node_edge/CanParl_node&edge_per_ts.png){ width="400"} 30 | 31 | ### US Legis 32 | ![image](node_edge/USLegis_node&edge_per_ts.png){ width="400"} 33 | 34 | ### UN Vote 35 | ![image](node_edge/UNvote_node&edge_per_ts.png){ width="400"} 36 | 37 | ### Contacts 38 | ![image](node_edge/Contacts_node&edge_per_ts.png){ width="400"} 39 | 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ![TGX logo](docs/2023_TGX_logo.png) 3 | 4 | # Temporal Graph Analysis with TGX 5 |

6 | 7 | 8 | 9 |

10 | 11 | This repository contains the code for the paper "Temporal Graph Analysis with TGX" (WSDM 2024, Demo Track). 12 | 13 | TGX overview: 14 | - TGX supports all datasets from [TGB](https://tgb.complexdatalab.com/) and [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg) as well as any custom dataset in `.csv` format. 15 | - TGX provides numerous temporal graph visualization plots and statistics out of the box. 16 | 17 | 18 | ## Dependencies 19 | TGX implementation works with `python >= 3.9` and can be installed as follows. 20 | 21 | 1. Set up virtual environment (conda should work as well). 22 | ``` 23 | python -m venv tgx_env/ 24 | source tgx_env/bin/activate 25 | ``` 26 | 27 | 2. Upgrade pip (Optional) 28 | ``` 29 | pip install --upgrade pip 30 | ``` 31 | 32 | 3. Install external packages 33 | ``` 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | 4. Install local dependencies under root directory `/TGX`. 38 | ``` 39 | pip install -e . 40 | ``` 41 | 42 | 5. [Alternative] Install TGX from [`PyPi`](https://pypi.org/project/py-tgx/): 43 | 44 | ``` 45 | pip install py-tgx 46 | ``` 47 | 48 | 6. [optional] Install `mkdocs` dependencies to serve the documentation locally. 49 | ``` 50 | pip install mkdocs mkdocs-material mkdocstrings-python mkdocs-glightbox mkdocs-jupyter ipython_genutils 51 | ``` 52 | 53 | 54 | For tutorials on how to use TGX to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb) 55 | 56 | 57 | ### Citation 58 | If TGX is useful for your work, please consider citing it: 59 | ```bibtex 60 | @article{shirzadkhani2024temporal, 61 | title={Temporal Graph Analysis with TGX}, 62 | author={Shirzadkhani, Razieh and Huang, Shenyang and Kooshafar, Elahe and Rabbany, Reihaneh and Poursafaei, Farimah}, 63 | journal={arXiv preprint arXiv:2402.03651}, 64 | year={2024} 65 | } 66 | ``` 67 | -------------------------------------------------------------------------------- /examples/data_viz.py: -------------------------------------------------------------------------------- 1 | import tgx 2 | from tgx.utils.plotting_utils import plot_for_snapshots 3 | from tgx.utils.graph_utils import subsampling 4 | 5 | """ 6 | A master example showing all visualizations in TGX 7 | """ 8 | 9 | # === load built in datasets === 10 | dataset = tgx.builtin.uci() 11 | 12 | # === load the tgb datasets === 13 | # data_name = "tgbl-wiki" #"tgbl-review" 14 | # dataset = tgx.tgb_data(data_name) #tgb datasets 15 | 16 | # initialize a Graph object from the loaded dataset 17 | # & discretize its timestamps... 18 | ctdg = tgx.Graph(dataset) 19 | time_scale = "weekly" # other choices: "daily", "hourly", ...
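# The [0] index on the next line keeps only the discretized graph. As shown in
# docs/index.md, the same call can also return the snapshot timestamps; a minimal
# sketch, assuming the identical Graph.discretize API:
# dtdg, ts_list = ctdg.discretize(time_scale=time_scale, store_unix=True)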
20 | dtdg = ctdg.discretize(time_scale=time_scale)[0] 21 | 22 | # === example for subsampling 23 | sub_edges = subsampling(ctdg, selection_strategy="random", N=1000) 24 | subgraph = tgx.Graph(edgelist=sub_edges) 25 | 26 | 27 | # === plot the statistics 28 | tgx.degree_over_time(dtdg, network_name=dataset.name) 29 | tgx.nodes_over_time(dtdg, network_name=dataset.name) 30 | tgx.edges_over_time(dtdg, network_name=dataset.name) 31 | tgx.nodes_and_edges_over_time(dtdg, network_name=dataset.name) 32 | 33 | # Number of Connected Components 34 | tgx.connected_components_per_ts(dtdg, network_name=dataset.name) 35 | 36 | # Degree Density 37 | tgx.degree_density(dtdg, k=3, network_name=dataset.name) 38 | 39 | tgx.TET(dtdg, 40 | network_name=dataset.name) 41 | 42 | # tgx.TET(dtdg, 43 | # network_name=dataset.name, 44 | # figsize = (9, 5), 45 | # axis_title_font_size = 24, 46 | # ticks_font_size = 24) 47 | 48 | # tgx.TEA(dtdg, 49 | # network_name=dataset.name) 50 | 51 | 52 | 53 | # === compute statistics 54 | test_ratio = 0.15 55 | tgx.get_reoccurrence(ctdg, test_ratio=test_ratio) 56 | tgx.get_surprise(ctdg, test_ratio=test_ratio) 57 | tgx.get_novelty(dtdg) 58 | tgx.get_avg_node_activity(dtdg) 59 | 60 | # Size of Largest Connected Component 61 | component_sizes = tgx.size_connected_components(dtdg) 62 | largest_component_sizes = [max(inner_list) if inner_list else 0 for inner_list in component_sizes] 63 | filename = f"{dataset.name}_largest_connected_component_size" 64 | plot_for_snapshots(largest_component_sizes, y_title="Size of Largest Connected Component", filename="./"+filename) 65 | 66 | # Average Node Engagement 67 | engagements = tgx.get_avg_node_engagement(dtdg) 68 | filename = f"{dataset.name}_average_node_engagement" 69 | plot_for_snapshots(engagements, y_title="Average Engagement", filename="./"+filename) 70 | 71 | -------------------------------------------------------------------------------- /tgx/utils/.ipynb_checkpoints/edgelist-checkpoint.py: -------------------------------------------------------------------------------- 1 | 2 | def edgelist_discritizer(edgelist, 3 | unique_ts, 4 | time_interval = None, 5 | max_intervals = 200): 6 | 7 | total_time = unique_ts[-1] - unique_ts[0] 8 | if time_interval is not None: 9 | if isinstance(time_interval, str): 10 | if time_interval == "daily": 11 | interval_size = 86400 12 | elif time_interval == "weekly": 13 | interval_size = 86400 * 7 14 | elif time_interval == "monthly": 15 | interval_size = 86400 * 30 16 | elif time_interval == "yearly": 17 | interval_size = 86400* 365 18 | if int(total_time / interval_size) > max_intervals: 19 | user_input = input("Too many timestamps, discretizing data to 200 timestamps, do you want to proceed?(y/n): ") 20 | if user_input.lower() == 'n': 21 | print('Cannot proceed to TEA and TET plot') 22 | exit() 23 | else: 24 | interval_size = max_intervals 25 | elif isinstance(time_interval, int): 26 | if time_interval > max_intervals: 27 | raise ValueError(f"The maximum number of time intervals is {max_intervals}.") 28 | else: 29 | interval_size = int(total_time / (time_interval)) 30 | 31 | else: 32 | raise TypeError("Invalid time interval") 33 | else: 34 | user_input = input(f"discretizing data to {max_intervals} timestamps, do you want to proceed?(y/n): ") 35 | if user_input.lower() == 'n': 36 | print('Cannot proceed to TEA and TET plot') 37 | exit() 38 | else: 39 | interval_size = int(total_time / 100) 40 | num_intervals = int(total_time/interval_size) 41 | print(f'Discretizing data to {num_intervals} 
timestamps...') 42 | if num_intervals == 0: 43 | print("Warning! Only one timestamp exist in the data.") 44 | updated_edgelist = {} 45 | new_ts = {} 46 | curr_t = 0 47 | for ts, edge_data in edgelist.items(): 48 | bin_ts = int(ts / interval_size) 49 | if bin_ts >= num_intervals: 50 | bin_ts -= 1 51 | 52 | if bin_ts not in new_ts: 53 | new_ts[bin_ts] = curr_t 54 | curr_t += 1 55 | 56 | if new_ts[bin_ts] not in updated_edgelist: 57 | updated_edgelist[new_ts[bin_ts]] = {} 58 | 59 | for (u,v), n in edge_data.items(): 60 | if (u, v) not in updated_edgelist[new_ts[bin_ts]]: 61 | updated_edgelist[new_ts[bin_ts]][(u, v)] = n 62 | else: 63 | updated_edgelist[new_ts[bin_ts]][(u, v)] += n 64 | return updated_edgelist -------------------------------------------------------------------------------- /examples/starting_example.py: -------------------------------------------------------------------------------- 1 | import tgx 2 | from tgx.utils.plotting_utils import plot_for_snapshots 3 | import argparse 4 | import sys 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser('*** discretizing time steps of a TG dataset ***') 8 | parser.add_argument('-d', '--data', type=str, help='dataset name', default='tgbl-wiki') 9 | parser.add_argument('-t', '--time', type=str, help='time granularity', default='daily') 10 | 11 | try: 12 | args = parser.parse_args() 13 | except: 14 | parser.print_help() 15 | sys.exit(0) 16 | return args, sys.argv 17 | 18 | args, _ = get_args() 19 | 20 | 21 | # === load the datasets from tgb or built-in === 22 | 23 | # load a built-in dataset 24 | # dataset = tgx.builtin.uci() 25 | 26 | # load a TGB dataset 27 | data_name = args.data # args.name can be supported TGB datasets, such as: "tgbl-coin", "tgbl-review", "tgbl-wiki" 28 | dataset = tgx.tgb_data(data_name) 29 | 30 | # make a Graph object from loaded dataset 31 | ctdg = tgx.Graph(dataset) 32 | # ctdg.save2csv("ctdg") #! 
save the graph to csv files 33 | 34 | time_scale = args.time #choices are: "minutely", "monthly", "weekly", "daily", "hourly" 35 | dtdg = ctdg.discretize(time_scale=time_scale)[0] 36 | print(f"INFO: Discretize {data_name} to `{time_scale}`") 37 | 38 | 39 | 40 | # === plotting the statistics === 41 | tgx.degree_over_time(dtdg, network_name=dataset.name) 42 | tgx.nodes_over_time(dtdg, network_name=dataset.name) 43 | tgx.edges_over_time(dtdg, network_name=dataset.name) 44 | tgx.nodes_and_edges_over_time(dtdg, network_name=dataset.name) 45 | 46 | tgx.TET(dtdg, 47 | network_name=dataset.name, 48 | figsize = (9, 5), 49 | axis_title_font_size = 24, 50 | ticks_font_size = 24) 51 | 52 | 53 | tgx.TEA(dtdg, 54 | network_name=dataset.name) 55 | 56 | 57 | 58 | # === compute statistics === 59 | test_ratio = 0.15 60 | tgx.get_reoccurrence(ctdg, test_ratio=test_ratio) 61 | tgx.get_surprise(ctdg, test_ratio=test_ratio) 62 | tgx.get_novelty(dtdg) 63 | 64 | 65 | # Number of Connected Components 66 | tgx.connected_components_per_ts(dtdg, network_name=dataset.name) 67 | 68 | # Degree Density 69 | tgx.degree_density(dtdg, k=3, network_name=dataset.name) 70 | 71 | # Size of Largest Connected Component 72 | component_sizes = tgx.size_connected_components(dtdg) 73 | largest_component_sizes = [max(inner_list) if inner_list else 0 for inner_list in component_sizes] 74 | filename = f"{dataset.name}_largest_connected_component_size" 75 | plot_for_snapshots(largest_component_sizes, y_title="Size of Largest Connected Component", filename="./"+filename) 76 | 77 | # Average Node Engagement 78 | engagements = tgx.get_avg_node_engagement(dtdg) 79 | filename = f"{dataset.name}_average_node_engagement" 80 | plot_for_snapshots(engagements, y_title="Average Engagement", filename="./"+filename) 81 | 82 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | !requirements*.txt 2 | !full_requirements*.txt 3 | #dataset 4 | *.png 5 | *.pdf 6 | *.cpython-39.pyc 7 | *.pyc 8 | *.xz 9 | *.parquet 10 | *.gz 11 | *.tar 12 | *.pdf 13 | *.csv 14 | *.zip 15 | *.json 16 | *.npy 17 | *.pt 18 | *.out 19 | *.pkl 20 | *.txt 21 | *.csv 22 | *.npz 23 | __pycache__/ 24 | */.ipynb_checkpoints/ 25 | # Byte-compiled / optimized / DLL files 26 | 27 | raw/ 28 | books/ 29 | electronics/ 30 | software/ 31 | *.py[cod] 32 | *$py.class 33 | saved_models/ 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | wheels/ 52 | pip-wheel-metadata/ 53 | share/python-wheels/ 54 | *.egg-info/ 55 | .installed.cfg 56 | *.egg 57 | MANIFEST 58 | 59 | # PyInstaller 60 | # Usually these files are written by a python script from a template 61 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
62 | *.manifest 63 | *.spec 64 | 65 | # Installer logs 66 | pip-log.txt 67 | pip-delete-this-directory.txt 68 | 69 | # Unit test / coverage reports 70 | htmlcov/ 71 | .tox/ 72 | .nox/ 73 | .coverage 74 | .coverage.* 75 | .cache 76 | nosetests.xml 77 | coverage.xml 78 | *.cover 79 | *.py,cover 80 | .hypothesis/ 81 | .pytest_cache/ 82 | 83 | # Translations 84 | *.mo 85 | *.pot 86 | 87 | # Django stuff: 88 | *.log 89 | local_settings.py 90 | db.sqlite3 91 | db.sqlite3-journal 92 | 93 | # Flask stuff: 94 | instance/ 95 | .webassets-cache 96 | 97 | # Scrapy stuff: 98 | .scrapy 99 | 100 | # Sphinx documentation 101 | docs/_build/ 102 | 103 | # PyBuilder 104 | target/ 105 | 106 | # Jupyter Notebook 107 | .ipynb_checkpoints 108 | 109 | # IPython 110 | profile_default/ 111 | ipython_config.py 112 | 113 | # pyenv 114 | .python-version 115 | 116 | # pipenv 117 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 118 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 119 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 120 | # install all needed dependencies. 121 | #Pipfile.lock 122 | 123 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 124 | __pypackages__/ 125 | __pycache__/ 126 | 127 | # Celery stuff 128 | celerybeat-schedule 129 | celerybeat.pid 130 | 131 | # SageMath parsed files 132 | *.sage.py 133 | 134 | # Environments 135 | .env 136 | .venv 137 | env/ 138 | venv/ 139 | ENV/ 140 | ENV_tgx/ 141 | env.bak/ 142 | venv.bak/ 143 | cc_env.sh 144 | .github/ 145 | 146 | # Spyder project settings 147 | .spyderproject 148 | .spyproject 149 | 150 | # Rope project settings 151 | .ropeproject 152 | 153 | # mkdocs documentation 154 | /site 155 | 156 | # mypy 157 | .mypy_cache/ 158 | .dmypy.json 159 | dmypy.json 160 | 161 | # Pyre type checker 162 | .pyre/ 163 | 164 | # PyCharm 165 | .idea -------------------------------------------------------------------------------- /tgx/utils/.ipynb_checkpoints/graph_stat-checkpoint.py: -------------------------------------------------------------------------------- 1 | from tgx.utils.plotting_utils import plot_for_snapshots, plot_nodes_edges_per_ts 2 | 3 | __all__ = ["average_degree_per_ts", 4 | "nodes_per_ts", 5 | "edges_per_ts", 6 | "nodes_and_edges_per_ts"] 7 | 8 | 9 | def average_degree_per_ts(graph: list, 10 | total_nodes: int, 11 | network_name: str, 12 | plot_path: str = None) -> None: 13 | ''' 14 | input: a list containing graph snapshots 15 | ''' 16 | print("Plotting average degree per timestamp") 17 | ave_degree = _calculate_average_degree_per_ts(graph, total_nodes) 18 | filename = f"{network_name}_ave_degree_per_ts" 19 | plot_for_snapshots(ave_degree, filename, "Average degree", plot_path = plot_path) 20 | print("Plotting Done!") 21 | return 22 | 23 | 24 | def nodes_per_ts(graph: list, 25 | network_name: str, 26 | plot_path: str = None) -> None: 27 | ''' 28 | input: a list containing graph snapshots 29 | ''' 30 | print("Plotting number of nodes per timestamp") 31 | active_nodes = _calculate_node_per_ts(graph) 32 | filename = f"{network_name}_nodes_per_ts" 33 | plot_for_snapshots(active_nodes, filename, "Number of nodes", plot_path = plot_path) 34 | print("Plotting Done!") 35 | return 36 | 37 | def edges_per_ts(graph: list, 38 | plot_path: str, 39 | network_name: str) -> None: 40 | ''' 41 | input: a list containing graph snapshots 42 | ''' 43 | print("Plotting number of edges per timestamp") 44 | active_edges = 
_calculate_edge_per_ts(graph) 45 | filename = f"{network_name}_edges_per_ts" 46 | plot_for_snapshots(active_edges, filename, "Number of edges", plot_path = plot_path) 47 | print("Plotting Done!") 48 | return 49 | 50 | def nodes_and_edges_per_ts(graph: list, 51 | network_name: str, 52 | plot_path: str = None): 53 | 54 | edges = _calculate_edge_per_ts(graph) 55 | nodes = _calculate_node_per_ts(graph) 56 | ts = list(range(0, len(graph))) 57 | 58 | return plot_nodes_edges_per_ts(edges, nodes, ts, network_name, plot_path = plot_path) 59 | 60 | 61 | def _calculate_average_degree_per_ts(graph, total_nodes): 62 | total_ts = len(graph) 63 | ave_degree = [] 64 | for t1 in range(total_ts): 65 | num_edges = graph[t1].number_of_edges() 66 | ave_degree.append(num_edges*2/ total_nodes) 67 | return ave_degree 68 | 69 | 70 | def _calculate_node_per_ts(graph): 71 | active_nodes = [] 72 | for ts in range(len(graph)): 73 | active_nodes.append(graph[ts].number_of_nodes()) 74 | return active_nodes 75 | 76 | def _calculate_edge_per_ts(graph): 77 | active_edges = [] 78 | for ts in range(len(graph)): 79 | active_edges.append(graph[ts].number_of_edges()) 80 | return active_edges 81 | 82 | 83 | -------------------------------------------------------------------------------- /docs/gallery/dataset.md: -------------------------------------------------------------------------------- 1 | # Plot by dataset 2 | 3 | 4 | ## Reddit 5 | #### TEA Plot 6 | ![image](TEA/Reddit.png){ width="400"} 7 | 8 | 9 | #### TET Plot 10 | ![image](TET/Reddit.png){ width="400"} 11 | 12 | 13 | ## MOOC 14 | #### TEA Plot 15 | ![image](TEA/MOOC.png){ width="400"} 16 | #### TET Plot 17 | ![image](TET/MOOC.png){ width="400"} 18 | 19 | ## LastFM 20 | #### TEA Plot 21 | ![image](TEA/LastFM.png){ width="400"} 22 | #### TET Plot 23 | ![image](TET/LastFM.png){ width="400"} 24 | 25 | ## Enron 26 | #### TEA Plot 27 | ![image](TEA/Enron.png){ width="400"} 28 | #### TET Plot 29 | ![image](TET/Enron.png){ width="400"} 30 | 31 | ## Social Evo 32 | #### TEA Plot 33 | ![image](TEA/SocialEvo.png){ width="400"} 34 | #### TET Plot 35 | ![image](TET/SocialEvo.png){ width="400"} 36 | 37 | ## UCI 38 | #### TEA Plot 39 | ![UCI](TEA/UCI.png){ width="400"} 40 | #### TET Plot 41 | ![UCI](TET/UCI.png){ width="400"} 42 | 43 | 44 | ## Flights 45 | #### TEA Plot 46 | ![image](TEA/Flights.png){ width="400"} 47 | #### TET Plot 48 | ![image](TET/Flights.png){ width="400"} 49 | 50 | 51 | ## Can. Parl. 
52 | #### TEA Plot 53 | ![Canadian Parliment](TEA/CanParl.png){ width="400"} 54 | #### TET Plot 55 | ![Canadian Parliment](TET/CanParl.png){ width="400"} 56 | 57 | ## US Legis 58 | #### TEA Plot 59 | ![image](TEA/USLegis.png){ width="400"} 60 | #### TET Plot 61 | ![image](TET/USLegis.png){ width="400"} 62 | 63 | ## UN Vote 64 | #### TEA Plot 65 | ![image](TEA/UNVote.png){ width="400"} 66 | #### TET Plot 67 | ![image](TET/UNVote.png){ width="400"} 68 | 69 | ## Contacts 70 | #### TEA Plot 71 | ![image](TEA/Contacts.png){ width="400"} 72 | #### TET Plot 73 | ![image](TET/Contacts.png){ width="400"} 74 | 75 | ## tgbl-wiki 76 | #### TEA Plot 77 | ![image](TEA/tgbl-wiki.png){ width="400"} 78 | #### TET Plot 79 | ![image](TET/tgbl-wiki.png){ width="400"} 80 | 81 | ## tgbl-review 82 | #### TEA Plot 83 | ![image](TEA/tgbl-review.png){ width="400"} 84 | #### TET Plot 85 | ![image](TET/tgbl-review.png){ width="400"} 86 | 87 | ## tgbl-coin 88 | #### TEA Plot 89 | ![image](TEA/tgbl-coin.png){ width="400"} 90 | #### TET Plot 91 | ![image](TET/tgbl-coin.png){ width="400"} 92 | 93 | ## tgbl-comment 94 | #### TEA Plot 95 | ![image](TEA/tgbl-comment.png){ width="400"} 96 | #### TET Plot 97 | ![image](TET/tgbl-comment.png){ width="400"} 98 | 99 | ## tgbl-flight 100 | #### TEA Plot 101 | ![image](TEA/tgbl-flight.png){ width="400"} 102 | #### TET Plot 103 | ![image](TET/tgbl-flight.png){ width="400"} 104 | 105 | ## tgbn-trade 106 | #### TEA Plot 107 | ![image](TEA/tgbn-trade.png){ width="400"} 108 | #### TET Plot 109 | ![image](TET/tgbn-trade.png){ width="400"} 110 | 111 | ## tgbn-genre 112 | #### TEA Plot 113 | ![image](TEA/tgbn-genre.png){ width="400"} 114 | #### TET Plot 115 | ![image](TET/tgbn-genre.png){ width="400"} 116 | 117 | ## tgbn-reddit 118 | #### TEA Plot 119 | ![image](TEA/tgbn-reddit.png){ width="400"} 120 | #### TET Plot 121 | ![image](TET/tgbn-reddit.png){ width="400"} 122 | -------------------------------------------------------------------------------- /py_tgx.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: py-tgx 3 | Version: 0.4.0 4 | Summary: Temporal Graph Visualization with TGX 5 | Home-page: https://github.com/ComplexData-MILA/TGX 6 | Author: ["Razieh Shirzadkhani ", "shenyang Huang ", "Elahe Kooshafar", "Farimah Poursafaei"] 7 | Author-email: razieh.shirzadkhani@gmail.com 8 | License: MIT 9 | Project-URL: Bug Tracker, https://github.com/fpour/TGX/issues 10 | Keywords: Temporal Graph Visualization 11 | Classifier: Programming Language :: Python :: 3.9 12 | Requires-Python: >=3.6 13 | Description-Content-Type: text/markdown 14 | License-File: LICENSE 15 | 16 | 17 | ![TGX logo](docs/2023_TGX_logo.png) 18 | 19 | # Temporal Graph Analysis with TGX 20 |

21 | 22 | 23 | 24 |

25 | 26 | This repository contains the code for the paper "Temporal Graph Analysis with TGX" (WSDM 2024, Demo Track). 27 | 28 | TGX overview: 29 | - TGX supports all datasets from [TGB](https://tgb.complexdatalab.com/) and [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg) as well as any custom dataset in `.csv` format. 30 | - TGX provides numerous temporal graph visualization plots and statistics out of the box. 31 | 32 | 33 | ## Dependecies 34 | TGX implementation works with `python >= 3.9` and can be installed as follows. 35 | 36 | 1. Set up virtual environment (conda should work as well). 37 | ``` 38 | python -m venv tgx_env/ 39 | source tgx_env/bin/activate 40 | ``` 41 | 42 | 2. Upgrade pip (Optional) 43 | ``` 44 | pip install --upgrade pip 45 | ``` 46 | 47 | 3. Install external packages 48 | ``` 49 | pip install -r requirements.txt 50 | ``` 51 | 52 | 4. Install local dependencies under root directory `/TGX`. 53 | ``` 54 | pip install -e . 55 | ``` 56 | 57 | 5. [Aternative] Install TGX from [`PyPi`](https://pypi.org/project/py-tgx/): 58 | 59 | ``` 60 | pip install py-tgx 61 | ``` 62 | 63 | 6. [optional] Install `mkdocs` dependencies to serve the documentation locally. 64 | ``` 65 | pip install mkdocs mkdocs-material mkdocstrings-python mkdocs-glightbox mkdocs-jupyter ipython_genutils 66 | ``` 67 | 68 | 69 | For tutorials on how to use TGX to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb) 70 | 71 | 72 | ### Citation 73 | If TGX is useful for your work, please consider citing it: 74 | ```bibtex 75 | @article{shirzadkhani2024temporal, 76 | title={Temporal Graph Analysis with TGX}, 77 | author={Shirzadkhani, Razieh and Huang, Shenyang and Kooshafar, Elahe and Rabbany, Reihaneh and Poursafaei, Farimah}, 78 | journal={arXiv preprint arXiv:2402.03651}, 79 | year={2024} 80 | } 81 | ``` 82 | -------------------------------------------------------------------------------- /tgx/classes/.ipynb_checkpoints/graph-checkpoint.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from typing import Optional 3 | 4 | 5 | class Graph(): 6 | def __init__(self, 7 | edgelist: Optional[dict] = None, 8 | discretized: Optional[bool] = True): 9 | """ 10 | Create a Graph object with specific characteristics 11 | Args: 12 | edgelist: a dictionary of temporal edges in the form of {t: {(u, v), freq}} 13 | discretized: whether the given edgelist was discretized or not 14 | """ 15 | 16 | self.edgelist = edgelist 17 | self.subsampled_graph = None 18 | if discretized: 19 | self.discrite_graph = self._generate_graph() 20 | self.discrite_edgelist = edgelist 21 | else: 22 | self.continuous_edgelist = edgelist 23 | 24 | 25 | def number_of_nodes(self, edgelist: dict = None) -> int: 26 | """ 27 | Calculate total number of nodes present in an edgelist 28 | """ 29 | if self.edgelist is None: 30 | return [] 31 | elif edgelist is None: 32 | edgelist = self.edgelist 33 | node_list = {} 34 | for _, edge_data in edgelist.items(): 35 | for (u,v), _ in edge_data.items(): 36 | if u not in node_list: 37 | node_list[u] = 1 38 | if v not in node_list: 39 | node_list[v] = 1 40 | return len(node_list.keys()) 41 | 42 | def nodes(self) -> list: 43 | """ 44 | Return a list of nodes present in an edgelist 45 | """ 46 | node_list = {} 47 | for _, edge_data in self.edgelist.items(): 48 | for (u,v), _ in 
edge_data.items(): 49 | if u not in node_list: 50 | node_list[u] = 1 51 | if v not in node_list: 52 | node_list[v] = 1 53 | 54 | self.node_list = list(node_list.keys()) 55 | return list(node_list.keys()) 56 | 57 | def _generate_graph(self, 58 | edgelist: Optional[dict] = None 59 | ) -> list: 60 | ''' 61 | Generate a list of graph snapshots. Each snapshot is a 62 | Networkx graph object. 63 | Parameters: 64 | edgelist: a dictionary containing in the form of {t: {(u, v), freq}} 65 | Returns: 66 | G_times: a list of networkx graphs 67 | ''' 68 | if self.edgelist is None: 69 | return [] 70 | elif edgelist is None: 71 | edgelist = self.edgelist 72 | G_times = [] 73 | G = nx.Graph() 74 | cur_t = 0 75 | for ts, edge_data in edgelist.items(): 76 | for (u,v), n in edge_data.items(): 77 | if (ts != cur_t): 78 | G_times.append(G) 79 | G = nx.Graph() 80 | cur_t = ts 81 | G.add_edge(u, v, freq=n) 82 | G_times.append(G) 83 | return G_times 84 | 85 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | 2 | ![TGX logo](2023_TGX_logo.png) 3 | 4 | # Temporal Graph Analysis with TGX (WSDM 2024 Demo Track) 5 |

6 | 7 | 8 | 9 |

10 | 11 | TGX supports all datasets from [TGB](https://tgb.complexdatalab.com/) and [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg) as well as any custom dataset in `.csv` format. 12 | TGX provides numerous temporal graph visualization plots and statistics out of the box. 13 | 14 | 15 | ### Data Loading ### 16 | For detailed tutorial on how to load the datasets into `tgx.Graph`, see [`docs/tutorials/data_loader.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_loader.ipynb) 17 | 18 | 1. Load TGB datasets 19 | ``` 20 | import tgx 21 | dataset = tgx.tgb_data("tgbl-wiki") 22 | ctdg = tgx.Graph(dataset) 23 | ``` 24 | 25 | 2. Load built-in datasets 26 | ``` 27 | dataset = tgx.builtin.uci() 28 | ctdg = tgx.Graph(dataset) 29 | ``` 30 | 31 | 3. Load custom datasets from `.csv` 32 | ``` 33 | from tgx.io.read import read_csv 34 | toy_fname = "docs/tutorials/toy_data.csv" 35 | edgelist = read_csv(toy_fname, header=True,index=False, t_col=0,) 36 | tgx.Graph(edgelist=edgelist) 37 | ``` 38 | 39 | ### Visualization and Statistics ### 40 | For detailed tutorial on how to generate visualizations and compute statistics for temporal graphs, see [`docs/tutorials/data_viz_stats.ipynb`](https://github.com/ComplexData-MILA/TGX/blob/master/docs/tutorials/data_viz_stats.ipynb) 41 | 42 | 1. Discretize the network (required for viz) 43 | 44 | ``` 45 | dataset = tgx.builtin.uci() 46 | ctdg = tgx.Graph(dataset) 47 | time_scale = "weekly" 48 | dtdg, ts_list = ctdg.discretize(time_scale=time_scale, store_unix=True) 49 | ``` 50 | 51 | 2. Plot the number of nodes over time 52 | 53 | ``` 54 | tgx.degree_over_time(dtdg, network_name="uci") 55 | ``` 56 | 57 | 3. Compute novelty index 58 | ``` 59 | tgx.get_novelty(dtdg) 60 | ``` 61 | 62 | 63 | ### Install dependency 64 | Our implementation works with python >= 3.9 and can be installed as follows 65 | 66 | 1. set up virtual environment (conda should work as well) 67 | ``` 68 | python -m venv ~/tgx_env/ 69 | source ~/tgx_env/bin/activate 70 | ``` 71 | 72 | 2. install external packages 73 | ``` 74 | pip install -r requirements.txt 75 | ``` 76 | 77 | 3. install local dependencies under root directory `/TGX` 78 | 81 | ``` 82 | pip install -e . 83 | ``` 84 | 85 | 86 | 87 | 3. [alternatively] install from test-pypi 88 | 89 | ``` 90 | pip install -i https://test.pypi.org/simple/ py-tgx 91 | ``` 92 | You can specify the version with `==`, note that the pypi version might not always be the most updated version 93 | 94 | 95 | 4. 
[optional] install mkdocs dependencies to serve the documentation locally 96 | ``` 97 | pip install mkdocs-glightbox 98 | ``` 99 | 100 | ### Creating new branch ### 101 | 102 | first create the branch on github 103 | ``` 104 | git fetch origin 105 | 106 | git checkout -b test origin/test 107 | ``` 108 | -------------------------------------------------------------------------------- /examples/.ipynb_checkpoints/test-checkpoint.py: -------------------------------------------------------------------------------- 1 | import tgx 2 | import time 3 | import numpy as np 4 | # from tgx.utils.graph_utils import subsampling, train_test_split, edgelist_discritizer 5 | from tgx.utils.graph_stat import get_novelty, get_avg_node_activity, get_reoccurrence, get_surprise 6 | 7 | data_path = '/network/scratch/r/razieh.shirzadkhani/' 8 | # dataset = tgx.data.reddit(root=data_path) 9 | 10 | 11 | dataset = tgx.data.uci(root=data_path) 12 | data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 13 | # get_avg_node_activity(data) 14 | # print(dataset.name) 15 | # dataset = tgx.data.lastfm(root=data_path) 16 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 17 | # get_avg_node_activity(data) 18 | # print(dataset.name) 19 | # dataset = tgx.data.mooc(root=data_path) 20 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 21 | # get_avg_node_activity(data) 22 | # print(dataset.name) 23 | # dataset = tgx.data.canparl(root=data_path) 24 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 25 | # get_avg_node_activity(data) 26 | # print(dataset.name) 27 | # dataset = tgx.data.contacts(root=data_path) 28 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 29 | # get_avg_node_activity(data) 30 | # print(dataset.name) 31 | # dataset= tgx.data.enron(root=data_path) 32 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 33 | # get_avg_node_activity(data) 34 | # print(dataset.name) 35 | # dataset = tgx.data.flights(root=data_path) 36 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 37 | # get_avg_node_activity(data) 38 | # print(dataset.name) 39 | 40 | # dataset = tgx.data.unvote(root=data_path) 41 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 42 | # get_avg_node_activity(data) 43 | # print(dataset.name) 44 | # dataset = tgx.data.uslegis(root=data_path) 45 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 46 | # get_avg_node_activity(data) 47 | # print(dataset.name) 48 | # dataset = tgx.data.social_evo(root=data_path) 49 | # data = tgx.read_edgelist(data=dataset, discretize=dataset.discretize, intervals=dataset.intervals) 50 | # get_avg_node_activity(data) 51 | # print(dataset.name) 52 | # print(dataset.name) 53 | # data = tgx.read_edgelist(data=dataset, discretize=True, intervals=dataset.intervals) 54 | # get_novelty(data) 55 | 56 | # total_nodes = G.number_of_nodes() 57 | # plot_path = "./examples/plots/" 58 | # n_sampling = 1000 59 | # new_edges = subsampling(G, random_selection=True, N=n_sampling) 60 | # new_edges = edgelist_discritizer(new_edges, time_interval=50) 61 | # G.subsampled_graph = G._generate_graph(new_edges) 62 | # 
print(G.number_of_nodes(G.subsampled_graph)) 63 | # tgx.nodes_and_edges_per_ts(G.subsampled_graph, plot_path=plot_path, network_name=dataset.name) 64 | # tgx.average_degree_per_ts(G.subsampled_graph, n_sampling, plot_path=plot_path, network_name=dataset.name) 65 | 66 | 67 | # TEA_path = "./examples/plots/TEA/1" 68 | 69 | # tgx.TEA(data, filepath = TEA_path, network_name=dataset.name+'disc') 70 | # TET_path = "./examples/plots/TET/" 71 | # tgx.TET(data, filepath = TET_path, network_name=dataset.name) 72 | 73 | # print("--- Total elapsed time: %s seconds ---" % (time.time() - start_time)) 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/contribute.md: -------------------------------------------------------------------------------- 1 | 2 | ![TGX logo](2023_TGX_logo.png) 3 | 4 | # TGX Community Contribution Guidelines 5 | 6 | TGX is a community driven project and we hope to continue to add new features to it so that it is useful for a wide range of analysis and scenarios. This guide documents the best way to make various types of contribution to TGX, including what is required before submitting a code change. Note that as the package evolves, this guideline will be improved as well, so feel free to check back here for more information in the near future too. 7 | 8 | Contributing to TGX is more than submitting code changes, we also welcome new research suggestions, inviting new users, testing releases and improving the documentation. Raising issues on Github to point out any issues or directions of improvement are also welcome. 9 | 10 | 11 | ## Reporting issues on Github 12 | 13 | Creating issues on Github is a useful way to manage problems and identify priorities. When possible, please add appropriate tags to the issue you are creating. For example, if it is related to the documentation website or error in documentation, you can include the `documentation` tag. If it is related to package installation, you can add the `install` tag. Overall, combining multiple issues that are related into a single one to report is also helpful to avoid trackig too many issues. 14 | 15 | ### Bugs 16 | 17 | Bug reports are useful when they are accompanied by ways to understand and reproduce the bug. When reporting a bug, try to include detailed steps on how you encountered the bug and how to reproduce it. If you would like to propose a fix to the bug, feel free to link it to a pull request on the issue as well. 18 | 19 | ### Feedbacks and Improvements 20 | 21 | Feedbacks and improvements on TGX are welcome. If you would like to see new temporal graph statistics or visualization added, feel free to reach out directly by [email](mailto:shenyang.huang@mail.mcgill.ca) or create an issue on Github. You are of course more than welcome to add visualization and statistics from your own paper as well. 22 | 23 | ## Preparing code changes 24 | 25 | If you are interested in adding new features or fixing bugs in TGX, thanks for your help. This section walks you through on how to propose code changes to TGX. 26 | 27 | ### Deciding what to work on 28 | 29 | The first step is to decide on which aspects you want to improve for TGX. The best way is to look at currently [active issues](https://github.com/ComplexData-MILA/TGX/issues) on Github. You can find the future improvements we are planning in the [Roadmap](https://github.com/ComplexData-MILA/TGX/issues/43), from there you can find detailed instructions on what each task means. 
You are also very welcome to fix any bugs that you or someone else encounter by proposing a pull request on GitHub. Once you have decided on what you want to fix, reach out to us on TG Slack (check the most recent link to join on the [TG website](https://www.cs.mcgill.ca/~shuang43/rg.html)) or by [email](mailto:shenyang.huang@mail.mcgill.ca) and let's work on it together. 30 | 31 | ### Setting up the dev environment 32 | 33 | First, identify the issue you want to solve and create a new branch linked to the issue (see the create-a-branch button on the right side of the issue). Then install the latest version of TGX as instructed below. 34 | 35 | 1. Set up virtual environment (conda should work as well). 36 | 37 | ``` 38 | python -m venv tgx_env/ 39 | source tgx_env/bin/activate 40 | ``` 41 | 42 | 2. Upgrade pip (Optional) 43 | 44 | ``` 45 | pip install --upgrade pip 46 | ``` 47 | 48 | 3. Install external packages 49 | 50 | ``` 51 | pip install -r requirements.txt 52 | ``` 53 | 54 | 4. Install local dependencies under root directory `/TGX`. 55 | 56 | ``` 57 | pip install -e . 58 | ``` 59 | 60 | 5. Install `mkdocs` dependencies to serve the documentation locally. 61 | 62 | ``` 63 | pip install mkdocs mkdocs-material mkdocstrings-python mkdocs-glightbox mkdocs-jupyter ipython_genutils 64 | ``` 65 | 66 | 6. Switch to the branch you created from the issue (swap out `test` with the name of your branch) 67 | ``` 68 | git fetch origin 69 | 70 | git checkout -b test origin/test 71 | ``` 72 | 73 | ### Creating a Pull Request 74 | 75 | Please make sure you have tested your code before creating a pull request, and that you have added documentation for any new functions. 76 | Once you have created the pull request, you can reach out for a code review on Slack or by email. 
-------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Temporal Graph Analysis with TGX 2 | site_url: https://shenyanghuang.github.io/TGX 3 | nav: 4 | - Home: index.md 5 | - API: 6 | - ReadWrite: 7 | - Read Data: io/io.md 8 | - Utils: 9 | - Graph stats: utils/graph_stats.md 10 | - Graph Utils: utils/graph_utils.md 11 | - Plotting Utils: utils/plotting_utils.md 12 | - Visualization: 13 | - TEA Plot: viz/vis_tea.md 14 | - TET Plot: viz/vis_tet.md 15 | - Classes: 16 | - Graph: classes/graph.md 17 | - Data: 18 | - Builtin: data/builtin.md 19 | - TGB: data/tgb.md 20 | - Tutorials: 21 | - Load data: tutorials/data_loader.ipynb 22 | - Visualization and Statistics: tutorials/data_viz_stats.ipynb 23 | - Gallery: 24 | - Plots by type: 25 | - TEA Plots: 26 | - Builtin: gallery/0-tea-builtin.md 27 | - TGB: gallery/0-tea-tgb.md 28 | - TET Plots: 29 | - Builtin: gallery/0-tet-builtin.md 30 | - TGB: gallery/0-tet-tgb.md 31 | - Average degree over time: 32 | - Builtin: gallery/0-degree-builtin.md 33 | - TGB: gallery/0-degree-tgb.md 34 | - Node and Edge over time: 35 | - Builtin: gallery/0-node_edge-builtin.md 36 | - TGB: gallery/0-node_edge-tgb.md 37 | - Plots by dataset: 38 | - Builtin: 39 | - Reddit: gallery/reddit.md 40 | - MOOC: gallery/mooc.md 41 | - LastFM: gallery/lastfm.md 42 | - Enron: gallery/enron.md 43 | - Social Evo: gallery/socialevo.md 44 | - UCI: gallery/uci.md 45 | - Flights: gallery/flight.md 46 | - Can Parl: gallery/canparl.md 47 | - US Legis: gallery/uslegis.md 48 | - UN Vote: gallery/unvote.md 49 | - Contacts: gallery/contact.md 50 | - TGB: 51 | - tgbl-wiki: gallery/tgbl-wiki.md 52 | - tgbl-review: gallery/tgbl-review.md 53 | - tgbl-coin: gallery/tgbl-coin.md 54 | - tgbl-comment: gallery/tgbl-comment.md 55 | - tgbl-flight: gallery/tgbl-flight.md 56 | - tgbn-trade: gallery/tgbn-trade.md 57 | - tgbn-genre: gallery/tgbn-genre.md 58 | - tgbn-reddit: gallery/tgbn-reddit.md 59 | 60 | - Contribute: contribute.md 61 | - Github: 'https://github.com/ComplexData-MILA/TGX' 62 | 63 | 64 | theme: 65 | logo: tgx_logo_no_txt.png 66 | name: material 67 | features: 68 | - navigation.tabs 69 | - toc.integrate 70 | - navigation.footer 71 | - navigation.indexes 72 | - navigation.sections 73 | - navigation.top 74 | - navigation.tracking 75 | - navigation.tabs.sticky 76 | - navigation.expand 77 | - search.suggest 78 | - search.highlight 79 | - content.tabs.link 80 | - content.code.annotation 81 | - content.code.copy 82 | language: en 83 | palette: 84 | - scheme: default 85 | toggle: 86 | icon: material/toggle-switch-off-outline 87 | name: Switch to dark mode 88 | primary: navy 89 | accent: red 90 | - scheme: slate 91 | toggle: 92 | icon: material/toggle-switch 93 | name: Switch to light mode 94 | primary: blue 95 | accent: lime 96 | 97 | 98 | plugins: 99 | - search 100 | 101 | - glightbox: 102 | touchNavigation: true 103 | loop: false 104 | effect: zoom 105 | slide_effect: slide 106 | width: 100% 107 | height: auto 108 | zoomable: true 109 | draggable: true 110 | skip_classes: 111 | - custom-skip-class-name 112 | auto_caption: false 113 | caption_position: bottom 114 | - mkdocstrings: 115 | watch: 116 | - tgx/ 117 | handlers: 118 | python: 119 | setup_commands: 120 | - import sys 121 | - sys.path.append("docs") 122 | - sys.path.append("tgx") 123 | selection: 124 | new_path_syntax: true 125 | rendering: 126 | show_root_heading: false 127 | heading_level: 3 128 | 
show_root_full_path: false 129 | 130 | 131 | - mkdocs-jupyter: 132 | execute: false 133 | 134 | markdown_extensions: 135 | - pymdownx.arithmatex: 136 | generic: true 137 | - attr_list 138 | - md_in_html 139 | -------------------------------------------------------------------------------- /tgx/utils/.ipynb_checkpoints/plotting_utils-checkpoint.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | def create_ts_list(start, end, metric=None, interval=None): 7 | if metric == "Unix" or metric == "unix" or metric == "UNIX": 8 | start = datetime.datetime.fromtimestamp(start).date() 9 | end = datetime.datetime.fromtimestamp(end).date() 10 | if interval == 'daily': 11 | date_list = pd.date_range(start = start, end = end, freq="D") 12 | elif interval == "month": 13 | date_list = pd.date_range(start = start, end = end, freq="M") 14 | elif interval == "year": 15 | date_list = pd.date_range(start = start, end = end, freq="Y") 16 | timelist = [] 17 | for dates in date_list: 18 | timelist.append(dates.strftime("%Y/%m/%d")) 19 | else: 20 | timelist = list(range(start, end, interval)) 21 | # print(timelist) 22 | return timelist 23 | 24 | 25 | 26 | def plot_nodes_edges_per_ts(edges: list, 27 | nodes: list, 28 | ts: list, 29 | network_name: str, 30 | plot_path: str = None, 31 | ylabel_1: str = 'Edges per Timestamp', 32 | ylabel_2: str = 'Nodes per Timestamp'): 33 | """ 34 | Plot nodes and edges per timestamp in one figure 35 | Parameters: 36 | edges: A list containing number of edges per timestamp 37 | nodes: A list containing number of nodes per timestamp 38 | ts: list of timestamps 39 | network_name: Name of the network to be used in the output file name 40 | plot_path: Path to save the output figure 41 | ylabel_1: Label for the edges per timestamp line 42 | ylabel_2: Label for the nodes per timestamp line 43 | """ 44 | fig = plt.figure(facecolor='w', figsize=(11, 6)) 45 | ax1 = fig.add_subplot(111) 46 | ax2 = ax1.twinx() 47 | 48 | c1, = ax1.plot(ts, edges, color='black', lw=3, label=ylabel_1) 49 | c2, = ax2.plot(ts, nodes, color='gray', linestyle='dashed', lw=3, label=ylabel_2) 50 | curves = [c1, c2] 51 | ax1.legend(curves, [curve.get_label() for curve in curves], fontsize = 18) 52 | ax1.set_xlabel('Time', fontsize=20) 53 | ax1.set_ylabel(ylabel_1, fontsize=20) 54 | ax2.set_ylabel(ylabel_2, fontsize=20) 55 | ax1.tick_params(labelsize=20) 56 | ax2.tick_params(labelsize=20) 57 | ax1.set_ylim(0) 58 | ax2.set_ylim(0) 59 | ax1.set_xlim(0, len(ts)-1) 60 | if plot_path is not None: 61 | filename = f"{network_name}_node&edge_per_ts" 62 | plt.savefig(f'{plot_path}/{filename}') 63 | plt.show() 64 | 65 | def plot_for_snapshots(data: list, 66 | filename: str, 67 | y_title: str, 68 | show_ave: bool=True, 69 | plot_path:str = None, 70 | plot_title:str = None): 71 | ''' 72 | Plot a variable for different timestamps 73 | Parameters: 74 | data: A list of desired variable to be plotted 75 | filename: Name of the output file name 76 | y_title: Title of the y axis 77 | show_ave: Whether to plot a line showing the average of the variable over all timestamps 78 | plot_path: The path to save the output file 79 | ''' 80 | ts = list(range(0, len(data))) 81 | # plt.rcParams["font.family"] = "Times New Roman" 82 | fig = plt.figure(facecolor='w', figsize=(9,6)) 83 | ax = fig.add_subplot(111) 84 | ax.plot(ts, data, color='black', lw=3) 85 | 86 | ax.set_xlabel('Time', fontsize=20) 87 | 
ax.set_ylabel(y_title, fontsize=20) 88 | ax.tick_params(labelsize=20) 89 | # ax.set_ylim(0, 7.5) 90 | ax.set_xlim(0, len(ts)-1) 91 | ax.set_title(plot_title, fontsize=20) 92 | if show_ave: 93 | ave_deg = [np.average(data) for i in range(len(ts))] 94 | ax.plot(ts, ave_deg, color='#ca0020', linestyle='dashed', lw=3) 95 | if plot_path is not None: 96 | plt.savefig(f'{plot_path}/{filename}') 97 | plt.show() 98 | 99 | if __name__ == "__main__": 100 | create_ts_list(86400, 86400*365, "unix", "month") 101 | create_ts_list(2015, 2022, interval=2) -------------------------------------------------------------------------------- /tgx/data/tgb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | Data_specifications = { 4 | 'tgbl-wiki' : {'discretize' : True, 'time_scale': 'daily'}, 5 | 'tgbl-review' : {'discretize' : True, 'time_scale': 'yearly'}, 6 | 'tgbl-coin' : {'discretize' : True, 'time_scale': 'weekly'}, 7 | 'tgbl-comment' : {'discretize' : True, 'time_scale': 'monthly'}, 8 | 'tgbl-flight' : {'discretize' : True, 'time_scale': 'monthly'}, 9 | 'tgbn-trade' : {'discretize' : False, 'time_scale': None}, 10 | 'tgbn-genre' : {'discretize' : True, 'time_scale': 'monthly'}, 11 | 'tgbn-reddit' : {'discretize' : True, 'time_scale': 'monthly'}, 12 | 'tgbn-token' : {'discretize' : True, 'time_scale': 'weekly'} 13 | } 14 | 15 | class tgb_data(object): 16 | def __init__(self, dname: str, 17 | edge_feat: bool = False, 18 | w: bool = False, 19 | edge_label: bool = False, 20 | edge_idxs: bool = False): 21 | """ 22 | Data class for loading default (in-package) temporal datasets 23 | 24 | In order to use "tgb" datasets install tgb package 25 | for more detals visit here: https://tgb.complexdatalab.com/ 26 | 27 | In order to use dgb datasets download and extract dataset file 28 | from here: https://zenodo.org/record/7213796#.Y1cO6y8r30o 29 | and locate them in ./data/ directory. 30 | """ 31 | self.tgb(dname, 32 | edge_feat = edge_feat, 33 | w = w, 34 | edge_label = edge_label, 35 | edge_idxs = edge_idxs) 36 | 37 | return 38 | 39 | @classmethod 40 | def tgb(self, dname: str, 41 | edge_feat: bool = False, 42 | w: bool = False, 43 | edge_label: bool = False, 44 | edge_idxs: bool = False): 45 | """ 46 | Load datasets from "tgb" package. To load these datasets you need to install tgb package. 
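        Example (a minimal sketch mirroring docs/index.md; assumes `tgx` and the tgb package are installed):
            >>> import tgx
            >>> dataset = tgx.tgb_data("tgbl-wiki")
            >>> ctdg = tgx.Graph(dataset)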
47 | Parameters: 48 | dname: str, name of the dataset from the list: 49 | ["tgbl-wiki", "tgbl-review", 50 | "tgbl-coin", "tgbl-comment", 51 | "tgbl-flight","tgbn-trade", 52 | "tgbn-genre", "tgbn-reddit"] 53 | edge_feat: list of edge features 54 | w: edge weights 55 | edge_label: edge labels 56 | edge_idxs: edge indexes 57 | 58 | """ 59 | try: 60 | from tgb.linkproppred.dataset import LinkPropPredDataset 61 | from tgb.nodeproppred.dataset import NodePropPredDataset 62 | except: 63 | print("First install TGB package using 'pip install py-tgb'") 64 | 65 | if "tgbl" in dname: 66 | dataset = LinkPropPredDataset(name=dname, root="datasets", preprocess=True) 67 | elif "tgbn" in dname: 68 | dataset = NodePropPredDataset(name=dname, root="datasets", preprocess=True) 69 | else: 70 | raise ValueError("Invalid tgb dataset name") 71 | 72 | data = dataset.full_data 73 | data = np.array([data['sources'], data["destinations"], data["timestamps"]]) 74 | self.data = np.transpose(data) 75 | 76 | if edge_feat: 77 | self.edge_feat = data['edge_feat'] 78 | if w: 79 | self.w = data['w'] 80 | if edge_label: 81 | self.edge_label = data['edge_label'] 82 | if edge_idxs: 83 | self.edge_idxs = data['edge_idxs'] 84 | 85 | if (dname in Data_specifications): 86 | self.discretize = Data_specifications[dname]['discretize'] 87 | self.time_scale = Data_specifications[dname]['time_scale'] 88 | else: 89 | self.discretize = False 90 | self.time_scale = None 91 | self.train_mask = dataset.train_mask 92 | self.val_mask = dataset.val_mask 93 | self.test_mask = dataset.test_mask 94 | self.name = dname 95 | 96 | return self 97 | 98 | 99 | def read_specifications(self, 100 | data: type): 101 | """ 102 | Load dataset specifications for dgb datasets 103 | Parameters: 104 | data: str, name of the dataset 105 | """ 106 | self.name = data 107 | self.discretize = Data_specifications[data]['discretize'] 108 | self.time_scale = Data_specifications[data]['time_scale'] 109 | return self -------------------------------------------------------------------------------- /tgx/utils/plotting_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import matplotlib.colors as mcolors 6 | from matplotlib.ticker import MaxNLocator 7 | 8 | def create_ts_list(start, end, metric=None, interval=None): 9 | if metric == "Unix" or metric == "unix" or metric == "UNIX": 10 | start = datetime.datetime.fromtimestamp(start).date() 11 | end = datetime.datetime.fromtimestamp(end).date() 12 | if interval == 'daily': 13 | date_list = pd.date_range(start = start, end = end, freq="D") 14 | elif interval == "month": 15 | date_list = pd.date_range(start = start, end = end, freq="M") 16 | elif interval == "year": 17 | date_list = pd.date_range(start = start, end = end, freq="Y") 18 | timelist = [] 19 | for dates in date_list: 20 | timelist.append(dates.strftime("%Y/%m/%d")) 21 | else: 22 | timelist = list(range(start, end, interval)) 23 | # print(timelist) 24 | return timelist 25 | 26 | 27 | 28 | def plot_nodes_edges_per_ts(edges: list, 29 | nodes: list, 30 | ts: list, 31 | filename: str = None, 32 | ylabel_1: str = 'Edges per Timestamp', 33 | ylabel_2: str = 'Nodes per Timestamp'): 34 | """ 35 | Plot nodes and edges per timestamp in one figure 36 | Parameters: 37 | edges: A list containing number of edges per timestamp 38 | nodes: A list containing number of nodes per timestamp 39 | ts: list of timestamps 40 | filename: Name of the 
output file name, containing the path 41 | ylabel_1: Label for the edges per timestamp line 42 | ylabel_2: Label for the nodes per timestamp line 43 | """ 44 | fig = plt.figure(facecolor='w', figsize=(11, 6)) 45 | ax1 = fig.add_subplot(111) 46 | ax2 = ax1.twinx() 47 | 48 | c1, = ax1.plot(ts, edges, color='black', lw=3, label=ylabel_1) 49 | c2, = ax2.plot(ts, nodes, color='gray', linestyle='dashed', lw=3, label=ylabel_2) 50 | curves = [c1, c2] 51 | ax1.legend(curves, [curve.get_label() for curve in curves], fontsize = 18) 52 | ax1.set_xlabel('Time', fontsize=20) 53 | ax1.set_ylabel(ylabel_1, fontsize=20) 54 | ax2.set_ylabel(ylabel_2, fontsize=20) 55 | ax1.tick_params(labelsize=20) 56 | ax2.tick_params(labelsize=20) 57 | ax1.set_ylim(0) 58 | ax2.set_ylim(0) 59 | ax1.set_xlim(0, len(ts)-1) 60 | if filename is not None: 61 | plt.savefig(f'{filename}') 62 | else: 63 | plt.show() 64 | 65 | def plot_for_snapshots(data: list, 66 | y_title: str, 67 | filename: str = None, 68 | show_ave: bool=True, ): 69 | ''' 70 | Plot a variable for different timestamps 71 | Parameters: 72 | data: A list of desired variable to be plotted 73 | y_title: Title of the y axis 74 | filename: Name of the output file name, containing the path 75 | show_ave: Whether to plot a line showing the average of the variable over all timestamps 76 | ''' 77 | ts = list(range(0, len(data))) 78 | # plt.rcParams["font.family"] = "Times New Roman" 79 | fig = plt.figure(facecolor='w', figsize=(9,6)) 80 | ax = fig.add_subplot(111) 81 | ax.plot(ts, data, color='black', lw=3) 82 | 83 | ax.set_xlabel('Time', fontsize=20) 84 | ax.set_ylabel(y_title, fontsize=20) 85 | ax.tick_params(labelsize=20) 86 | ax.set_xlim(0, len(ts)-1) 87 | if show_ave: 88 | ave_deg = [np.average(data) for i in range(len(ts))] 89 | ax.plot(ts, ave_deg, color='#ca0020', linestyle='dashed', lw=3) 90 | if filename is not None: 91 | plt.savefig(f'{filename}') 92 | else: 93 | plt.show() 94 | 95 | 96 | def plot_density_map(data: list, 97 | y_title: str, 98 | filename: str = None,): 99 | ''' 100 | Plot a density map using fig and ax 101 | Parameters: 102 | data: A list of desired variable to be plotted 103 | y_title: Title of the y axis 104 | filename: Name of the output file name, containing the path 105 | ''' 106 | max_value = max(max(inner) for inner in data if inner) 107 | c = np.zeros((max_value, len(data))) 108 | 109 | for i, row in enumerate(data): 110 | for value in row: 111 | c[value - 1][i] += 1 112 | 113 | # Plot 114 | fig = plt.figure(facecolor='w', figsize=(9, 6)) 115 | ax = fig.add_subplot(111) 116 | 117 | norm = mcolors.Normalize(vmin=0, vmax=1) 118 | cax = ax.imshow(c, cmap='viridis', interpolation='nearest', norm=norm) 119 | cbar = fig.colorbar(cax) 120 | cbar.set_label('Frequency') 121 | 122 | ax.set_title("Heatmap of Node Degrees Over Time") 123 | ax.set_xlabel('Time', fontsize=20) 124 | ax.set_ylabel(y_title, fontsize=20) 125 | ax.tick_params(labelsize=20) 126 | ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 127 | 128 | # Adjust the aspect ratio of the plot 129 | ax.set_aspect('auto') 130 | 131 | if filename is not None: 132 | plt.savefig(f'{filename}') 133 | else: 134 | plt.show() 135 | 136 | if __name__ == "__main__": 137 | create_ts_list(86400, 86400*365, "unix", "month") 138 | create_ts_list(2015, 2022, interval=2) -------------------------------------------------------------------------------- /tgx/data/builtin.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import zipfile 3 | 
import requests 4 | import os 5 | 6 | 7 | 8 | __all__ = ["data"] 9 | 10 | root_path = "." 11 | 12 | 13 | DataPath={ 14 | 'USLegis' : "/data/USLegis/ml_USLegis.csv", 15 | 'CanParl' : "/data/CanParl/ml_CanParl.csv", 16 | 'UNtrade' : "/data/UNtrade/ml_UNtrade.csv", 17 | 'UNvote' : "/data/UNvote/ml_UNvote.csv", 18 | 'reddit' : "/data/reddit/ml_reddit.csv", 19 | 'Wikipedia' : "/data/wikipedia/ml_wikipedia.csv", 20 | 'enron' : "/data/enron/ml_enron.csv", 21 | 'mooc' : "/data/mooc/ml_mooc.csv", 22 | 'uci' : "/data/uci/ml_uci.csv", 23 | 'SocialEvo' : "/data/SocialEvo/ml_SocialEvo.csv", 24 | 'Flights' : "/data/Flights/ml_Flights.csv", 25 | 'lastfm' : "/data/lastfm/ml_lastfm.csv", 26 | 'Contacts' : "/data/Contacts/ml_Contacts.csv" 27 | } 28 | 29 | Data_specifications = { 30 | 'USLegis' : {'discretize' : False, 'time_scale': None}, 31 | 'CanParl' : {'discretize' : False, 'time_scale': None}, 32 | 'UNvote' : {'discretize' : False, 'time_scale': None}, 33 | 'reddit' : {'discretize' : True, 'time_scale': 'daily'}, 34 | 'enron' : {'discretize' : True, 'time_scale': 'monthly'}, 35 | 'mooc' : {'discretize' : True, 'time_scale': 'daily'}, 36 | 'uci' : {'discretize' : True, 'time_scale': 'weekly'}, 37 | 'SocialEvo' : {'discretize' : True, 'time_scale': 'weekly'}, 38 | 'Flights' : {'discretize' : False, 'time_scale': 121}, 39 | 'Contacts' : {'discretize' : True, 'time_scale': 'daily'}, 40 | 'lastfm' : {'discretize' : True, 'time_scale': 'monthly'} 41 | } 42 | 43 | def download(url: str, output_path: str): 44 | get_response = requests.get(url,stream=True) 45 | file_name = url.split("/")[-1] 46 | fpath = output_path + "/" + file_name 47 | with open(fpath, 'wb') as f: 48 | for chunk in get_response.iter_content(chunk_size=1024): 49 | if chunk: # filter out keep-alive new chunks 50 | f.write(chunk) 51 | return fpath 52 | 53 | 54 | 55 | class builtin(object): 56 | def __init__(self): 57 | """ 58 | Data class for loading default (in-package) temporal datasets 59 | 60 | In order to use "tgb" datasets install tgb package 61 | for more detals visit here: https://tgb.complexdatalab.com/ 62 | 63 | In order to use dgb datasets download and extract dataset file 64 | from here: https://zenodo.org/record/7213796#.Y1cO6y8r30o 65 | and locate them in ./data/ directory. 66 | """ 67 | pass 68 | 69 | 70 | def read_specifications(self, 71 | data: type): 72 | """ 73 | Load dataset specifications for dgb datasets 74 | Parameters: 75 | data: str, name of the dataset 76 | """ 77 | self.name = data 78 | self.path = DataPath[data] 79 | # self.header = Data_specifications[data]['header'] 80 | # self.index = Data_specifications[data]['index'] 81 | self.discretize = Data_specifications[data]['discretize'] 82 | self.time_scale = Data_specifications[data]['time_scale'] 83 | return self 84 | 85 | def load_dgb_data(self): 86 | try: 87 | data = pd.read_csv(f"{self.root}{self.path}", index_col=0) 88 | except: 89 | self.download_file(self) 90 | data = pd.read_csv(f"{self.root}{self.path}", index_col=0) 91 | 92 | self.data = data.iloc[:, 0:3].to_numpy() 93 | return self 94 | 95 | def download_file(self): 96 | 97 | print("Data missing, download recommended!") 98 | inp = input('Will you download the dataset(s) now? (y/N)\n').lower() 99 | url = f"https://zenodo.org/record/7213796/files/{self.name}.zip" 100 | path_download = f"./data" 101 | print(path_download) 102 | print(url) 103 | if inp == 'y': 104 | if not os.path.exists(path_download): 105 | os.mkdir(path_download) 106 | print("Folder %s created!" 
% path_download) 107 | 108 | print(f"Downloading {self.name} dataset . . .") 109 | zip_path = download(url, path_download) 110 | with zipfile.ZipFile(zip_path, "r") as f: 111 | f.extractall(path_download) 112 | print("Download completed") 113 | 114 | else: 115 | print("Download cancelled") 116 | 117 | 118 | @classmethod 119 | def mooc(self, root=root_path): 120 | data = "mooc" 121 | self.root = root 122 | self.read_specifications(self, data) 123 | self.load_dgb_data(self) 124 | return self 125 | 126 | @classmethod 127 | def uci(self, root=root_path): 128 | data = "uci" 129 | self.root = root 130 | self.read_specifications(self, data) 131 | self.load_dgb_data(self) 132 | return self 133 | 134 | @classmethod 135 | def uslegis(self, root=root_path): 136 | data = "USLegis" 137 | self.root = root 138 | self.read_specifications(self, data) 139 | self.load_dgb_data(self) 140 | return self 141 | 142 | @classmethod 143 | def canparl(self, root=root_path): 144 | data = "CanParl" 145 | self.root = root 146 | self.read_specifications(self, data) 147 | self.load_dgb_data(self) 148 | return self 149 | 150 | @classmethod 151 | def untrade(self, root=root_path): 152 | data = "UNtrade" 153 | self.root = root 154 | self.read_specifications(self, data) 155 | self.load_dgb_data(self) 156 | return self 157 | 158 | @classmethod 159 | def unvote(self, root=root_path): 160 | data = "UNvote" 161 | self.root = root 162 | self.read_specifications(self, data) 163 | self.load_dgb_data(self) 164 | return self 165 | 166 | @classmethod 167 | def reddit(self, root=root_path): 168 | data = "reddit" 169 | self.root = root 170 | self.read_specifications(self, data) 171 | self.load_dgb_data(self) 172 | return self 173 | 174 | @classmethod 175 | def wikipedia(self, root=root_path): 176 | data = "Wikipedia" 177 | self.root = root 178 | self.read_specifications(self, data) 179 | self.load_dgb_data(self) 180 | return self 181 | 182 | @classmethod 183 | def enron(self, root=root_path): 184 | data = "enron" 185 | self.root = root 186 | self.read_specifications(self, data) 187 | self.load_dgb_data(self) 188 | return self 189 | 190 | @classmethod 191 | def social_evo(self, root=root_path): 192 | data = "SocialEvo" 193 | self.root = root 194 | self.read_specifications(self, data) 195 | self.load_dgb_data(self) 196 | return self 197 | 198 | @classmethod 199 | def flights(self, root=root_path): 200 | data = "Flights" 201 | self.root = root 202 | self.read_specifications(self, data) 203 | self.load_dgb_data(self) 204 | return self 205 | 206 | @classmethod 207 | def lastfm(self, root=root_path): 208 | data = "lastfm" 209 | self.root = root 210 | self.read_specifications(self, data) 211 | self.load_dgb_data(self) 212 | return self 213 | 214 | @classmethod 215 | def contacts(self, root=root_path): 216 | data = "Contacts" 217 | self.root = root 218 | self.read_specifications(self, data) 219 | self.load_dgb_data(self) 220 | return self 221 | -------------------------------------------------------------------------------- /tgx/io/read.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import csv 3 | import numpy as np 4 | from typing import Optional, Union 5 | # from tgx.datasets.data_loader import read_dataset 6 | 7 | 8 | # data: Optional[object] = None, 9 | # is_discretized: bool = False, 10 | # discretize: bool = False, 11 | # time_scale: Union[str, int, None] = None, 12 | 13 | def read_csv(fname: Union[str, object] = None, 14 | header: bool = False, 15 | index: bool = False, 16 
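# ----------------------------------------------------------------------------
# Illustrative usage sketch (not library code) for the `builtin` loaders defined
# in tgx/data/builtin.py above: each classmethod looks up its csv path in
# DataPath, loads (or offers to download) the DGB file, and keeps the first
# three columns as a numpy array in `.data`.
import tgx

dataset = tgx.builtin.uci()   # prompts for download if ./data/uci is missing
print(dataset.name)           # "uci"
print(dataset.data.shape)     # (number_of_rows, 3)
graph = tgx.Graph(dataset)    # wrap it as in the tutorial notebook below
# ----------------------------------------------------------------------------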
| t_col: int = 2,) -> dict: 17 | 18 | """ 19 | Read temporal edgelist and store it in a dictionary. 20 | Parameters: 21 | fname: directory of a dataset in .csv format or data object created from loading dgb/tgb datasets 22 | header: whether first line of data file is header 23 | index: whether the first column is row indices 24 | t_col: column indext for timestamps (0 or 2) 25 | ts_sorted: if data are sorted based on timestamp 26 | 27 | Returns: 28 | temp_edgelist: A dictionary of edges and their frequency at each time interval 29 | """ 30 | 31 | start_col = 0 32 | if index: 33 | start_col = 1 34 | t_col += 1 35 | 36 | if t_col < 2: 37 | u_col = t_col + 1 38 | else: 39 | u_col = start_col 40 | v_col = u_col + 1 41 | 42 | cols_to_read = [u_col, v_col, t_col] 43 | 44 | if (isinstance(fname, str)): 45 | return _load_edgelist(fname, cols_to_read, header=header) 46 | elif isinstance(fname, type) or isinstance(fname, object): 47 | return _datasets_edgelist_loader(fname.data) 48 | else: 49 | raise TypeError("Invalid input") 50 | 51 | 52 | def _load_edgelist(fname, columns, header): 53 | """ 54 | read edges from the file and store them in a dictionary 55 | Parameters: 56 | fname: file address 57 | columns: order of the nodes and timestamp 58 | header: Whether the data file contains header 59 | """ 60 | try: 61 | edgelist = open(fname, "r") 62 | except: 63 | raise FileNotFoundError("No such file or directory.") 64 | edgelist.readline() 65 | lines = list(edgelist.readlines()) 66 | edgelist.close() 67 | 68 | u_idx, v_idx, ts_idx = columns 69 | temp_edgelist = {} 70 | unique_edges = {} 71 | edges_list = [] 72 | total_edges = 0 73 | sorted = True 74 | previous_t = 0 75 | if header: 76 | first_line = 1 77 | else: 78 | first_line = 0 79 | for i in range(first_line, len(lines)): 80 | line = lines[i] 81 | values = line.split(',') 82 | t = int(float(values[ts_idx])) 83 | u = values[u_idx].strip() 84 | v = values[v_idx].strip() 85 | 86 | if i == first_line: 87 | curr_t = t 88 | 89 | # Check if the dataset is sorted 90 | if t < previous_t: 91 | sorted = False 92 | previous_t = t 93 | 94 | if t not in temp_edgelist: 95 | temp_edgelist[t] = {} 96 | if (u, v) not in temp_edgelist[t]: 97 | temp_edgelist[t][(u, v)] = 1 98 | else: 99 | temp_edgelist[t][(u, v)] += 1 100 | 101 | # temp_edgelist[t].append((u, v)) 102 | if (u,v) not in unique_edges: 103 | unique_edges[(u, v)] = 1 104 | total_edges += 1 105 | # temp_edgelist[curr_t] = edges_list 106 | 107 | if sorted is False: 108 | print("edgelist not sorted, sorting dataset...") 109 | myKeys = list(temp_edgelist.keys()) 110 | myKeys.sort() 111 | temp_edgelist = {i: temp_edgelist[i] for i in myKeys} 112 | 113 | print("Number of loaded edges: " + str(total_edges)) 114 | print("Number of unique edges:" , len(unique_edges.keys())) 115 | print("Available timestamps: ", len(temp_edgelist.keys())) 116 | return temp_edgelist 117 | 118 | def _datasets_edgelist_loader(data) -> dict: 119 | """ 120 | load built-in datasets and tgb datasets 121 | """ 122 | temp_edgelist = {} 123 | total_edges = 0 124 | unique_edges = {} 125 | first_line = 0 126 | previous_t = 0 127 | edges_list = [] 128 | sorted = True 129 | for line in data: 130 | u = line[0] 131 | v = line[1] 132 | t = int(float(line[2])) 133 | if first_line == 0: 134 | curr_t = t 135 | first_line += 1 136 | 137 | # Check if the dataset is sorted 138 | if t < previous_t: 139 | sorted = False 140 | previous_t = t 141 | 142 | if t != curr_t: 143 | temp_edgelist[curr_t] = edges_list 144 | edges_list = [] 145 | curr_t = t 146 | 147 
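# ----------------------------------------------------------------------------
# Illustrative sketch (not library code) of what read_csv above returns for a
# csv path. With index=False and t_col=0 the source/destination columns are
# read from columns 1 and 2, and the result is a nested dictionary keyed by
# timestamp that maps each (u, v) pair to its frequency in that timestamp,
# schematically: {0: {('1', '2'): 1, ('2', '1'): 1}, 1: {('2', '2'): 2}}
# (node ids stay as strings). `toy_data.csv` refers to the tutorial file.
from tgx.io.read import read_csv

edgelist = read_csv("toy_data.csv", header=True, index=False, t_col=0)
for t, edges in edgelist.items():
    print(t, edges)
# ----------------------------------------------------------------------------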
| edges_list.append((u, v)) 148 | if (u,v) not in unique_edges: 149 | unique_edges[(u, v)] = 1 150 | total_edges += 1 151 | temp_edgelist[curr_t] = edges_list 152 | 153 | if sorted is False: 154 | print("Sorting dataset...") 155 | myKeys = list(temp_edgelist.keys()) 156 | myKeys.sort() 157 | temp_edgelist = {i: temp_edgelist[i] for i in myKeys} 158 | 159 | print("Number of loaded edges: " + str(total_edges)) 160 | print("Number of unique edges:" + str(len(unique_edges.keys()))) 161 | print("Available timestamps: ", len(temp_edgelist.keys())) 162 | 163 | return temp_edgelist 164 | 165 | 166 | def _load_edgelist_with_discretizer( 167 | fname : str, 168 | columns : list, 169 | time_scale : Union[str , int] = 86400, 170 | header : Optional[bool] = True) -> dict: 171 | """ 172 | load temporal edgelist into a dictionary 173 | assumption: the edges are ordered in increasing order of their timestamp 174 | ''' 175 | the timestamp in the edgelist is based cardinal 176 | more detail see here: https://github.com/srijankr/jodie 177 | need to merge edges in a period of time into an interval 178 | 86400 is # of secs in a day, good interval size 179 | ''' 180 | """ 181 | # print("Info: Interval size:", interval_size) 182 | edgelist = open(fname, "r") 183 | edgelist.readline() 184 | lines = list(edgelist.readlines()) 185 | edgelist.close() 186 | 187 | 188 | u_idx, v_idx, ts_idx = columns 189 | 190 | if isinstance(time_scale, str): 191 | if time_scale == "daily": 192 | interval_size = 86400 193 | elif time_scale == "weekly": 194 | interval_size = 86400 * 7 195 | elif time_scale == "monthly": 196 | interval_size = 86400 * 30 197 | elif time_scale == "yearly": 198 | interval_size = 86400* 365 199 | elif isinstance(time_scale, int): 200 | last_line = lines[-1] 201 | values = last_line.split(',') 202 | total_time = float(values[ts_idx]) 203 | interval_size = int(total_time / (time_scale-1)) 204 | else: 205 | raise TypeError("Invalid time interval") 206 | 207 | temporal_edgelist = {} 208 | total_n_edges = 0 209 | 210 | if header: 211 | first_line = 1 212 | else: 213 | first_line = 0 214 | 215 | 216 | for i in range(first_line, len(lines)): 217 | line = lines[i] 218 | values = line.split(',') 219 | 220 | total_n_edges += 1 221 | # values = line.strip().split(',') 222 | u = values[u_idx] # source node 223 | v = values[v_idx] # destination node 224 | ts = float(values[ts_idx]) # timestamp 225 | ts_bin_id = int(ts / interval_size) 226 | if ts_bin_id not in temporal_edgelist: 227 | temporal_edgelist[ts_bin_id] = {} 228 | temporal_edgelist[ts_bin_id][(u, v)] = 1 229 | else: 230 | if (u, v) not in temporal_edgelist[ts_bin_id]: 231 | temporal_edgelist[ts_bin_id][(u, v)] = 1 232 | else: 233 | temporal_edgelist[ts_bin_id][(u, v)] += 1 234 | 235 | print("Loading edge-list: Maximum timestamp is ", ts) 236 | print("Loading edge-list: Maximum timestamp-bin-id is", ts_bin_id) 237 | print("Loading edge-list: Total number of edges:", total_n_edges) 238 | return temporal_edgelist 239 | 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /docs/tutorials/data_loader.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Load built-in and ported datasets from TGB\n", 8 | "This tutorial shows you how to load built-in datasets\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | 
"source": [ 17 | "import tgx" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### Access TGB datasets\n", 25 | "In order to load TGB datasets you should first install the TGB package:\n", 26 | "\n", 27 | "`pip install py-tgb`\n", 28 | "\n", 29 | "Then write name of the dataset in the parantheses:\n", 30 | "\n", 31 | "`tgx.data.tgb(\"name\")`\n", 32 | "\n", 33 | "The dataset names are as follow\n", 34 | "\n", 35 | "`tgbl-wiki`, `tgbl-review`, `tgbl-coin`, `tgbl-comment`, `tgbl-flight`\n", 36 | "\n", 37 | "`tgbn-trade`, `tgbn-genre`, `tgbn-reddit`" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "raw file found, skipping download\n", 50 | "Dataset directory is /mnt/f/code/TGB/tgb/datasets/tgbl_wiki\n", 51 | "loading processed file\n", 52 | "Number of loaded edges: 157474\n", 53 | "Number of unique edges:18257\n", 54 | "Available timestamps: 152757\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "data_name = \"tgbl-wiki\" \n", 60 | "dataset = tgx.tgb_data(data_name) #tgb datasets\n", 61 | "ctdg = tgx.Graph(dataset)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Access other datasets\n", 69 | "\n", 70 | "To load built-in TGX datasets (from [Poursafaei et al. 2022](https://openreview.net/forum?id=1GVpwr2Tfdg)). You can write the name of the dataset instead of `datasest_name`:\n", 71 | "\n", 72 | "`tgx.data.dataset_name`\n", 73 | "\n", 74 | "The dataset names are as:\n", 75 | "\n", 76 | "`mooc`, `uci`, `uslegis`, `unvote`, `untrade`, `flight`, `wikipedia`, `reddit`, `lastfm`, `contact`, `canparl`, `socialevo`, `enron`" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "Number of loaded edges: 59835\n", 89 | "Number of unique edges:20296\n", 90 | "Available timestamps: 58911\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "dataset = tgx.builtin.uci()\n", 96 | "ctdg = tgx.Graph(dataset)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Custom Datasets" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "You can load your own custom dataset from `.csv` files and read it into a `tgx.Graph` object\n", 111 | "\n", 112 | "Let's start by loading a toy dataset into pandas and then visualize the rows" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 4, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/html": [ 123 | "
\n", 124 | "\n", 137 | "\n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | "
timesourcedestination
0012
1021
2031
3122
4112
5131
\n", 185 | "
" 186 | ], 187 | "text/plain": [ 188 | " time source destination\n", 189 | "0 0 1 2\n", 190 | "1 0 2 1\n", 191 | "2 0 3 1\n", 192 | "3 1 2 2\n", 193 | "4 1 1 2\n", 194 | "5 1 3 1" 195 | ] 196 | }, 197 | "execution_count": 4, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "import pandas as pd\n", 204 | "toy_fname = 'toy_data.csv'\n", 205 | "df = pd.read_csv(toy_fname)\n", 206 | "df" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "Number of loaded edges: 5\n", 219 | "Number of unique edges: 4\n", 220 | "Available timestamps: 2\n" 221 | ] 222 | }, 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "" 227 | ] 228 | }, 229 | "execution_count": 5, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "from tgx.io.read import read_csv\n", 236 | "# header indicates if there is a header row at the top\n", 237 | "# index whether the first column is row indices\n", 238 | "# t_col indicates which column corresponds to timestamps\n", 239 | "edgelist = read_csv(toy_fname, \n", 240 | " header=True,\n", 241 | " index=False,\n", 242 | " t_col=0,)\n", 243 | "tgx.Graph(edgelist=edgelist)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "### Subsampling graphs\n", 251 | "\n", 252 | "To perform subsmpling graphs you should follow these steps:\n", 253 | "\n", 254 | "1. descritize the data\n", 255 | "\n", 256 | "2. create a graph object of data (G)\n", 257 | "\n", 258 | "3. subsample the graph by `tgx.utils.graph_utils.subsampling`\n", 259 | "\n", 260 | "4. create a new graph from the subsampled subgraph" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 6, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "Generate graph subsample...\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "from tgx.utils.graph_utils import subsampling\n", 278 | "\n", 279 | "sub_edges = subsampling(ctdg, selection_strategy=\"random\", N=1000) #N is # of nodes to be sampled \n", 280 | "subgraph = tgx.Graph(edgelist=sub_edges)" 281 | ] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3 (ipykernel)", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.9.6" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 4 305 | } 306 | -------------------------------------------------------------------------------- /tgx/viz/TEA.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from typing import Union, Optional 4 | from tgx.utils.graph_utils import discretize_edges 5 | from tgx.utils.plotting_utils import create_ts_list 6 | __all__ = ["TEA"] 7 | 8 | def TEA( 9 | temp_edgelist : Union[object, dict], 10 | filepath : Optional[str] = ".", 11 | fig_size : tuple = (7,5), 12 | font_size : int = 20, 13 | network_name : str = None, 14 | time_scale : Union[str, int] = None, 15 | real_dates : bool = None, 16 | 
test_split : bool = False, 17 | density : bool = False 18 | ): 19 | r""" 20 | generating TEA plot 21 | 22 | Parameters: 23 | temp_edgelist: a dictionary of temporal edges or a dataset object. 24 | filepath: Path to save the TEA Plot. 25 | fig_size: Size of the figure to save. 26 | font_size: Size of the text in the figure. 27 | network_name: Name of the dataset to be used in the TEA plot file. 28 | time_scale: time_scale for discretizing data if already not done. 29 | real_dates: Whether to use the real dates from dataset. 30 | test_split: Whether show the test split on the plot. 31 | density: Whether to return edge density and edge frequency dictioneries. 32 | """ 33 | if isinstance(temp_edgelist, object): 34 | if temp_edgelist.freq_data is None: 35 | temp_edgelist.count_freq() 36 | temp_edgelist = temp_edgelist.freq_data 37 | 38 | # check number of unique timestamps: 39 | unique_ts = list(temp_edgelist.keys()) 40 | # if len(unique_ts) > max_time_scale: 41 | # inp = input(f"There are {unique_ts} timestamps in the data.\nDo you want to discretize the data to 1000 timestamps?(y/n)").lower() 42 | # if inp == "y": 43 | # temp_edgelist = edgelist_discritizer(temp_edgelist, 44 | # unique_ts, 45 | # time_scale = max_time_scale) 46 | if time_scale is not None: 47 | temp_edgelist = discretize_edges(temp_edgelist, 48 | time_scale = time_scale) 49 | 50 | 51 | ts_edges_dist, ts_edges_dist_density, edge_frequency_dict = TEA_process_edgelist_per_timestamp(temp_edgelist) 52 | 53 | TEA_plot_edges_bar(ts_edges_dist, 54 | filepath = filepath, 55 | fig_size = fig_size, 56 | font_size = font_size, 57 | network_name=network_name, 58 | real_dates = real_dates, 59 | test_split = test_split) 60 | 61 | if density: 62 | return ts_edges_dist_density, edge_frequency_dict 63 | 64 | 65 | 66 | def TEA_process_edgelist_per_timestamp(temp_edgelist): 67 | # generate distribution of the edges history 68 | unique_ts = list(temp_edgelist.keys()) 69 | # unique_ts.sort() 70 | # print(f"There are {len(unique_ts)} timestamps.") 71 | 72 | # get node set & total number of nodes 73 | node_dict = {} 74 | for t, e_dict in temp_edgelist.items(): 75 | for e, exist in e_dict.items(): 76 | if e[0] not in node_dict: 77 | node_dict[e[0]] = 1 78 | if e[1] not in node_dict: 79 | node_dict[e[1]] = 1 80 | num_nodes = len(node_dict) 81 | num_e_fully_connected = num_nodes * (num_nodes - 1) 82 | 83 | edge_frequency_dict = {} # how many times an edge is seen 84 | ts_edges_dist = [] # contains different features specifying the characteristics of the edge distribution over time 85 | ts_edges_dist_density = [] 86 | for curr_t in unique_ts: 87 | 88 | # if curr_t < 2: 89 | # print("curr_t", curr_t) 90 | prev_ts = [ts for ts in unique_ts if ts < curr_t] 91 | edges_in_prev_ts = {} 92 | for bts in prev_ts: 93 | edges_in_prev_ts.update(temp_edgelist[bts]) 94 | 95 | curr_ts_edge_list = temp_edgelist[curr_t] 96 | for e in curr_ts_edge_list: 97 | if e not in edge_frequency_dict: 98 | edge_frequency_dict[e] = 1 99 | else: 100 | edge_frequency_dict[e] += 1 101 | 102 | if len(curr_ts_edge_list) > 0: 103 | curr_ts_edges_dist = {'ts': curr_t, 104 | 'new': len([e for e in curr_ts_edge_list if e not in edges_in_prev_ts]), 105 | 'repeated': len([e for e in curr_ts_edge_list if e in edges_in_prev_ts]), 106 | 'not_repeated': len([e for e in edges_in_prev_ts if e not in curr_ts_edge_list]), 107 | 'total_curr_ts': len(curr_ts_edge_list), 108 | 'total_seen_until_curr_ts': len(edges_in_prev_ts) + len(curr_ts_edge_list) 109 | } 110 | curr_ts_edges_dist_density = {'ts': 
curr_t, 111 | 'new': (curr_ts_edges_dist['new'] * 1.0) / num_e_fully_connected, 112 | 'repeated': (curr_ts_edges_dist['repeated'] * 1.0) / num_e_fully_connected, 113 | 'not_repeated': (curr_ts_edges_dist[ 114 | 'not_repeated'] * 1.0) / num_e_fully_connected, 115 | 'total_curr_ts': (curr_ts_edges_dist[ 116 | 'total_curr_ts'] * 1.0) / num_e_fully_connected, 117 | 'total_seen_until_curr_ts': (curr_ts_edges_dist[ 118 | 'total_seen_until_curr_ts'] * 1.0) / num_e_fully_connected, 119 | } 120 | else: 121 | curr_ts_edges_dist = {'ts': curr_t, 122 | 'new': 0, 123 | 'repeated': 0, 124 | 'not_repeated': 0, 125 | 'total_curr_ts': 0, 126 | 'total_seen_until_curr_ts': len(edges_in_prev_ts) + len(curr_ts_edge_list) 127 | } 128 | curr_ts_edges_dist_density = {'ts': curr_t, 129 | 'new': 0, 130 | 'repeated': 0, 131 | 'not_repeated': 0, 132 | 'total_curr_ts': 0, 133 | 'total_seen_until_curr_ts': 0, 134 | } 135 | ts_edges_dist.append(curr_ts_edges_dist) 136 | ts_edges_dist_density.append(curr_ts_edges_dist_density) 137 | # print(len(edges_in_prev_ts)) 138 | # print(len(ts_edges_dist)) 139 | # print(edge_frequency_dict) 140 | # break 141 | return ts_edges_dist, ts_edges_dist_density, edge_frequency_dict 142 | 143 | 144 | def TEA_plot_edges_bar(ts_edges_dist: list, 145 | filepath: str = ".", 146 | fig_size: list = (9,5), 147 | font_size: int = 20, 148 | network_name: str = None, 149 | real_dates: list = None, 150 | time_scale: list = None, 151 | test_split: bool = False, 152 | show: bool =False): 153 | r""" 154 | Making TEA plot and save into pdf file. 155 | Args: 156 | ts_edges_dist: list of dictionaries containing the edge distribution over time. 157 | filepath: Path to save the TEA Plot. 158 | fig_size: Size of the figure to save. 159 | font_size: Size of the text in the figure. 160 | network_name: Name of the dataset to be used in the TEA plot file. 161 | real_dates: list of real dates as ticks 162 | time_scale: time_scale for discretizing data if already not done. 163 | test_split: Whether show the test split on the plot. 164 | show: Whether to show the plot. 165 | """ 166 | 167 | 168 | ts_edges_dist_df = pd.DataFrame(ts_edges_dist, columns=['ts', 'new', 'repeated', 169 | 'not_repeated', 170 | 'total_curr_ts', 171 | 'total_seen_until_curr_ts']) 172 | 173 | 174 | ### Additional Stats ### 175 | mean = ts_edges_dist_df.mean(axis=0) 176 | # print("INFO: Network Name:", network_name) 177 | # print("INFO: AVG. stats. 
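# ----------------------------------------------------------------------------
# Worked illustration (not library code) of the per-timestamp counts built by
# TEA_process_edgelist_per_timestamp above. Suppose the edges seen strictly
# before timestamp t are {(1, 2), (1, 3)} and the edges at t are {(1, 3), (2, 3)}:
edges_in_prev_ts = {(1, 2): 1, (1, 3): 1}
curr_ts_edge_list = {(1, 3): 1, (2, 3): 1}
new = len([e for e in curr_ts_edge_list if e not in edges_in_prev_ts])           # 1 -> (2, 3)
repeated = len([e for e in curr_ts_edge_list if e in edges_in_prev_ts])          # 1 -> (1, 3)
not_repeated = len([e for e in edges_in_prev_ts if e not in curr_ts_edge_list])  # 1 -> (1, 2)
# total_curr_ts = 2 and total_seen_until_curr_ts = len(prev) + len(curr) = 4;
# 'repeated' and 'new' become the stacked bars drawn in TEA_plot_edges_bar below.
# ----------------------------------------------------------------------------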
over all timestamps: ", mean) 178 | # print("INFO: ratio of avg.(new)/avg.(total_curr_ts): {:.2f}".format(mean['new'] / mean['total_curr_ts'])) 179 | ### 180 | 181 | fig, ax = plt.subplots(figsize=fig_size) # lastfm, mooc, reddit, UNtrade, UNvote 182 | plt.subplots_adjust(bottom=0.2, left=0.2) 183 | font_size = font_size 184 | ticks_font_size = 15 185 | plt.yticks(fontsize=ticks_font_size) 186 | plt.xticks(fontsize=ticks_font_size) 187 | if real_dates is not None: 188 | start = real_dates[0] 189 | end = real_dates[1] 190 | metric = real_dates[2] 191 | create_ts_list(start, end, metric=metric, interval=time_scale) 192 | else: 193 | duration = ts_edges_dist_df['ts'].tolist() 194 | timestamps = [i for i in range(len(duration))] 195 | 196 | new = ts_edges_dist_df['new'].tolist() 197 | repeated = ts_edges_dist_df['repeated'].tolist() 198 | # print(len(timestamps), repeated, new) 199 | # plotting stuffs 200 | # bar plot 201 | plt.bar(timestamps, repeated, label='Repeated', color='#404040', alpha=0.4) 202 | plt.bar(timestamps, new, label='New', bottom=repeated, color='#ca0020', alpha=0.8, hatch='//') 203 | # test split line 204 | if test_split: 205 | plt.axvline(x=(timestamps[int(0.85 * len(timestamps))]), color="blue", linestyle="--", linewidth=2) 206 | plt.text((timestamps[int(0.85 * len(timestamps))]), 0, 207 | 'x', va='center', ha='center', fontsize=font_size, fontweight='heavy', color='blue') 208 | 209 | plt.margins(x=0) 210 | plt.xlabel("Timestamp", fontsize=font_size) 211 | plt.ylabel("Number of edges", fontsize=font_size) 212 | plt.legend(fontsize = 13) 213 | if filepath is not None: 214 | plt.savefig(f"{filepath}/{network_name}_TEA.pdf") 215 | print("plot saved as " + f"{filepath}/{network_name}_TEA.pdf") 216 | if (show): 217 | plt.show() 218 | 219 | 220 | -------------------------------------------------------------------------------- /tgx/classes/graph.py: -------------------------------------------------------------------------------- 1 | # import networkx as nx 2 | import copy 3 | import csv 4 | import numpy as np 5 | from typing import Optional, Union 6 | from tgx.utils.graph_utils import discretize_edges, frequency_count, subsampling 7 | from tgx.io.read import read_csv 8 | 9 | #TODO should contain a new property tracking the number of timestamps#TODO should contain a new property tracking the number of timestamps 10 | class Graph(object): 11 | def __init__(self, 12 | dataset: Optional[object] = None, 13 | fname: Optional[str] = None, 14 | edgelist: Optional[dict] = None): 15 | """ 16 | Create a Graph object with specific characteristics 17 | Args: 18 | dataset: a dataset object 19 | edgelist: a dictionary of temporal edges in the form of {t: {(u, v), freq}} 20 | """ 21 | 22 | if dataset is not None: 23 | if isinstance(dataset, type) or isinstance(dataset,object): 24 | data = read_csv(dataset) 25 | elif fname is not None and isinstance(fname, str): 26 | data = read_csv(fname) 27 | elif edgelist is not None and isinstance(edgelist, dict): 28 | data = edgelist 29 | else: 30 | raise TypeError("Please enter valid input.") 31 | 32 | init_key = list(data.keys())[0] 33 | if isinstance(data[init_key], list): 34 | data = self._list2dict(data) 35 | self.data = data 36 | self.subsampled_graph = None 37 | self.freq_data = None 38 | self.id_map = None #a map from original node id to new node id based on their order of appearance 39 | 40 | def _list2dict(self, data) -> dict: 41 | r""" 42 | convert data into a dictionary of dictionary of temporal edges 43 | """ 44 | new_data = {} 45 | for t in 
data.keys(): 46 | edgelist = {} 47 | for u,v in data[t]: 48 | edgelist[(u,v)] = 1 49 | new_data[t] = edgelist 50 | return new_data 51 | 52 | #TODO support edge features, edge weights, node features and more, currently supports, timestamp, source, destination 53 | def export_full_data(self): 54 | """ 55 | convert self.data inot a dictionary of numpy arrays similar to TGB LinkPropPredDataset 56 | """ 57 | num_edge = self.number_of_edges() 58 | sources = np.zeros(num_edge, dtype=np.int64) 59 | destinations = np.zeros(num_edge, dtype=np.int64) 60 | timestamps = np.zeros(num_edge, dtype=np.int64) 61 | idx = 0 62 | edgelist = self.data 63 | 64 | for ts, edge_data in edgelist.items(): 65 | for u,v in edge_data.keys(): 66 | sources[idx] = u 67 | destinations[idx] = v 68 | timestamps[idx] = ts 69 | idx += 1 70 | full_data = { 71 | "sources": sources, 72 | "destinations": destinations, 73 | "timestamps": timestamps, 74 | } 75 | return full_data 76 | 77 | def shift_time_to_zero(self) -> None: 78 | r""" 79 | shift all edges in the dataset to start with timestamp 0 80 | """ 81 | min_t = list(self.data.keys())[0] 82 | new_data = {} 83 | for ts in self.data.keys(): 84 | new_data[ts - min_t] = self.data[ts] 85 | self.data = new_data 86 | 87 | def discretize(self, 88 | time_scale: Union[str, int], 89 | store_unix: bool = True, 90 | freq_weight: bool = False) -> object: 91 | """ 92 | discretize the graph object based on the given time interval 93 | Args: 94 | time_scale: time interval to discretize the graph 95 | store_unix: whether to store converted unix time in a list 96 | freq_weight: whether to weight the edges by frequency in the new graph object 97 | """ 98 | new_G = copy.deepcopy(self) 99 | # discretie differently based on # of intervals of time granularity 100 | output = discretize_edges(self.data, 101 | time_scale = time_scale, 102 | store_unix = store_unix, 103 | freq_weight = freq_weight) 104 | disc_G = output[0] 105 | new_G.data = disc_G 106 | if (store_unix): 107 | return new_G, output[1] 108 | else: 109 | return (new_G, None) 110 | 111 | def count_freq(self): 112 | self.freq_data = frequency_count(self.data) 113 | return self 114 | 115 | def subsampling(self, 116 | node_list: Optional[list] = [], 117 | random_selection: Optional[bool] = True, 118 | N: Optional[int] = None) -> object: 119 | new_G = copy.deepcopy(self) 120 | new_G.data = subsampling(new_G, node_list = node_list, random_selection=random_selection, N=N) 121 | return new_G 122 | 123 | def number_of_edges(self) -> int: 124 | r""" 125 | Calculate total number of nodes present in an edgelist 126 | """ 127 | edgelist = self.data 128 | e_num = 0 129 | for _, edges in edgelist.items(): 130 | e_num += len(edges) 131 | 132 | return e_num 133 | 134 | def unique_edges(self) -> int: 135 | r""" 136 | Calculate the number of unique edges 137 | Parameters: 138 | graph_edgelist: Dictionary containing graph data 139 | """ 140 | unique_edges = {} 141 | for _, e_list in self.data.items(): 142 | for e in e_list: 143 | if e not in unique_edges: 144 | unique_edges[e] = 1 145 | return len(unique_edges) 146 | 147 | 148 | def total_nodes(self) -> int: 149 | r""" 150 | Calculate total number of unique nodes present in an edgelist 151 | """ 152 | edgelist = self.data 153 | node_list = {} 154 | for _, edge_data in edgelist.items(): 155 | for u,v in edge_data.keys(): 156 | if u not in node_list: 157 | node_list[u] = 1 158 | if v not in node_list: 159 | node_list[v] = 1 160 | return len(node_list) 161 | 162 | 163 | def max_nid(self) -> int: 164 | r""" 165 | 
find the largest node ID in the dataset 166 | """ 167 | edgelist = self.data 168 | max_id = 0 169 | for _, edge_data in edgelist.items(): 170 | for u,v in edge_data.keys(): 171 | if u > max_id: 172 | max_id = u 173 | if v > max_id: 174 | max_id = v 175 | return max_id #offset by 1 176 | 177 | def min_nid(self) -> int: 178 | r""" 179 | find the smallest node ID in the dataset 180 | """ 181 | edgelist = self.data 182 | min_id = 1000000000 183 | for _, edge_data in edgelist.items(): 184 | for u,v in edge_data.keys(): 185 | if u < min_id: 186 | min_id = u 187 | if v < min_id: 188 | min_id = v 189 | return min_id #offset by 1 190 | 191 | 192 | def map_nid(self) -> dict: 193 | r""" 194 | remap all node ids in the dataset to start from 0 and based on node order of appearance. Also updates self.data 195 | Output: 196 | id_map: a dictionary mapping original node id to new node id 197 | """ 198 | edgelist = self.data 199 | id_map = {} 200 | nid = 0 201 | new_edgelist = {} 202 | for ts, edge_data in edgelist.items(): 203 | new_edgelist[ts] = {} 204 | for u,v in edge_data.keys(): 205 | if u not in id_map: 206 | id_map[u] = nid 207 | nid += 1 208 | if v not in id_map: 209 | id_map[v] = nid 210 | nid += 1 211 | new_edgelist[ts][(id_map[u],id_map[v])] = edge_data[(u,v)] 212 | self.data = new_edgelist 213 | return id_map 214 | 215 | 216 | def node_per_ts(self): 217 | active_nodes = {} 218 | for ts in range(len(self.data)): 219 | edgelist_t = self.data[ts] 220 | active_nodes.append(self.edgelist_node_count(edgelist_t)) 221 | return active_nodes 222 | 223 | def edgelist_node_count(self, edge_data: list): 224 | node_list = {} 225 | for edge in edge_data: 226 | (u, v) = edge 227 | if u not in node_list: 228 | node_list[u] = 1 229 | if v not in node_list: 230 | node_list[v] = 1 231 | return len(node_list.keys()) 232 | 233 | def edgelist_node_list(self, edge_data: list): 234 | node_list = {} 235 | for edge in edge_data: 236 | (u, v) = edge 237 | if u not in node_list: 238 | node_list[u] = 1 239 | if v not in node_list: 240 | node_list[v] = 1 241 | return list(node_list.keys()) 242 | 243 | def nodes_list(self) -> list: 244 | r""" 245 | Return a list of nodes present in an edgelist 246 | """ 247 | node_list = {} 248 | edgelist = self.data 249 | for _, edge_data in edgelist.items(): 250 | for u,v in edge_data.keys(): 251 | if u not in node_list: 252 | node_list[u] = 1 253 | if v not in node_list: 254 | node_list[v] = 1 255 | self.node_list = list(node_list.keys()) 256 | return list(node_list.keys()) 257 | 258 | def check_time_gap(self) -> bool: 259 | r""" 260 | Check whether the edgelist timestamps have gaps or not (increments bigger than 1) 261 | Returns: 262 | time_gap: a boolean indicating whether there is a time gap or not 263 | """ 264 | time_gap = False 265 | ts = list(self.data.keys()) 266 | for i in range(1, len(ts)): 267 | if ts[i] - ts[i-1] > 1: 268 | time_gap = True 269 | return time_gap 270 | return time_gap 271 | 272 | def save2csv(self, 273 | fname:str = "output") -> None: 274 | r""" 275 | Save the graph object in an edgelist format to a csv file 276 | Args: 277 | fname: name of the csv file to save the graph, no csv suffix needed 278 | """ 279 | outname = fname + ".csv" 280 | #iterate through all edges 281 | with open(outname, 'w') as csvfile: 282 | print ("saving to ", outname) 283 | csvwriter = csv.writer(csvfile, delimiter=',') 284 | csvwriter.writerow(['timestamp'] + ['source'] + ['destination']) 285 | for t, edges_list in self.data.items(): 286 | for edge in edges_list: 287 | (u, v) = edge 288 
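# ----------------------------------------------------------------------------
# Illustrative usage sketch (not library code) for the Graph class above, built
# directly from an edgelist dictionary of the form {timestamp: {(u, v): freq}};
# the values below are made up.
import tgx

g = tgx.Graph(edgelist={0: {(0, 1): 1, (1, 2): 1},
                        3: {(1, 2): 2}})
print(g.number_of_edges())   # 3
print(g.unique_edges())      # 2
print(g.total_nodes())       # 3
print(g.check_time_gap())    # True: timestamps jump from 0 to 3
g.save2csv("toy_graph")      # writes toy_graph.csv with timestamp,source,destination
# ----------------------------------------------------------------------------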
| csvwriter.writerow([t] + [u] + [v]) 289 | 290 | 291 | -------------------------------------------------------------------------------- /tgx/utils/graph_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Union, Optional 3 | 4 | __all__ = ["train_test_split", 5 | "discretize_edges", 6 | "subsampling", 7 | "node_list", 8 | "is_discretized", 9 | "frequency_count"] 10 | 11 | SEC_IN_MIN = 60 12 | SEC_IN_HOUR = 3600 13 | SEC_IN_DAY = 86400 14 | SEC_IN_WEEK = 86400 * 7 15 | SEC_IN_MONTH = 86400 * 30 16 | SEC_IN_YEAR = 86400 * 365 17 | SEC_IN_BIYEARLY = 86400 * 365 * 2 18 | 19 | # helper function to do ceiling divison, i.e. 5/2 = 3 20 | def ceiling_division(n, d): 21 | q, r = divmod(n, d) 22 | return q + bool(r) 23 | 24 | 25 | 26 | def discretize_edges(edgelist: dict, 27 | time_scale: Union[int,str], 28 | store_unix: Optional[bool] = False, 29 | freq_weight: Optional[bool] = False) -> list: 30 | """ 31 | util function for discretizing edgelist, expected timestamp on edges are unixtimestamp 32 | this func supports discretization of edge timestamp 33 | 1. by providing the number of intervals (int), it will equally divide the data into that number of intervals. Note that the last bin can have less duration than others. 34 | 2. by providing a time granularity (str), it will divide the data into intervals based on the given granularity, i.e. "hourly", "daily", "weekly", "monthly", "yearly", the starting time of the dataset is consider the start of the first interval 35 | Parameters: 36 | edgelist: dict, dictionary of edges 37 | time_scale: int or str, time interval to discretize the graph 38 | store_unix: bool, whether to return the converted timestamps in unix format 39 | freq_weight: bool, whether to weight the edges based on their frequency 40 | Returns: 41 | output list: the first item in the list is always the updated edgelist (dict, dictionary of edges with discretized timestamps) and the second item is the converted timestamps in unix format (list) if store_unix is True 42 | """ 43 | unique_ts = list(edgelist.keys()) 44 | total_time = unique_ts[-1] - unique_ts[0] 45 | 46 | #! 
adding intermediate hour and days, to remove 47 | 48 | if time_scale is not None: 49 | if isinstance(time_scale, int): 50 | interval_size = total_time // time_scale #integer timestamp of the bin, discounting any bin that has a smaller duration than others 51 | elif isinstance(time_scale, str): 52 | if time_scale == "minutely": 53 | interval_size = SEC_IN_MIN 54 | elif time_scale == "hourly": 55 | interval_size = SEC_IN_HOUR 56 | elif time_scale == "2hourly": 57 | interval_size = 2*SEC_IN_HOUR 58 | elif time_scale == "4hourly": 59 | interval_size = 4*SEC_IN_HOUR 60 | elif time_scale == "6hourly": 61 | interval_size = 6*SEC_IN_HOUR 62 | elif time_scale == "12hourly": 63 | interval_size = 12*SEC_IN_HOUR 64 | elif time_scale == "daily": 65 | interval_size = SEC_IN_DAY 66 | elif time_scale == "2daily": 67 | interval_size = 2*SEC_IN_DAY 68 | elif time_scale == "4daily": 69 | interval_size = 4*SEC_IN_DAY 70 | elif time_scale == "weekly": 71 | interval_size = SEC_IN_WEEK 72 | elif time_scale == "monthly": 73 | interval_size = SEC_IN_MONTH 74 | elif time_scale == "yearly": 75 | interval_size = SEC_IN_YEAR 76 | elif time_scale == "biyearly": 77 | interval_size = SEC_IN_BIYEARLY 78 | else: 79 | raise TypeError("Invalid time interval") 80 | else: 81 | raise TypeError("Please provide a time interval") 82 | 83 | num_time_scale = ceiling_division(total_time, interval_size) 84 | print(f'Discretizing data to {num_time_scale} timestamps...') 85 | 86 | updated_edgelist = {} 87 | 88 | if (store_unix): 89 | unix_dict = [] 90 | start_time = int(unique_ts[0]) 91 | 92 | for ts, edges_list in edgelist.items(): 93 | #? no longer assume ts start with 0 94 | bin_ts = ceiling_division(ts-start_time, interval_size) #will correctly put edges into the last bin 95 | 96 | for edge in edges_list: 97 | if bin_ts not in updated_edgelist: 98 | updated_edgelist[bin_ts] = {edge: 1} 99 | else: 100 | if (not freq_weight): 101 | updated_edgelist[bin_ts][edge] = 1 102 | else: 103 | if (edge in updated_edgelist[bin_ts]): 104 | updated_edgelist[bin_ts][edge] += 1 105 | else: 106 | updated_edgelist[bin_ts][edge] = 1 107 | 108 | if (store_unix): 109 | #! 
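# ----------------------------------------------------------------------------
# Worked illustration (not library code) of the binning performed above:
# bin_ts = ceiling_division(ts - start_time, interval_size), so with daily
# granularity (interval_size = SEC_IN_DAY) edges at the very first timestamp
# map to bin 0, edges within the first day map to bin 1, and an edge stamped
# 25 hours after the start falls into bin 2.
assert ceiling_division(0, SEC_IN_DAY) == 0
assert ceiling_division(3600, SEC_IN_DAY) == 1
assert ceiling_division(25 * 3600, SEC_IN_DAY) == 2
# ----------------------------------------------------------------------------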
should use bin_ts here 110 | unix_ts = start_time + bin_ts * interval_size 111 | 112 | # unix_ts = start_time + int(ts // interval_size) * interval_size #round to the nearest start time 113 | unix_ts = int(unix_ts) 114 | unix_dict.extend([unix_ts] * len(edges_list)) 115 | 116 | output = [updated_edgelist] 117 | if (store_unix): 118 | output.append(unix_dict) 119 | return output 120 | 121 | def subsampling(graph: object, 122 | node_list: Optional[list] = [], 123 | selection_strategy: str = "random", 124 | N: Optional[int] = 100 125 | ) -> dict: 126 | """ 127 | Subsampling a part of graph by only monitoring the contacts from specific nodes' list 128 | 129 | Parameters: 130 | graph: graph object 131 | node_list: list, a set of nodes to extract their contacts from the graph 132 | selection_strategy: str, currently supports random sampling 133 | N: int, number of nodes to be randomly sampled from graph 134 | 135 | Returns: 136 | new_edgelist: dict, a dictionary of edges corresponding to nodes in the node_list 137 | """ 138 | print("Generate graph subsample...") 139 | edgelist = graph.data 140 | nodes = graph.nodes_list() 141 | 142 | if (len(node_list) == 0): #decide on selection strategy if nodelist not provided 143 | if (selection_strategy == "random"): 144 | node_list = list(np.random.choice(nodes, size = N, replace = False)) 145 | else: 146 | raise ValueError("Selection strategy not supported", selection_strategy) 147 | 148 | new_edgelist = {} 149 | for t, edge_data in edgelist.items(): 150 | for (u,v), f in edge_data.items(): 151 | if u in node_list or v in node_list: 152 | if t not in new_edgelist: 153 | new_edgelist[t] = {} 154 | new_edgelist[t][(u, v)] = f 155 | else: 156 | new_edgelist[t][(u, v)] = f 157 | return new_edgelist 158 | 159 | def frequency_count(edgelist: dict): 160 | new_edgelist = {} 161 | 162 | for t, edges_list in edgelist.items(): 163 | for edge in edges_list: 164 | (u, v) = edge 165 | 166 | # Check if this is the first edge occurning in this timestamp 167 | if t not in new_edgelist: 168 | new_edgelist[t] = {} 169 | new_edgelist[t][(u, v)] = 1 170 | 171 | else: 172 | if (u, v) not in new_edgelist[t]: 173 | new_edgelist[t][(u, v)] = 1 # If the edge was not occured in this timestamp before 174 | else: 175 | new_edgelist[t][(u, v)] += 1 176 | 177 | return new_edgelist 178 | 179 | def node_list(dict_edgelist: dict) -> list: 180 | 181 | """ 182 | create a list of nodes from edgelist dictionary 183 | """ 184 | node_list = {} 185 | for _, edge_data in dict_edgelist.items(): 186 | for (u,v), _ in edge_data.items(): 187 | if u not in node_list: 188 | node_list[u] = 1 189 | if v not in node_list: 190 | node_list[v] = 1 191 | return list(node_list.keys()) 192 | 193 | 194 | def train_test_split(data : dict, 195 | val : bool = False, 196 | ratio : list = [85, 15]) -> dict: 197 | """ 198 | Generate train/test split for the data 199 | 200 | Parameters: 201 | data:dictionary of data 202 | val: whether we want to have a validation split as well 203 | ratio: list indication the ratio of the data in split. Sum of the list components should be 100. 204 | 205 | Returns: 206 | two (train/test) or three (train/val/test) data dictionaries 207 | """ 208 | sum = 0 209 | for i in ratio: 210 | sum += i 211 | if sum != 100: 212 | raise ValueError("invalid train/test split ratio. Sum of the ratios should be 100.") 213 | 214 | if val and len(ratio) != 3: 215 | raise Exception("Provide train/val/test ratio") 216 | elif not val and len(ratio) == 3: 217 | print("Warning! 
Data is being splitted to train and test only!") 218 | 219 | data_len = len(data) 220 | train_split = int(data_len * ratio[0] / 100) 221 | train_data = {k: v for k, v in data.items() if k < train_split} 222 | if val: 223 | val_split = int(data_len * ratio[1] / 100) + train_split 224 | val_data = {k: v for k, v in data.items() if train_split <= k < val_split} 225 | test_data = {k: v for k, v in data.items() if val_split <= k <= data_len} 226 | return train_data, val_data, test_data 227 | 228 | else: 229 | test_data = {k: v for k, v in data.items() if train_split <= k <= data_len} 230 | return train_data, test_data 231 | 232 | 233 | def is_discretized(edgelist: Optional[dict], 234 | max_timestamps: Optional[int] = 10000) -> bool: 235 | r""" 236 | Check if an edgelist is discretized or not. 237 | """ 238 | timestamps = list(edgelist.keys()) 239 | discretized = True 240 | if len(timestamps) > max_timestamps: 241 | discretized = False 242 | 243 | return discretized 244 | 245 | def list2csv(lst: list, 246 | fname: str, 247 | delimiter: str = ",", 248 | fmt: str = '%i'): 249 | out_list = np.array(lst) 250 | np.savetxt(fname, out_list, delimiter=delimiter, fmt=fmt) 251 | 252 | 253 | 254 | 255 | # def edgelist_discritizer(edgelist: dict, 256 | # time_scale: Union[str, int]): 257 | # """ 258 | # util function for discretizing edgelist, expected timestamp on edges are unixtimestamp 259 | # this func supports discretization in two different ways 260 | # 1. by providing the number of intervals (int), it will equally divide the data into that number of intervals. Note that the last bin can have less duration than others. 261 | # 2. by providing a time granularity (str), it will divide the data into intervals based on the given granularity, i.e. "hourly", "daily", "weekly", "monthly", "yearly" 262 | # In the second way however, the intervals will be based on utc timezone (dividing into days, hours this way) thus both first bin and last bin can have last duration than others. 263 | 264 | # Parameters: 265 | # edgelist: dict, dictionary of edges 266 | # time_scale: str or int, time interval to discretize the graph 267 | # Returns: 268 | # updated_edgelist: dict, dictionary of edges with discretized timestamps 269 | # """ 270 | 271 | # unique_ts = list(edgelist.keys()) 272 | 273 | # total_time = unique_ts[-1] - unique_ts[0] 274 | # if time_scale is not None: 275 | # if isinstance(time_scale, str): 276 | # if time_scale == "hourly": 277 | # interval_size = SEC_IN_HOUR 278 | # elif time_scale == "daily": 279 | # interval_size = SEC_IN_DAY 280 | # elif time_scale == "weekly": 281 | # interval_size = SEC_IN_WEEK 282 | # elif time_scale == "monthly": 283 | # interval_size = SEC_IN_MONTH 284 | # elif time_scale == "yearly": 285 | # interval_size = SEC_IN_YEAR 286 | # elif isinstance(time_scale, int): 287 | # interval_size = int(total_time / (time_scale)) 288 | # else: 289 | # raise TypeError("Invalid time interval") 290 | # else: 291 | # raise TypeError("Please provide a time interval") 292 | # num_time_scale = int(total_time/interval_size) 293 | # print(f'Discretizing data to {num_time_scale} timestamps...') 294 | # # if num_time_scale == 0: 295 | # # print("Warning! 
Only one timestamp exist in the data.") 296 | 297 | # updated_edgelist = {} 298 | # for ts, edges_list in edgelist.items(): 299 | # bin_ts = int(ts / interval_size) 300 | # if bin_ts >= num_time_scale: 301 | # bin_ts -= 1 302 | 303 | # for edge in edges_list: 304 | # if bin_ts not in updated_edgelist: 305 | # updated_edgelist[bin_ts] = [] 306 | # updated_edgelist[bin_ts].append(edge) 307 | # print("Discretization Done..!") 308 | # return updated_edgelist 309 | -------------------------------------------------------------------------------- /tgx/viz/TET.py: -------------------------------------------------------------------------------- 1 | # TET Plot 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | from tqdm import tqdm 6 | from typing import Union, Optional 7 | import matplotlib.pyplot as plt 8 | from tgx.utils.graph_utils import discretize_edges 9 | 10 | 11 | # some parameters to be used for drawing 12 | E_ABSENT = 0 13 | E_PRESENCE_GENERAL = 1 14 | E_SEEN_IN_TRAIN = 2 15 | E_IN_TEST = 3 16 | E_NOT_IN_TEST = 4 17 | 18 | TEST_RATIO = 0.15 19 | 20 | # new color controlling parameters; Date: Dec. 22, 2021 21 | E_ONLY_TRAIN = 10 22 | E_TRAIN_AND_TEST = 20 23 | E_TRANSDUCTIVE = 30 24 | E_INDUCTIVE = 40 25 | 26 | 27 | #! should be merged graph class? 28 | def TET(temp_edgelist : Union[object, dict], 29 | filepath: Optional[str] = ".", 30 | time_scale : Union[str, int] = None, 31 | network_name : str = None, 32 | add_frame : bool = True, 33 | test_split : bool = False, 34 | figsize : tuple = (9, 5), 35 | axis_title_font_size : int = 20, 36 | ticks_font_size : int = 20, 37 | show: bool = True): 38 | r""" 39 | Generate TET plots 40 | Args: 41 | temp_edgelist: a dictionary of temporal edges or a dataset object. 42 | filepath: Path to save the TEA Plot. 43 | figsize: Size of the figure to save. 44 | axis_title_font_size: The font size of xis titles. 45 | ticks_font_size: Size of the text in the figure. 46 | add_frame: Add the frame to the plot. 47 | network_name: Name of the dataset to be used in the TEA plot file. 48 | time_scale: time_scale for discretizing data if already not done. 49 | test_split: Whether show the test split on the plot. 50 | max_time_scale: Maximum number of time_scale to discretize data. 51 | show: Whether to show the plot. 
52 | """ 53 | if isinstance(temp_edgelist, object): 54 | if temp_edgelist.freq_data is None: 55 | temp_edgelist.count_freq() 56 | temp_edgelist = temp_edgelist.freq_data 57 | 58 | # check number of unique timestamps: 59 | unique_ts = list(temp_edgelist.keys()) 60 | # if len(unique_ts) > max_time_scale: 61 | # inp = input(f"There are {unique_ts} timestamps in the data.\nDo you want to discretize the data to 1000 timestamps?(y/n)").lower() 62 | # if inp == "y": 63 | # temp_edgelist = edgelist_discritizer(temp_edgelist, 64 | # unique_ts, 65 | # time_scale = max_time_scale) 66 | if time_scale is not None: 67 | temp_edgelist = discretize_edges(temp_edgelist, 68 | time_scale = time_scale) 69 | 70 | edge_last_ts = generate_edge_last_timestamp(temp_edgelist) 71 | edge_idx_map = generate_edge_idx_map(temp_edgelist, edge_last_ts) 72 | idx_edge_map = {v: k for k, v in edge_idx_map.items()} # key: edge index; value: actual edge (source, destination) 73 | print("Info: Number of distinct edges (from index-edge map): {}".format(len(idx_edge_map))) 74 | 75 | unique_ts_list = list(temp_edgelist.keys()) 76 | e_presence_mat = generate_edge_presence_matrix(unique_ts_list, idx_edge_map, edge_idx_map, temp_edgelist) 77 | print("Info: edge-presence-matrix shape: {}".format(e_presence_mat.shape)) 78 | # print(np.unique(e_presence_mat, return_counts=True)) 79 | e_presence_mat, test_split_ts_value = process_presence_matrix(e_presence_mat, test_ratio_p=0.85) 80 | print("Info: edge-presence-matrix shape: {}".format(e_presence_mat.shape)) 81 | # print(np.unique(e_presence_mat, return_counts=True)) 82 | fig_param = set_fig_param(network_name, 83 | fig_name = filepath, 84 | figsize = figsize, 85 | axis_title_font_size = axis_title_font_size, 86 | ticks_font_size = ticks_font_size) 87 | 88 | plot_edge_presence_matrix(e_presence_mat, test_split_ts_value, unique_ts_list, list(idx_edge_map.keys()), 89 | fig_param, test_split = test_split, add_frames=add_frame, show=show) 90 | return 91 | 92 | 93 | def generate_edge_last_timestamp(edges_per_ts): 94 | """generates a dictionary containing the last timestamp of each edge""" 95 | edge_last_ts = {} 96 | for ts, e_list in edges_per_ts.items(): 97 | for e in e_list: 98 | if e not in edge_last_ts: 99 | edge_last_ts[e] = ts 100 | else: 101 | edge_last_ts[e] = max(ts, edge_last_ts[e]) 102 | return edge_last_ts 103 | 104 | 105 | def generate_edge_idx_map(edges_per_ts, edge_last_ts): 106 | """ 107 | generates index for edges according to two-level sorting policy: 108 | 1. the first level is based on their first appearance timestamp 109 | 2. 
the second level is based on their last appearance timestamp 110 | """ 111 | edge_idx_map = {} # key: actual edge (source, destination), value: edge index 112 | distinct_edge_idx = 0 113 | for ts, ts_e_list in edges_per_ts.items(): 114 | e_last_ts_this_timestamp = {} 115 | for e in ts_e_list: 116 | e_last_ts_this_timestamp[e] = edge_last_ts[e] 117 | e_last_ts_this_timestamp = dict(sorted(e_last_ts_this_timestamp.items(), key=lambda item: item[1])) 118 | for e in e_last_ts_this_timestamp: 119 | if e not in edge_idx_map: 120 | edge_idx_map[e] = distinct_edge_idx 121 | distinct_edge_idx += 1 122 | 123 | return edge_idx_map 124 | 125 | 126 | def generate_edge_presence_matrix(unique_ts_list, idx_edge_map, edge_idx_map, edges_per_ts): 127 | ''' 128 | Returns presence matrix with values 0 and 1 which indicate: 129 | value = 0 : edge is not present in this timestamp 130 | value = 1 : edge is present in this timestamp 131 | 132 | shape: (ts, total number of edges) 133 | ''' 134 | num_unique_ts = len(unique_ts_list) 135 | num_unique_edge = len(idx_edge_map) 136 | e_presence_mat = np.zeros([num_unique_ts, num_unique_edge], dtype=np.int8) 137 | unique_ts_list = np.sort(unique_ts_list) 138 | 139 | for x, ts in tqdm(enumerate(unique_ts_list)): 140 | es_ts = edges_per_ts[ts] 141 | for e in es_ts: 142 | e_presence_mat[num_unique_ts - x - 1, edge_idx_map[e]] = E_PRESENCE_GENERAL 143 | 144 | return e_presence_mat 145 | 146 | def process_presence_matrix(e_presence_matrix, test_ratio_p): 147 | """ 148 | there are 4 types of edge presence: 149 | 1. only in train 150 | 2. in train and in test 151 | 3. in test and train (which is the number 2 but in later timestamps) 152 | 4. only in test 153 | X: timestamp 154 | Y: edge index 155 | """ 156 | num_unique_ts = e_presence_matrix.shape[0] 157 | num_unique_edges = e_presence_matrix.shape[1] 158 | ts_idx_list = [i for i in range(num_unique_ts)] 159 | 160 | # generating timestamp list for train and test: 161 | test_split_ts_value = int(np.quantile(ts_idx_list, test_ratio_p)) 162 | train_ts_list = [ts for ts in ts_idx_list if ts <= test_split_ts_value] # any timestamp in train/validation split 163 | test_ts_list = [ts for ts in ts_idx_list if ts > test_split_ts_value] # test_split_ts_value is in train 164 | 165 | # first level processing: differentiate train set edges: 1) Only in train set, 2) in train & test set 166 | print("First level processing: ") 167 | print("Detecting edges present in train & test sets") 168 | for tr_ts in tqdm(train_ts_list): 169 | for eidx in range(num_unique_edges): 170 | if e_presence_matrix[num_unique_ts - tr_ts - 1, eidx] == E_PRESENCE_GENERAL: 171 | for test_ts_idx in range(test_split_ts_value + 1, num_unique_ts): 172 | if e_presence_matrix[num_unique_ts - test_ts_idx - 1, eidx] == E_PRESENCE_GENERAL: # if seen in 173 | # the test set 174 | e_presence_matrix[num_unique_ts - tr_ts - 1, eidx] = E_TRAIN_AND_TEST 175 | break 176 | 177 | # differentiate test set edges: 1) transductive (seen in train, repeating in test), 2) inductive (only in test) 178 | print("Detecting transductive edges (seen in train, repeating in test)") 179 | for ts in tqdm(test_ts_list): 180 | for eidx in range(num_unique_edges): 181 | if e_presence_matrix[num_unique_ts - ts - 1, eidx] == E_PRESENCE_GENERAL: 182 | for prev_ts_idx in range(test_split_ts_value, -1, -1): 183 | if e_presence_matrix[num_unique_ts - prev_ts_idx - 1, eidx] == E_TRAIN_AND_TEST: # if seen in 184 | # the training set 185 | e_presence_matrix[num_unique_ts - ts - 1, eidx] = E_TRANSDUCTIVE 186 | break 
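# ----------------------------------------------------------------------------
# Illustrative usage sketch (not library code) for the TET plot whose entry
# point is defined at the top of this file. As with TEA, a tgx.Graph can be
# passed directly; the dataset, granularity and output path are only examples.
import tgx
from tgx.viz.TET import TET

ctdg = tgx.Graph(tgx.builtin.mooc())
ctdg_daily, _ = ctdg.discretize(time_scale="daily")
TET(ctdg_daily, filepath=".", network_name="mooc", test_split=True)  # saves ./mooc_TET.pdf
# ----------------------------------------------------------------------------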
187 | 188 | # second level processing 189 | print("Second level processing:") 190 | print("Detecting edges 1) Only in train set, 2) only in test (inductive)") 191 | for ts in tqdm(range(num_unique_ts)): 192 | for eidx in range(num_unique_edges): 193 | if ts <= test_split_ts_value: 194 | if e_presence_matrix[num_unique_ts - ts - 1, eidx] == E_PRESENCE_GENERAL: 195 | e_presence_matrix[num_unique_ts - ts - 1, eidx] = E_ONLY_TRAIN 196 | else: 197 | if e_presence_matrix[num_unique_ts - ts - 1, eidx] == E_PRESENCE_GENERAL: 198 | e_presence_matrix[num_unique_ts - ts - 1, eidx] = E_INDUCTIVE 199 | 200 | return e_presence_matrix, test_split_ts_value 201 | 202 | 203 | def plot_edge_presence_matrix(e_presence_mat, 204 | test_split_ts_value, 205 | unique_ts_list, 206 | idx_edge_list, 207 | fig_param, 208 | test_split = False, 209 | add_frames=True, 210 | show=False): 211 | print("Info: plotting edge presence heatmap for {} ...".format(fig_param.fig_name)) 212 | 213 | fig, ax = plt.subplots(figsize=fig_param.figsize) 214 | plt.subplots_adjust(bottom=0.3, left=0.2) 215 | 216 | # colors = ['white', # E_ABSENCE 217 | # '#67a9cf', # E_ONLY_TRAIN 218 | # '#ef8a62', # E_TRAIN_AND_TEST 219 | # '#ef8a62', # E_TRANSDUCTIVE 220 | # '#b2182b' # E_INDUCTIVE 221 | # ] 222 | if test_split: 223 | colors = ['white', # E_ABSENCE 224 | '#018571', # E_ONLY_TRAIN 2c7bb6 225 | '#fc8d59', # E_TRAIN_AND_TEST 226 | '#fc8d59', # E_TRANSDUCTIVE 227 | '#b2182b' # E_INDUCTIVE 228 | ] 229 | else: 230 | colors = ['white', 231 | '#ca0020', 232 | '#ca0020', 233 | '#ca0020', 234 | '#ca0020',] 235 | # print(sns.color_palette(colors, as_cmap=True)) 236 | frame_color = "grey" # "#bababa" 237 | time_split_color = "black" 238 | axis_title_font_size = fig_param.axis_title_font_size 239 | x_font_size = fig_param.ticks_font_size 240 | y_font_size = fig_param.ticks_font_size 241 | 242 | ax = sns.heatmap(e_presence_mat, cmap=sns.color_palette(colors, as_cmap=True), cbar=False) 243 | 244 | # processing x-axis 245 | x_gaps = np.linspace(0, len((idx_edge_list)), num=5) 246 | x_labels = x_gaps / len(idx_edge_list) 247 | x_labels = [int(100*x) for x in x_labels] 248 | plt.xticks(x_gaps, x_labels, rotation=0, fontsize=x_font_size) 249 | 250 | # processing y-axis 251 | t_gaps = np.linspace(0, len(unique_ts_list), num=5) 252 | t_labels = [int(len(unique_ts_list) - tidx) for tidx in t_gaps] 253 | plt.yticks(t_gaps, t_labels, rotation=90, fontsize=y_font_size) 254 | 255 | # axis & title 256 | # plt.margins(x=0) 257 | plt.xlabel("Percentage of observed edges", fontsize=axis_title_font_size) 258 | plt.ylabel("Timestamp", fontsize=axis_title_font_size) 259 | 260 | # requirements for additional features 261 | x_length = e_presence_mat.shape[1] - 1 262 | y_length = e_presence_mat.shape[0] - 1 263 | test_split_idx_value = y_length - test_split_ts_value 264 | e_border_idx = 0 265 | for e_idx in range(e_presence_mat.shape[1] - 1, -1, -1): 266 | if e_presence_mat[y_length - test_split_ts_value, e_idx] != E_ABSENT: 267 | e_border_idx = e_idx 268 | break 269 | 270 | # rectangle for different parts of the dataset 271 | if add_frames and test_split: 272 | print("Info: Border edge index:", e_border_idx) 273 | print("Info: Test split timestamp value:", test_split_ts_value) 274 | rect_train = plt.Rectangle((0, y_length - test_split_ts_value + 0.085), e_border_idx, test_split_ts_value + 0.9, 275 | fill=False, linewidth=2, edgecolor=frame_color) 276 | rect_test_mayseen = plt.Rectangle((0, 0), e_border_idx, y_length - test_split_ts_value - 0.1, 277 | fill=False, 
linewidth=2, edgecolor=frame_color) 278 | rect_test_new = plt.Rectangle((e_border_idx, 0), x_length - e_border_idx, 279 | y_length - test_split_ts_value - 0.1, 280 | fill=False, linewidth=2, edgecolor=frame_color) 281 | ax = ax or plt.gca() 282 | ax.add_patch(rect_train) 283 | ax.add_patch(rect_test_mayseen) 284 | ax.add_patch(rect_test_new) 285 | 286 | elif add_frames: 287 | ax.add_patch(plt.Rectangle((0, 0), x_length, y_length+1, 288 | fill=False, linewidth=2, edgecolor=frame_color)) 289 | # test split horizontal line 290 | if test_split: 291 | plt.axhline(y=test_split_idx_value, color=time_split_color, linestyle="--", linewidth=2, label='x') 292 | plt.text(x=0, y=test_split_idx_value, s='x', color=time_split_color, va='center', ha='center', 293 | fontsize=y_font_size, fontweight='heavy') 294 | 295 | if fig_param.fig_name is not None: 296 | # print("Info: file name: {}".format(fig_param.fig_name)) 297 | plt.savefig(f"{fig_param.fig_name}/{fig_param.network_name}_TET.pdf") 298 | plt.show() 299 | print("Info: plotting done!") 300 | 301 | def set_fig_param(network_name, fig_name = None, 302 | figsize = (9, 5), 303 | axis_title_font_size = 20, 304 | ticks_font_size = 22, 305 | axis_tick_gap = 20, 306 | timestamp_split_cross_mark_offset = 1): 307 | 308 | # if network_name in ['US Legislative', 'Canadian Vote', 'UN Trade', 'UN Vote']: 309 | # axis_tick_gap = axis_tick_gap * 0.35 310 | 311 | # elif network_name in ['Reddit', 'Wikipedia', 'UCI', 'Social Evo.', 'Flights', 'LastFM', 'MOOC']: 312 | # axis_tick_gap = axis_tick_gap * 0.5 313 | 314 | # elif network_name in ['Enron']: 315 | # axis_tick_gap = axis_tick_gap * 0.4 316 | 317 | fig_param = Fig_Param(network_name, 318 | fig_name, 319 | figsize, 320 | axis_title_font_size, 321 | ticks_font_size, 322 | axis_tick_gap, 323 | timestamp_split_cross_mark_offset) 324 | 325 | return fig_param 326 | 327 | class Fig_Param: 328 | def __init__(self, network_name, fig_name, figsize, axis_title_font_size, ticks_font_size, axis_tick_gap, 329 | timestamp_split_cross_mark_offset): 330 | self.network_name = network_name 331 | self.fig_name = fig_name 332 | self.figsize = figsize 333 | self.axis_title_font_size = axis_title_font_size 334 | self.ticks_font_size = ticks_font_size 335 | self.axis_tick_gap = axis_tick_gap 336 | self.timestamp_split_cross_mark_offset = timestamp_split_cross_mark_offset -------------------------------------------------------------------------------- /tgx/utils/stat.py: -------------------------------------------------------------------------------- 1 | from tgx.utils.plotting_utils import plot_for_snapshots, plot_nodes_edges_per_ts, plot_density_map 2 | import networkx as nx 3 | import numpy as np 4 | from typing import List 5 | 6 | __all__ = ["degree_over_time", 7 | "nodes_over_time", 8 | "edges_over_time", 9 | "nodes_and_edges_over_time", 10 | "get_avg_e_per_ts", 11 | "get_avg_degree", 12 | "get_num_timestamps", 13 | "get_num_unique_edges", 14 | "get_reoccurrence", 15 | "get_surprise", 16 | "get_novelty", 17 | "get_avg_node_activity", 18 | "connected_components_per_ts", 19 | "size_connected_components", 20 | "get_avg_node_engagement", 21 | "degree_density"] 22 | 23 | #* helper functions 24 | def _find(x, parent): 25 | if parent[x] == x: 26 | return x 27 | parent[x] = _find(parent[x], parent) 28 | return parent[x] 29 | 30 | 31 | def _merge(x, y, parent): 32 | root_x = _find(x, parent) 33 | root_y = _find(y, parent) 34 | 35 | if root_x != root_y: 36 | parent[root_x] = root_y 37 | 38 | 39 | def degree_over_time(graph: object, 40 | 
network_name: str, 41 | filepath: str = "./") -> None: 42 | r''' 43 | Plot average degree per timestamp. 44 | Parameters: 45 | graph: Graph object created by tgx.Graph containing edgelist 46 | network_name: name of the graph to be used in the output file name 47 | filepath: path to save the output figure 48 | ''' 49 | ave_degree = _calculate_average_degree_per_ts(graph) 50 | 51 | if network_name is not None: 52 | filename = f"{network_name}_ave_degree_per_ts" 53 | else: 54 | filename = "ave_degree_per_ts" 55 | plot_for_snapshots(ave_degree, y_title="Average degree", filename=filepath+filename) 56 | return 57 | 58 | 59 | 60 | def nodes_over_time(graph: object, 61 | network_name: str, 62 | filepath: str = "./") -> None: 63 | 64 | r''' 65 | Plot number of active nodes per timestamp. 66 | Parameters: 67 | graph: Graph object created by tgx.Graph containing edgelist 68 | network_name: name of the graph to be used in the output file name 69 | filepath: path to save the output figure 70 | ''' 71 | active_nodes = _calculate_node_per_ts(graph) 72 | if network_name is not None: 73 | filename = f"{network_name}_nodes_per_ts" 74 | else: 75 | filename = "nodes_per_ts" 76 | plot_for_snapshots(active_nodes, y_title="Number of nodes", filename=filepath+filename) 77 | return 78 | 79 | def edges_over_time(graph: object, 80 | network_name: str = None, 81 | filepath: str = "./") -> None: 82 | r''' 83 | Plot number of edges per timestamp. 84 | Parameters: 85 | graph: Graph object created by tgx.Graph containing edgelist 86 | network_name: name of the graph to be used in the output file name 87 | filepath: path to save the output figure 88 | ''' 89 | active_edges = _calculate_edge_per_ts(graph) 90 | if network_name is not None: 91 | filename = f"{network_name}_edges_per_ts" 92 | else: 93 | filename = "_edges_per_ts" 94 | plot_for_snapshots(active_edges, y_title="Number of edges", filename=filepath+filename) 95 | return 96 | 97 | def nodes_and_edges_over_time(graph: object, 98 | network_name: str, 99 | filepath: str = "./"): 100 | r""" 101 | Plot number of nodes per timestamp and number of edges per timestamp in one figure.
102 | Parameters: 103 | graph: Graph object created by tgx.Graph containing edgelist 104 | network_name: name of the graph to be used in the output file name 105 | filepath: path to save the output figure 106 | """ 107 | print("Plotting number of nodes and edges per timestamp.") 108 | edges = _calculate_edge_per_ts(graph) 109 | nodes = _calculate_node_per_ts(graph) 110 | ts = list(range(0, len(graph.data))) 111 | if network_name is not None: 112 | filename = f"{network_name}_node_and_edges_per_ts" 113 | else: 114 | filename = "node_and_edges_per_ts" 115 | return plot_nodes_edges_per_ts(edges, nodes, ts, filename=filepath+filename) 116 | 117 | 118 | 119 | def _calculate_average_degree_per_ts(graph): 120 | total_nodes = graph.total_nodes() 121 | total_ts = len(graph.data) 122 | ave_degree = [] 123 | for ts in range(total_ts): 124 | num_edges = len(graph.data[ts]) 125 | ave_degree.append(num_edges*2/ total_nodes) 126 | return ave_degree 127 | 128 | 129 | def _calculate_node_per_ts(graph): 130 | active_nodes = [] 131 | for ts in range(len(graph.data)): 132 | active_nodes.append(graph.edgelist_node_count(graph.data[ts])) 133 | return active_nodes 134 | 135 | def _calculate_edge_per_ts(graph): 136 | active_edges = [] 137 | for ts in range(len(graph.data)): 138 | active_edges.append(len(graph.data[ts])) 139 | return active_edges 140 | 141 | def get_avg_e_per_ts(graph_edgelist: dict) -> float: 142 | r""" 143 | Calculate the average number of edges per timestamp 144 | 145 | Parameters: 146 | graph: Graph object created by tgx.Graph containing edgelist 147 | """ 148 | sum_num_e_per_ts = 0 149 | unique_ts = list(graph_edgelist.keys()) 150 | for ts in unique_ts: 151 | num_e_at_this_ts = 0 152 | edge_at_this_ts = graph_edgelist[ts] 153 | for e, repeat in edge_at_this_ts.items(): 154 | num_e_at_this_ts += repeat 155 | sum_num_e_per_ts += num_e_at_this_ts 156 | avg_num_e_per_ts = (sum_num_e_per_ts * 1.0) / len(unique_ts) 157 | 158 | print(f"INFO: avg_num_e_per_ts: {avg_num_e_per_ts}") 159 | return avg_num_e_per_ts 160 | 161 | 162 | def get_avg_degree(graph: object) -> float: 163 | r""" 164 | Calculate average degree over the timestamps 165 | Parameters: 166 | graph: Graph object created by tgx.Graph containing edgelist 167 | """ 168 | graph_edgelist = graph.data 169 | degree_avg_at_ts_list = [] 170 | unique_ts = list(graph_edgelist.keys()) 171 | for ts in unique_ts: 172 | e_at_this_ts = graph_edgelist[ts] 173 | G = nx.MultiGraph() 174 | for e, repeat in e_at_this_ts.items(): 175 | G.add_edge(e[0], e[1], weight=repeat) 176 | nodes = G.nodes() 177 | degrees = [G.degree[n] for n in nodes] 178 | degree_avg_at_ts_list.append(np.mean(degrees)) 179 | 180 | print(f"INFO: avg_degree: {np.mean(degree_avg_at_ts_list)}") 181 | return np.mean(degree_avg_at_ts_list) 182 | 183 | 184 | def get_num_timestamps(graph_edgelist:dict) -> int: 185 | r""" 186 | Calculate the number of timestamps 187 | Parameters: 188 | graph: Graph object created by tgx.Graph containing edgelist 189 | """ 190 | print(f"INFO: Number of timestamps: {len(graph_edgelist)}") 191 | return len(graph_edgelist) 192 | 193 | def get_num_unique_edges(graph: object) -> int: 194 | r""" 195 | Calculate the number of unique edges 196 | Parameters: 197 | graph: Graph object created by tgx.Graph containing edgelist 198 | """ 199 | graph_edgelist = graph.data 200 | unique_edges = {} 201 | for ts, e_list in graph_edgelist.items(): 202 | for e in e_list: 203 | if e not in unique_edges: 204 | unique_edges[e] = 1 205 | print(f"INFO: Number of unique edges: 
{len(unique_edges)}") 206 | return len(unique_edges) 207 | 208 | 209 | def _split_data_chronological(graph_edgelist: dict, test_ratio: int): 210 | r""" 211 | split the timestamped edge-list chronologically 212 | """ 213 | # split the temporal graph data chronologically 214 | unique_ts = np.sort(list(graph_edgelist.keys())) 215 | test_split_time = list(np.quantile(unique_ts, [1 - test_ratio]))[0] 216 | 217 | # make train-validation & test splits 218 | train_val_e_set, test_e_set = {}, {} 219 | for ts, e_list in graph_edgelist.items(): 220 | for (u,v) in e_list: 221 | 222 | if ts < test_split_time: 223 | if (u,v) not in train_val_e_set: 224 | train_val_e_set[(u,v)] = 1 225 | else: 226 | if (u,v) not in test_e_set: 227 | test_e_set[(u,v)] = 1 228 | return train_val_e_set, test_e_set 229 | 230 | def find(x, parent): 231 | if parent[x] == x: 232 | return x 233 | parent[x] = find(parent[x], parent) 234 | return parent[x] 235 | 236 | 237 | def merge(x, y, parent): 238 | root_x = find(x, parent) 239 | root_y = find(y, parent) 240 | 241 | if root_x != root_y: 242 | parent[root_x] = root_y 243 | 244 | def get_reoccurrence(graph:object, test_ratio: float=0.15) -> float: 245 | r""" 246 | Calculate the recurrence index 247 | Parameters: 248 | graph: Graph object created by tgx.Graph containing edgelist 249 | test_ratio: The ratio to split the data chronologically 250 | """ 251 | graph_edgelist = graph.data 252 | train_val_e_set, test_e_set = _split_data_chronological(graph_edgelist, test_ratio) 253 | train_val_size = len(train_val_e_set) 254 | # intersect = 0 255 | # total_train_freq = 0 256 | # for e, freq in train_val_e_set.items(): 257 | # if freq > 1: 258 | # print(e) 259 | # total_train_freq += freq 260 | # if e in test_e_set: 261 | # intersect += freq 262 | 263 | # print(total_train_freq, intersect) 264 | # reoccurrence = float(intersect * 1.0 / total_train_freq) 265 | intersect = 0 266 | for e in test_e_set: 267 | if e in train_val_e_set: 268 | intersect += 1 269 | reoccurrence = float(intersect * 1.0 / train_val_size) 270 | print(f"INFO: Reoccurrence: {reoccurrence}") 271 | return reoccurrence 272 | 273 | def get_surprise(graph, test_ratio: float = 0.15) -> float: 274 | r""" 275 | Calculate the surprise index 276 | Parameters: 277 | graph: Graph object created by tgx.Graph containing edgelist 278 | test_ratio: The ratio to split the data chronologically 279 | """ 280 | graph_edgelist = graph.data 281 | train_val_e_set, test_e_set = _split_data_chronological(graph_edgelist, test_ratio) 282 | test_size = len(test_e_set) 283 | 284 | difference = 0 285 | # total_test_freq = 0 286 | # for e, freq in test_e_set.items(): 287 | # total_test_freq += freq 288 | # if e not in train_val_e_set: 289 | # difference += freq 290 | # surprise = float(difference * 1.0 / total_test_freq) 291 | 292 | for e in test_e_set: 293 | if e not in train_val_e_set: 294 | difference += 1 295 | surprise = float(difference * 1.0 / test_size) 296 | print(f"INFO: Surprise: {surprise}") 297 | return surprise 298 | 299 | def get_novelty(graph : object) -> float: 300 | r""" 301 | Calculate the novelty index 302 | Parameters: 303 | graph: Graph object created by tgx.Graph containing edgelist 304 | """ 305 | graph_edgelist = graph.data 306 | unique_ts = np.sort(list(graph_edgelist.keys())) 307 | novelty_ts = [] 308 | for ts_idx, ts in enumerate(unique_ts): 309 | e_set_this_ts = set(list(graph_edgelist[ts])) 310 | e_set_seen = [] 311 | for idx in range(0, ts_idx): 312 | e_set_seen.append(list(graph_edgelist[unique_ts[idx]])) 313 | 
e_set_seen = set(item for sublist in e_set_seen for item in sublist) 314 | novelty_ts.append(float(len(e_set_this_ts - e_set_seen) * 1.0 / len(e_set_this_ts))) 315 | 316 | novelty = float(np.sum(novelty_ts) * 1.0 / len(unique_ts)) 317 | print(f"INFO: Novelty: {novelty}") 318 | return novelty 319 | 320 | 321 | def get_avg_node_activity(graph: object) -> float: 322 | r""" 323 | Calculate the average node activity, 324 | the proportion of time steps a node is present 325 | Parameters: 326 | graph: Graph object created by tgx.Graph containing edgelist 327 | """ 328 | graph_edgelist = graph.data 329 | num_unique_ts = len(graph_edgelist) 330 | node_ts = {} 331 | for ts, e_list in graph_edgelist.items(): 332 | for e in e_list: 333 | # source 334 | if e[0] not in node_ts: 335 | node_ts[e[0]] = {ts: True} 336 | else: 337 | if ts not in node_ts[e[0]]: 338 | node_ts[e[0]][ts] = True 339 | 340 | # destination 341 | if e[1] not in node_ts: 342 | node_ts[e[1]] = {ts: True} 343 | else: 344 | if ts not in node_ts[e[1]]: 345 | node_ts[e[1]][ts] = True 346 | 347 | node_activity_ratio = [] 348 | for n, ts_list in node_ts.items(): 349 | node_activity_ratio.append(float(len(ts_list) * 1.0 / num_unique_ts)) 350 | 351 | avg_node_activity = float(np.sum(node_activity_ratio) * 1.0 / len(node_activity_ratio)) 352 | print(f"INFO: Node activity ratio: {avg_node_activity}") 353 | return avg_node_activity 354 | 355 | 356 | def get_avg_node_engagement(graph: object): 357 | r""" 358 | get the average node engagement over time. 359 | node engagement represents the average number of distinct nodes that establish 360 | at least one new connection during each time step. 361 | """ 362 | graph_edgelist = graph.data 363 | engaging_nodes = [] 364 | previous_edges = set() 365 | for ts, e_list in graph_edgelist.items(): 366 | node_set = set() 367 | new_edges = {(u, v) for (u, v) in e_list if frozenset({u, v}) not in previous_edges} 368 | for u, v in new_edges: 369 | if u not in node_set: 370 | node_set.add(u) 371 | if v not in node_set: 372 | node_set.add(v) 373 | # engaging_nodes.append((ts, len(node_set))) 374 | engaging_nodes.append(len(node_set)) 375 | previous_edges = {frozenset({u, v}) for (u, v) in e_list} # Update the set of previous edges for the next timestamp 376 | return engaging_nodes 377 | 378 | def degree_density(graph: tuple, 379 | k: int = 10, 380 | network_name: str = None, 381 | plot_path: str = "./") -> None: 382 | r""" 383 | Plot density map of node degrees per time window 384 | Parameters: 385 | graph_edgelist: Dictionary containing graph data 386 | k: number of time windows 387 | network_name: name of the graph to be used in the output file name 388 | plot_path: path to save the output figure 389 | """ 390 | graph_edgelist = graph.data 391 | degrees_by_k_list = [] 392 | temp = [] 393 | temp_idx = 0 394 | unique_ts = list(graph_edgelist.keys()) 395 | 396 | for ts in unique_ts: 397 | e_at_this_ts = graph_edgelist[ts] 398 | G = nx.MultiGraph() 399 | 400 | for e in e_at_this_ts: 401 | G.add_edge(e[0], e[1]) 402 | 403 | nodes = G.nodes() 404 | degrees = [G.degree[n] for n in nodes] 405 | 406 | if temp_idx None: 429 | r""" 430 | Plot number of connected components per timestamp 431 | Parameters: 432 | graph: a list containing graph snapshots 433 | network_name: name of the graph to be used in the output file name 434 | plot_path: path to save the output figure 435 | """ 436 | num_components = [] 437 | for t in range(len(graph.data)): 438 | edgelist_t = graph.data[t] 439 | nodes_t = 
graph.edgelist_node_list(edgelist_t) 440 | parent = {node: node for node in nodes_t} 441 | 442 | for edge in edgelist_t: 443 | (u, v) = edge 444 | _merge(u, v, parent) 445 | 446 | num = 0 447 | for u in nodes_t: 448 | if parent[u] == u: 449 | num += 1 450 | num_components.append(num) 451 | 452 | if network_name is not None: 453 | filename = f"{network_name}_connected_components_per_ts" 454 | else: 455 | filename = "_connected_components_per_ts" 456 | 457 | plot_for_snapshots(num_components, y_title="Number of connected components", filename=plot_path+filename) 458 | return 459 | 460 | 461 | # TODO turn this into a plotting function as well, can return the computed stats 462 | def size_connected_components(graph: tuple) -> List[List]: 463 | r""" 464 | Calculate the sizes of connected components per timestamp 465 | Returns: 466 | list[list]: A list containing lists of sizes of connected components for each timestamp. 467 | """ 468 | component_sizes = [] 469 | for t in range(len(graph.data)): 470 | edgelist_t = graph.data[t] 471 | nodes_t = graph.edgelist_node_list(edgelist_t) 472 | parent = {node: node for node in nodes_t} 473 | 474 | for edge in edgelist_t: 475 | (u, v) = edge 476 | _merge(u, v, parent) 477 | 478 | component_sizes_t = {} 479 | for u in nodes_t: 480 | root = _find(u, parent) 481 | if root not in component_sizes_t: 482 | component_sizes_t[root] = 0 483 | component_sizes_t[root] += 1 484 | 485 | component_sizes_t_list = list(component_sizes_t.values()) 486 | component_sizes.append(component_sizes_t_list) 487 | 488 | return component_sizes 489 | 490 | # TODO turn this into a plotting function as well, can return the computed stats 491 | def get_avg_node_engagement(graph: tuple) -> List[int]: 492 | r""" 493 | Calculate the average node engagement per timestamp, 494 | the average number of distinct nodes that establish 495 | at least one new connection. 496 | Parameters: 497 | graph_edgelist: Dictionary containing graph data 498 | """ 499 | engaging_nodes = [] 500 | previous_edges = set() 501 | 502 | for ts in range(len(graph.data)): 503 | edgelist_t = graph.data[ts] 504 | new_nodes = set() 505 | 506 | for edge in edgelist_t: 507 | (u, v) = edge 508 | if frozenset({u, v}) not in previous_edges: 509 | if u not in new_nodes: 510 | new_nodes.add(u) 511 | if v not in new_nodes: 512 | new_nodes.add(v) 513 | 514 | engaging_nodes.append(len(new_nodes)) 515 | previous_edges = {frozenset({u, v}) for (u, v) in edgelist_t} # Update the set of previous edges for next timestamp 516 | 517 | return engaging_nodes --------------------------------------------------------------------------------
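A minimal usage sketch for the non-plotting statistics in tgx/utils/stat.py above. This is illustrative only: the toy_edgelist values and the SimpleNamespace stand-in are assumptions made for the example; in practice these functions receive a tgx.Graph object whose .data attribute holds the discretized edgelist ({timestamp: {(source, destination): count}}), as the docstrings describe.

from types import SimpleNamespace

from tgx.utils.stat import (get_avg_e_per_ts, get_num_timestamps, get_novelty,
                            get_reoccurrence, get_surprise, get_avg_node_activity)

# Hypothetical toy edgelist: {timestamp: {(source, destination): number of occurrences}}
toy_edgelist = {
    0: {(1, 2): 1, (2, 3): 2},
    1: {(1, 2): 1, (3, 4): 1},
    2: {(4, 5): 1},
}

# Stand-in for a tgx.Graph instance; only the `.data` attribute is used by these metrics.
g = SimpleNamespace(data=toy_edgelist)

get_num_timestamps(toy_edgelist)      # 3 timestamps
get_avg_e_per_ts(toy_edgelist)        # (3 + 2 + 1) / 3 = 2.0 edges per timestamp
get_novelty(g)                        # average share of never-seen-before edges per timestamp
get_reoccurrence(g, test_ratio=0.15)  # test/train edge overlap relative to the train split size
get_surprise(g, test_ratio=0.15)      # share of test edges never observed in the train split
get_avg_node_activity(g)              # average fraction of timestamps in which a node appears

The plotting helpers (degree_over_time, nodes_and_edges_over_time, connected_components_per_ts, degree_density) additionally call Graph methods such as total_nodes(), edgelist_node_count(), and edgelist_node_list(), so they should be given a real tgx.Graph rather than a stand-in.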