├── netin ├── utils │ ├── __init__.py │ ├── io.py │ ├── constants.py │ └── validator.py ├── generators │ ├── tests │ │ ├── __init__.py │ │ ├── test_directed.py │ │ ├── test_undirected.py │ │ ├── test_patch.py │ │ └── test_dpah.py │ ├── __init__.py │ ├── dpa.py │ ├── pa.py │ ├── tch.py │ ├── patc.py │ ├── dpah.py │ ├── g_tc.py │ ├── dh.py │ ├── patch.py │ ├── tc.py │ ├── pah.py │ ├── h.py │ ├── undirected.py │ └── directed.py ├── algorithms │ ├── relational_classification │ │ ├── __init__.py │ │ ├── local │ │ │ └── __init__.py │ │ ├── inference │ │ │ └── __init__.py │ │ └── relational │ │ │ └── __init__.py │ ├── sampling │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── random_nodes.py │ │ ├── random_edges.py │ │ ├── random_neighbor.py │ │ ├── degree_rank.py │ │ ├── degree_group_rank.py │ │ ├── partial_crawls.py │ │ └── sampling.py │ └── __init__.py ├── viz │ ├── __init__.py │ └── constants.py ├── __init__.py └── stats │ ├── __init__.py │ ├── distributions.py │ ├── ranking.py │ └── networks.py ├── requirements ├── test.txt ├── docs.txt └── default.txt ├── setup.cfg ├── docs ├── source │ ├── netin-logo.png │ ├── visualizations.rst │ ├── graphs.rst │ ├── mechanisms.rst │ ├── algorithms.rst │ ├── statistics.rst │ ├── generators │ │ ├── directed.rst │ │ └── undirected.rst │ ├── algorithms │ │ └── sampling.rst │ ├── index.rst │ └── conf.py ├── Makefile └── make.bat ├── examples ├── undirected │ ├── pa.py │ ├── patc.py │ ├── pah.py │ └── patch.py └── directed │ ├── dpa.py │ ├── dh.py │ └── dpah.py ├── .gitignore ├── MANIFEST.in ├── .github └── workflows │ ├── documentation.yaml │ ├── release.yml │ └── python-app.yml ├── setup.py ├── CODE_OF_CONDUCT.rst ├── README.rst └── LICENSE /netin/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /netin/generators/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /netin/algorithms/relational_classification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- 1 | setuptools~=67.6.1 2 | pytest~=7.2.2 -------------------------------------------------------------------------------- /netin/algorithms/relational_classification/local/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /netin/algorithms/relational_classification/inference/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /netin/algorithms/relational_classification/relational/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description_file=README.rst 3 | license_files=LICENSE -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | setuptools~=67.6.1 2 | sphinx~=6.1.3 3 | sphinx-autobuild~=2021.3.14 -------------------------------------------------------------------------------- /docs/source/netin-logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CSHVienna/NetworkInequalities/HEAD/docs/source/netin-logo.png -------------------------------------------------------------------------------- /docs/source/visualizations.rst: -------------------------------------------------------------------------------- 1 | Visualization 2 | ============= 3 | 4 | .. automodule:: netin.viz.handlers 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/source/graphs.rst: -------------------------------------------------------------------------------- 1 | Graphs 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Contents: 7 | :glob: 8 | 9 | generators/* 10 | -------------------------------------------------------------------------------- /docs/source/mechanisms.rst: -------------------------------------------------------------------------------- 1 | Mechanisms 2 | ========== 3 | 4 | .. autoclass:: netin.TriadicClosure 5 | :members: 6 | 7 | .. autoclass:: netin.Homophily 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/algorithms.rst: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | .. automodule:: netin.sampling 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | :caption: Contents: 9 | :glob: 10 | 11 | algorithms/* 12 | -------------------------------------------------------------------------------- /requirements/default.txt: -------------------------------------------------------------------------------- 1 | networkx~=3.1 2 | numpy~=1.24.2 3 | setuptools~=67.6.1 4 | matplotlib~=3.7.1 5 | powerlaw~=1.5 6 | pandas~=2.0.0 7 | seaborn~=0.12.2 8 | pqdm~=0.2.0 9 | joblib~=1.2.0 10 | scikit-learn~=1.2.2 11 | pytest~=7.2.2 12 | sympy~=1.11.1 -------------------------------------------------------------------------------- /docs/source/statistics.rst: -------------------------------------------------------------------------------- 1 | Statistics 2 | ========== 3 | 4 | .. automodule:: netin.stats.distributions 5 | :members: 6 | 7 | .. automodule:: netin.stats.networks 8 | :members: 9 | 10 | .. automodule:: netin.stats.ranking 11 | :members: 12 | -------------------------------------------------------------------------------- /examples/undirected/pa.py: -------------------------------------------------------------------------------- 1 | from netin import PA 2 | 3 | 4 | def run(): 5 | n = 200 6 | k = 2 7 | f_m = 0.1 8 | seed = 1234 9 | g = PA(n=n, k=k, f_m=f_m, seed=seed) 10 | g.generate() 11 | g.info() 12 | 13 | 14 | if __name__ == '__main__': 15 | run() 16 | -------------------------------------------------------------------------------- /docs/source/generators/directed.rst: -------------------------------------------------------------------------------- 1 | Directed Graphs 2 | =============== 3 | 4 | .. autoclass:: netin.DiGraph 5 | :members: 6 | 7 | .. autoclass:: netin.DH 8 | :members: 9 | 10 | .. autoclass:: netin.DPA 11 | :members: 12 | 13 | .. 
autoclass:: netin.DPAH 14 | :members: 15 | -------------------------------------------------------------------------------- /examples/undirected/patc.py: -------------------------------------------------------------------------------- 1 | from netin import PATC 2 | 3 | 4 | def run(): 5 | n = 200 6 | k = 2 7 | f_m = 0.1 8 | tc = 0.9 9 | seed = 1234 10 | g = PATC(n=n, k=k, f_m=f_m, tc=tc, seed=seed) 11 | g.generate() 12 | g.info() 13 | 14 | 15 | if __name__ == '__main__': 16 | run() 17 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .random_nodes import Sampling 2 | from .random_nodes import RandomNodes 3 | from .random_edges import RandomEdges 4 | from .degree_rank import DegreeRank 5 | from .degree_group_rank import DegreeGroupRank 6 | from .random_neighbor import RandomNeighbor 7 | from .partial_crawls import PartialCrawls 8 | -------------------------------------------------------------------------------- /examples/undirected/pah.py: -------------------------------------------------------------------------------- 1 | from netin import PAH 2 | 3 | 4 | def run(): 5 | n = 1000 6 | k = 2 7 | f_m = 0.1 8 | h_MM = 0.5 9 | h_mm = 0.5 10 | seed = 1234 11 | g = PAH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 12 | g.generate() 13 | g.info() 14 | 15 | 16 | if __name__ == '__main__': 17 | run() 18 | -------------------------------------------------------------------------------- /netin/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | # from .sampling.random_nodes import RandomNodes 2 | # from .sampling.random_edges import RandomEdges 3 | # from .sampling.degree_rank import DegreeRank 4 | # from .sampling.degree_group_rank import DegreeGroupRank 5 | # from .sampling.random_neighbor import RandomNeighbor 6 | # from .sampling.partial_crawls import 
PartialCrawls 7 | -------------------------------------------------------------------------------- /examples/directed/dpa.py: -------------------------------------------------------------------------------- 1 | from netin import DPA 2 | 3 | 4 | def run(): 5 | n = 200 6 | d = 0.1 7 | f_m = 0.1 8 | plo_M = 2.0 9 | plo_m = 2.0 10 | seed = 1234 11 | g = DPA(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, seed=seed) 12 | g.generate() 13 | g.info() 14 | 15 | 16 | if __name__ == '__main__': 17 | run() 18 | -------------------------------------------------------------------------------- /examples/undirected/patch.py: -------------------------------------------------------------------------------- 1 | from netin import PATCH 2 | 3 | 4 | def run(): 5 | n = 200 6 | k = 2 7 | f_m = 0.1 8 | h_MM = 0.1 9 | h_mm = 0.9 10 | tc = 0.5 11 | seed = 1234 12 | g = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 13 | g.generate() 14 | g.info() 15 | 16 | 17 | if __name__ == '__main__': 18 | run() 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | 3 | # ignore log files and databases 4 | *.log 5 | *.sql 6 | *.sqlite 7 | *.ipynb_checkpoints 8 | *.pyc 9 | 10 | # ignore cache 11 | .idea 12 | .pytest_cache 13 | .DS_Store 14 | .vscode 15 | __pycache__ 16 | 17 | # temporals 18 | tmp 19 | _tmp 20 | deleteme* 21 | 22 | # internal 23 | reports 24 | pre_submit.sh 25 | 26 | # pip install 27 | build 28 | dist 29 | netin.egg-info 30 | -------------------------------------------------------------------------------- /docs/source/generators/undirected.rst: -------------------------------------------------------------------------------- 1 | Undirected Graphs 2 | ================= 3 | 4 | .. autoclass:: netin.UnDiGraph 5 | :members: 6 | 7 | .. autoclass:: netin.PA 8 | :members: 9 | 10 | .. autoclass:: netin.PATC 11 | :members: 12 | 13 | .. 
autoclass:: netin.PAH 14 | :members: 15 | 16 | .. autoclass:: netin.PATCH 17 | :members: 18 | 19 | .. autoclass:: netin.TCH 20 | :members: 21 | -------------------------------------------------------------------------------- /examples/directed/dh.py: -------------------------------------------------------------------------------- 1 | from netin import DH 2 | 3 | 4 | def run(): 5 | n = 2000 6 | d = 0.001 7 | f_m = 0.1 8 | plo_M = 2.0 9 | plo_m = 2.0 10 | h_MM = 0.1 11 | h_mm = 0.1 12 | seed = 1234 13 | g = DH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 14 | g.generate() 15 | g.info() 16 | 17 | 18 | if __name__ == '__main__': 19 | run() 20 | -------------------------------------------------------------------------------- /examples/directed/dpah.py: -------------------------------------------------------------------------------- 1 | from netin import DPAH 2 | 3 | 4 | def run(): 5 | n = 1000 6 | d = 0.005 7 | f_m = 0.1 8 | plo_M = 2.0 9 | plo_m = 2.0 10 | h_MM = 0.5 11 | h_mm = 0.9 12 | seed = 1234 13 | g = DPAH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 14 | g.generate() 15 | g.info() 16 | 17 | 18 | if __name__ == '__main__': 19 | run() 20 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/constants.py: -------------------------------------------------------------------------------- 1 | RANDOM_NODES = "Random node sampling" 2 | RANDOM_EDGES = "Random edge sampling" 3 | DEGREE_RANK = "Degree Rank" 4 | DEGREE_GROUP_RANK = "Degree Group Rank" 5 | RANDOM_NEIGHBORS = "Random neighbor sampling" 6 | PARTIAL_CRAWLS = "Partial crawl sampling" 7 | 8 | DESC = "desc" 9 | ASC = "asc" 10 | 11 | SNSIZE = 0.01 12 | 13 | MAX_TRIES = 20 14 | MIN_EDGES = 2 15 | MIN_CLASSES = 2 16 | -------------------------------------------------------------------------------- /netin/generators/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .dh import DH 2 | from .directed import DiGraph 3 | from .dpa import DPA 4 | from .dpah import DPAH 5 | from .graph import Graph 6 | from .graph import convert_networkx_to_netin 7 | from .h import Homophily 8 | from .pa import PA 9 | from .pah import PAH 10 | from .patc import PATC 11 | from .patch import PATCH 12 | from .tc import TriadicClosure 13 | from .tch import TCH 14 | from .undirected import UnDiGraph 15 | -------------------------------------------------------------------------------- /netin/viz/__init__.py: -------------------------------------------------------------------------------- 1 | from .constants import COLOR_MAJORITY 2 | from .constants import COLOR_MINORITY 3 | from .constants import COLOR_MIXED 4 | from .constants import COLOR_UNKNOWN 5 | from .handlers import plot_graph 6 | from .handlers import plot_distribution 7 | from .handlers import plot_powerlaw_fit 8 | from .handlers import plot_fraction_of_minority 9 | from .handlers import plot_gini_coefficient 10 | from .handlers import plot_disparity 11 | from .handlers import reset_style 12 | from .handlers import set_paper_style 13 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include setup.py 3 | include LICENSE.txt 4 | include README.rst 5 | 6 | recursive-include examples *.txt *.py *.edgelist *.mbox *.gz *.bz2 *.zip *.html *.js *.css 7 | recursive-include doc *.py *.rst Makefile *.html *.png *.txt *.css *.inc *.js 8 | recursive-include requirements *.txt *.md 9 | 10 | include netin/tests/*.py 11 | include netin/*/tests/*.txt 12 | include netin/*/tests/*.py 13 | include netin/*/*/tests/*.py 14 | global-exclude *~ 15 | global-exclude *.pyc 16 | global-exclude .svn 17 | 
-------------------------------------------------------------------------------- /docs/source/algorithms/sampling.rst: -------------------------------------------------------------------------------- 1 | Sampling 2 | ======== 3 | 4 | .. autoclass:: netin.sampling.Sampling 5 | :members: 6 | 7 | .. autoclass:: netin.sampling.RandomNodes 8 | :members: 9 | 10 | .. autoclass:: netin.sampling.RandomEdges 11 | :members: 12 | 13 | .. autoclass:: netin.sampling.RandomNeighbor 14 | :members: 15 | 16 | .. autoclass:: netin.sampling.DegreeRank 17 | :members: 18 | 19 | .. autoclass:: netin.sampling.DegreeGroupRank 20 | :members: 21 | 22 | .. autoclass:: netin.sampling.PartialCrawls 23 | :members: 24 | -------------------------------------------------------------------------------- /netin/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | NetIn is a Python package for the analysis of network inequalities. 3 | It is based on the NetworkX package and provides a set of functions to study inequalities (e.g., in ranking, inference) 4 | in social networks. 
5 | """ 6 | 7 | __version__ = '1.0.8' 8 | 9 | from netin import generators 10 | from netin.generators import * 11 | 12 | from netin import utils 13 | from netin.utils import * 14 | 15 | from netin import algorithms 16 | from netin.algorithms import sampling 17 | 18 | from netin.generators import convert_networkx_to_netin 19 | -------------------------------------------------------------------------------- /netin/viz/constants.py: -------------------------------------------------------------------------------- 1 | MAX_PLOTS_PER_ROW = 5 2 | COLOR_MAJORITY = 'tab:blue' 3 | COLOR_MINORITY = 'tab:orange' 4 | COLOR_MIXED = 'tab:green' 5 | COLOR_UNKNOWN = 'tab:grey' 6 | COLOR_BLACK = 'black' 7 | INACTIVE_COLOR = 'lightgrey' 8 | RANKING_LABEL = 'rank k' 9 | FM_TOPK_AXIS_LABEL = 'f$_m$ in top-k' 10 | GINI_TOPK_AXIS_LABEL = 'Gini in top-k' 11 | INEQUITY_AXIS_LABEL = 'ME' 12 | INEQUALITY_AXIS_LABEL = 'Gini' 13 | DPI = 300 14 | DEFAULT_CELL_SIZE = 3 15 | DEFAULT_FIGSIZE = (10, 5) 16 | MINORITY_CURVE = ['get_fraction_of_minority'] 17 | TYPE_OF_DISTRIBUTION = ['pdf', 'cdf', 'ccdf'] -------------------------------------------------------------------------------- /netin/stats/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributions import get_pdf 2 | from .distributions import get_cdf 3 | from .distributions import get_ccdf 4 | from .distributions import fit_power_law 5 | from .distributions import get_fraction_of_minority 6 | from .distributions import get_gini_coefficient 7 | from .distributions import get_disparity 8 | from .networks import get_min_degree 9 | from .networks import get_minority_fraction 10 | from .networks import get_edge_type_counts 11 | from .networks import get_average_degree 12 | from .networks import get_average_degrees 13 | from .networks import get_similitude 14 | from .networks import get_node_attributes 15 | -------------------------------------------------------------------------------- 
/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. NetIn documentation master file, created by 2 | sphinx-quickstart on Wed Apr 19 11:08:02 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Network Inequality 7 | ================================= 8 | 9 | .. automodule:: netin 10 | 11 | .. toctree:: 12 | :maxdepth: 3 13 | :caption: Contents: 14 | :glob: 15 | 16 | graphs 17 | algorithms 18 | mechanisms 19 | statistics 20 | visualizations 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | -------------------------------------------------------------------------------- /netin/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pickle 4 | import networkx as nx 5 | from typing import Union, Tuple 6 | 7 | 8 | def read_graph(fn: str) -> Union[nx.Graph, nx.DiGraph]: 9 | """ 10 | Loads a graph from a file. 11 | 12 | Parameters 13 | ---------- 14 | fn: str 15 | Path to file 16 | 17 | Returns 18 | ------- 19 | Union[nx.Graph, nx.DiGraph] 20 | Graph 21 | """ 22 | if fn.endswith('.gml'): 23 | return nx.read_gml(fn) 24 | elif fn.endswith('.pkl') or fn.endswith('.gpickle'): 25 | with open(fn, 'rb') as f: 26 | return pickle.load(f) 27 | else: 28 | raise ValueError(f'Unsupported file format: {fn}') 29 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: [push, pull_request, workflow_dispatch] 3 | permissions: 4 | contents: write 5 | jobs: 6 | docs: 7 | runs-on: ubuntu-latest 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.ref }} 10 | steps: 11 | - uses: actions/checkout@v3 12 | - uses: actions/setup-python@v3 13 | with: 14 | python-version: '3.9' 15 | - name: Upgrade pip 16 | run: | 17 | # install pip=>20.1 to use "pip cache dir" 18 | python3 -m pip install --upgrade pip 19 | - name: Install dependencies 20 | run: | 21 | pip install sphinx 22 | - name: Install netin 23 | run: | 24 | pip install ./ 25 | - name: Sphinx build 26 | run: | 27 | sphinx-build docs/source/ docs/build/html/ 28 | - name: Deploy 29 | uses: peaceiris/actions-gh-pages@v3 30 | if: ${{ github.ref == 'refs/heads/main' }} 31 | with: 32 | publish_branch: documentation 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_dir: docs/build/html/ 35 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # This workflow will create a release when a tag is pushed 2 | 3 | name: Create release 4 | 5 | on: 6 | push: 7 | tags: 8 | - "netin-*.*.*" 9 | 10 | jobs: 11 | release: 12 | name: Create release 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout the repository 16 | uses: actions/checkout@v5 17 | 18 | # Split 
version from version string 19 | # `netin-x.y.z` returns `x.y.z` 20 | - id: get-version 21 | run: | 22 | version=${GITHUB_REF_NAME#netin-} 23 | echo "version=$version" >> $GITHUB_OUTPUT 24 | # Check if version contains 'a' followed by digits at the end 25 | if echo "$version" | grep -q 'a[0-9]\+$'; then 26 | echo "is_prerelease=true" >> $GITHUB_OUTPUT 27 | else 28 | echo "is_prerelease=false" >> $GITHUB_OUTPUT 29 | fi 30 | 31 | - run: echo "Extracted version ${{steps.get-version.outputs.version}}" 32 | 33 | # Create a GitHub release draft 34 | - name: Release 35 | uses: softprops/action-gh-release@v2 36 | with: 37 | draft: true 38 | name: NetIn - ${{ steps.get-version.outputs.version }} 39 | prerelease: ${{ steps.get-version.outputs.is_prerelease == 'true' }} 40 | generate_release_notes: true 41 | make_latest: ${{ steps.get-version.outputs.is_prerelease == 'false' }} 42 | -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python application 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | build: 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 3.10 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: "3.9.16" 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install flake8 pytest 30 | if [ -f requirements/default.txt ]; then pip install -r requirements/default.txt; fi 31 | if [ -f requirements/test.txt ]; then pip install -r requirements/test.txt; fi 32 | - 
name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pytest 41 | # pip install pytest pytest-cov 42 | # pytest tests.py --doctest-modules --junitxml=reports/junit/test-results.xml --cov=com --cov-report=xml --cov-report=html 43 | -------------------------------------------------------------------------------- /netin/utils/constants.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # GENERAL 4 | 5 | EMPTY = [None, np.nan] 6 | 7 | # GRAPHS 8 | 9 | NO_HOMOPHILY = [0.5, None, np.nan] 10 | NO_TRIADIC_CLOSURE = [0, None, np.nan] 11 | 12 | CLASS_ATTRIBUTE = 'm' 13 | 14 | MAJORITY_LABEL = 'M' 15 | MINORITY_LABEL = 'm' 16 | CLASS_LABELS = [MAJORITY_LABEL, MINORITY_LABEL] 17 | 18 | MAJORITY_VALUE = 0 # value for synthetic, and position for both synthetic and empirical pre-processed graphs 19 | MINORITY_VALUE = 1 # value for synthetic, and position for both synthetic and empirical pre-processed graphs 20 | CLASS_VALUES = [MAJORITY_VALUE, MINORITY_VALUE] 21 | 22 | # GENERATIVE MODELS 23 | 24 | EPSILON = 0.00001 25 | MAX_TRIES_EDGE = 100 26 | 27 | DIRECTED_MODEL_NAME = 'Directed' 28 | UNDIRECTED_MODEL_NAME = 'Undirected' 29 | 30 | H_MODEL_NAME = 'Homophily' 31 | TC_MODEL_NAME = 'Triadic Closure' 32 | 33 | PA_MODEL_NAME = 'PA' 34 | PAH_MODEL_NAME = 'PAH' 35 | PATC_MODEL_NAME = 'PATC' 36 | PATCH_MODEL_NAME = 'PATCH' 37 | 38 | TCH_MODEL_NAME = 'TCH' 39 | 40 | DH_MODEL_NAME = 'DH' 41 | DPA_MODEL_NAME = 'DPA' 42 | DPAH_MODEL_NAME = 'DPAH' 43 | 44 | HOMOPHILY_MODELS = [PAH_MODEL_NAME, PATCH_MODEL_NAME, DH_MODEL_NAME, DPAH_MODEL_NAME, H_MODEL_NAME] 45 | 46 | # NODE METRICS 47 | 
48 | VALID_METRICS = ['degree', 'in_degree', 'out_degree', 'clustering', 49 | 'betweenness', 'closeness', 'eigenvector', 'pagerank'] 50 | 51 | # RANKING 52 | 53 | RANK_RANGE = np.arange(0.1, 1 + 0.1, 0.1).astype(np.float32) 54 | INEQUITY_BETA = 0.05 55 | INEQUITY_OVER = 'over-represented' 56 | INEQUITY_UNDER = 'under-represented' 57 | INEQUITY_FAIR = 'fair' 58 | INEQUALITY_CUTS = [0.3, 0.6] 59 | INEQUALITY_HIGH = 'skewed' 60 | INEQUALITY_MODERATE = 'moderate' 61 | INEQUALITY_LOW = 'equality' 62 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | from netin import __version__ as _version_netin 2 | # Configuration file for the Sphinx documentation builder. 3 | # 4 | # For the full list of built-in configuration values, see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Project information ----------------------------------------------------- 8 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 9 | 10 | project = 'NetIn' 11 | copyright = '2023, Fariba Karimi, Lisette Espin-Noboa, Jan Bachmann' 12 | author = 'Fariba Karimi, Lisette Espin-Noboa, Jan Bachmann' 13 | release = _version_netin 14 | 15 | # -- General configuration --------------------------------------------------- 16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 17 | 18 | extensions = [ 19 | # https://www.sphinx-doc.org/en/master/tutorial/automatic-doc-generation.html 20 | # Generate documentation from files 21 | 'sphinx.ext.autodoc', 22 | # https://www.sphinx-doc.org/en/master/tutorial/automatic-doc-generation.html 23 | # Generate a summary of the project 24 | 'sphinx.ext.autosummary', 25 | # https://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html 26 | # Make numpy style docstrings interpretable by sphinx 27 | # 
'sphinx.ext.napoleon', 28 | ] 29 | 30 | templates_path = ['_templates'] 31 | exclude_patterns = [] 32 | 33 | suppress_warnings = ["ref.citation", "ref.footnote"] 34 | 35 | # -- Options for HTML output ------------------------------------------------- 36 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 37 | 38 | html_theme = 'alabaster' 39 | html_static_path = ['_static'] 40 | html_logo = 'netin-logo.png' 41 | html_favicon = 'netin-logo.png' 42 | html_theme_options = { 43 | } 44 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/random_nodes.py: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # System dependencies 3 | ############################################ 4 | import numpy as np 5 | 6 | import netin 7 | from . import constants as const 8 | from .sampling import Sampling 9 | 10 | 11 | ############################################ 12 | # Class 13 | ############################################ 14 | class RandomNodes(Sampling): 15 | """Random node sampling. 
16 | 17 | Parameters 18 | ---------- 19 | g: netin.Graph | netin.DiGraph 20 | global network 21 | 22 | pseeds: float 23 | fraction of seeds to sample 24 | 25 | max_tries: int 26 | maximum number of tries to sample a subgraph with enough classes and edges 27 | 28 | random_seed: object 29 | seed for random number generator 30 | 31 | kwargs: dict 32 | additional parameters for the sampling method 33 | """ 34 | 35 | ###################################################### 36 | # Constructor 37 | ###################################################### 38 | def __init__(self, g: netin.Graph, pseeds: float, max_tries: int = const.MAX_TRIES, 39 | random_seed: object = None, **kwargs): 40 | super().__init__(g, pseeds, max_tries, random_seed, **kwargs) 41 | 42 | @property 43 | def method_name(self) -> str: 44 | return const.RANDOM_NODES 45 | 46 | def sampling(self): 47 | super().sampling() 48 | 49 | def _sample(self): 50 | """ 51 | Creates a subgraph from G based on random node sampling 52 | """ 53 | num_classes = 0 54 | edges = None 55 | 56 | ### 1. pick random nodes 57 | while num_classes < const.MIN_CLASSES: 58 | nodes = list(self.g.node_list) 59 | np.random.shuffle(nodes) 60 | nodes = nodes[:self.nseeds] 61 | num_classes = self._count_classes(nodes) 62 | 63 | return nodes, edges 64 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/random_edges.py: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # System dependencies 3 | ############################################ 4 | import numpy as np 5 | 6 | import netin 7 | from . import constants as const 8 | from .sampling import Sampling 9 | 10 | 11 | ############################################ 12 | # Class 13 | ############################################ 14 | class RandomEdges(Sampling): 15 | """Random edge sampling. 
16 | 17 | Parameters 18 | ---------- 19 | g: netin.Graph | netin.DiGraph 20 | global network 21 | 22 | pseeds: float 23 | fraction of seeds to sample 24 | 25 | max_tries: int 26 | maximum number of tries to sample a subgraph with enough classes and edges 27 | 28 | random_seed: object 29 | seed for random number generator 30 | 31 | kwargs: dict 32 | additional parameters for the sampling method 33 | """ 34 | 35 | ###################################################### 36 | # Constructor 37 | ###################################################### 38 | def __init__(self, g: netin.Graph, pseeds: float, max_tries: int = const.MAX_TRIES, 39 | random_seed: object = None, **kwargs): 40 | super().__init__(g, pseeds, max_tries, random_seed, **kwargs) 41 | 42 | @property 43 | def method_name(self) -> str: 44 | return const.RANDOM_EDGES 45 | 46 | def sampling(self): 47 | super().sampling() 48 | 49 | def _sample(self): 50 | """ 51 | Creates a subgraph from G based on random edge sampling 52 | """ 53 | num_classes = 0 54 | nodes = None 55 | edges = None 56 | 57 | ### 1. pick random edges 58 | while num_classes < const.MIN_CLASSES: 59 | tmp_edges = list(self.g.edges()) 60 | np.random.shuffle(tmp_edges) 61 | edges = set() 62 | nodes = set() 63 | 64 | while len(nodes) < self.nseeds and len(tmp_edges) > 0: 65 | edge = tmp_edges.pop(0) 66 | nodes |= set(edge) 67 | edges.add(edge) 68 | 69 | num_classes = self._count_classes(nodes) 70 | 71 | return nodes, edges 72 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/random_neighbor.py: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # System dependencies 3 | ############################################ 4 | import numpy as np 5 | 6 | import netin 7 | from . 
import constants as const 8 | from .sampling import Sampling 9 | 10 | 11 | ############################################ 12 | # Class 13 | ############################################ 14 | class RandomNeighbor(Sampling): 15 | """Random neighbor sampling. 16 | 17 | Parameters 18 | ---------- 19 | g: netin.Graph | netin.DiGraph 20 | global network 21 | 22 | pseeds: float 23 | fraction of seeds to sample 24 | 25 | max_tries: int 26 | maximum number of tries to sample a subgraph with enough classes and edges 27 | 28 | random_seed: object 29 | seed for random number generator 30 | 31 | kwargs: dict 32 | additional parameters for the sampling method 33 | """ 34 | 35 | ###################################################### 36 | # Constructor 37 | ###################################################### 38 | def __init__(self, g: netin.Graph, pseeds: float, max_tries: int = const.MAX_TRIES, 39 | random_seed: object = None): 40 | super().__init__(g, pseeds, max_tries, random_seed) 41 | 42 | @property 43 | def method_name(self) -> str: 44 | return const.RANDOM_NEIGHBORS 45 | 46 | def sampling(self): 47 | super().sampling() 48 | 49 | def _sample(self): 50 | """ 51 | Creates a subgraph from G based on random edge sampling 52 | """ 53 | num_classes = 0 54 | nodes = set() 55 | edges = set() 56 | 57 | ### 1. 
def validate_int(value: int, minimum: int, maximum: int = None):
    """
    Validates that `value` is an int and lies within [minimum, maximum].

    Parameters
    ----------
    value: int
        value to validate (must be exactly `int`; subclasses such as `bool` are rejected)

    minimum: int
        lower bound; may be empty/None when `maximum` is given

    maximum: int
        upper bound; may be None when `minimum` is given

    Raises
    ------
    ValueError
        if both bounds are empty, or `value` is out of range
    TypeError
        if `value` is not an int
    """
    if minimum in const.EMPTY and maximum in const.EMPTY:
        raise ValueError('At least one of minimum or maximum must be specified')
    if type(value) is not int:
        raise TypeError('value must be an int')
    # Check each bound only when it is present. The original compared
    # `value < minimum` unconditionally, which raised a TypeError whenever
    # only `maximum` was specified (a case the guard above explicitly allows).
    if (minimum not in const.EMPTY and value < minimum) or (maximum is not None and value > maximum):
        raise ValueError('Value is out of range.')
def ignore_params(params: list, **kwargs) -> dict:
    """
    Removes the given parameter names from `kwargs` and warns about the
    ones that were actually set (present with a non-None value).

    Parameters
    ----------
    params: list
        names of parameters to drop from `kwargs`

    kwargs: dict
        keyword arguments to filter

    Returns
    -------
    dict
        `kwargs` without the ignored parameters
    """
    ignored = []
    for param in params:
        # Only report parameters the caller explicitly set to a non-None value;
        # the key is removed either way.
        if kwargs.pop(param, None) is not None:
            ignored.append(param)
    if ignored:
        # An order-preserving list (instead of a set) keeps the warning
        # message deterministic across runs.
        warnings.warn(f"These parameters are ignored: {', '.join(ignored)}")

    return kwargs
import constants as const 10 | from .sampling import Sampling 11 | 12 | 13 | ############################################ 14 | # Class 15 | ############################################ 16 | class DegreeRank(Sampling): 17 | """Sampling by degree rank 18 | 19 | Parameters 20 | ---------- 21 | g: netin.Graph | netin.DiGraph 22 | global network 23 | 24 | pseeds: float 25 | fraction of seeds to sample 26 | 27 | max_tries: int 28 | maximum number of tries to sample a subgraph with enough classes and edges 29 | 30 | random_seed: object 31 | seed for random number generator 32 | 33 | kwargs: dict 34 | additional parameters for the sampling method 35 | 36 | order: str 37 | order of nodes by degree. Options: "asc" | "desc" 38 | 39 | """ 40 | 41 | ###################################################### 42 | # Constructor 43 | ###################################################### 44 | def __init__(self, g: netin.Graph, pseeds: float, random_seed: object = None, **kwargs): 45 | super().__init__(g=g, pseeds=pseeds, max_tries=1, random_seed=random_seed, **kwargs) 46 | self.order = self.kwargs.get("order", const.DESC) 47 | 48 | @property 49 | def method_name(self) -> str: 50 | name = f"{const.DEGREE_RANK} ({const.DESC if self.is_descending() else const.ASC})" 51 | return name 52 | 53 | def sampling(self): 54 | super().sampling() 55 | 56 | def is_ascending(self) -> bool: 57 | return not self.is_descending() 58 | 59 | def is_descending(self) -> bool: 60 | return self.order == const.DESC 61 | 62 | def _sample(self) -> Tuple[list, Union[list, None]]: 63 | """ 64 | Creates a subgraph from G based on degree rank 65 | """ 66 | edges = None 67 | 68 | ### 1. 
pick random nodes 69 | nodes = sorted([(n, d) for n, d in self.g.degree() if d > 0], 70 | key=operator.itemgetter(1), 71 | reverse=self.is_descending()) 72 | nodes = nodes[:self.nseeds] 73 | nodes, degree = zip(*nodes) 74 | num_classes = self._count_classes(nodes) 75 | 76 | if num_classes < const.MIN_CLASSES: 77 | raise ValueError(f"{num_classes} class(es). Not enough classes in the sample of {self.nseeds} nodes." 78 | "Try increasing the number of seeds or sampling by DegreeGroupRank.") 79 | 80 | return nodes, edges 81 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/degree_group_rank.py: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # System dependencies 3 | ############################################ 4 | import operator 5 | from typing import Tuple 6 | from typing import Union 7 | 8 | import netin 9 | from . import constants as const 10 | from .degree_rank import DegreeRank 11 | 12 | 13 | ############################################ 14 | # Class 15 | ############################################ 16 | class DegreeGroupRank(DegreeRank): 17 | """Sampling by degree group rank (by degree rank per class) 18 | 19 | Parameters 20 | ---------- 21 | g: netin.Graph | netin.DiGraph 22 | global network 23 | 24 | pseeds: float 25 | fraction of seeds to sample 26 | 27 | max_tries: int 28 | maximum number of tries to sample a subgraph with enough classes and edges 29 | 30 | random_seed: object 31 | seed for random number generator 32 | 33 | kwargs: dict 34 | additional parameters for the sampling method 35 | 36 | order: str 37 | order of nodes by degree. 
Options: "asc" | "desc" 38 | 39 | """ 40 | 41 | ###################################################### 42 | # Constructor 43 | ###################################################### 44 | def __init__(self, g: netin.Graph, pseeds: float, random_seed: object = None, **kwargs): 45 | super().__init__(g=g, pseeds=pseeds, random_seed=random_seed, **kwargs) 46 | self.order = self.kwargs.get("order", const.DESC) 47 | 48 | @property 49 | def method_name(self) -> str: 50 | name = f"{const.DEGREE_GROUP_RANK} ({const.DESC if self.is_descending() else const.ASC})" 51 | return name 52 | 53 | def sampling(self): 54 | super().sampling() 55 | 56 | def _sample(self) -> Tuple[list, Union[list, None]]: 57 | """ 58 | Creates a subgraph from G based on degree rank 59 | """ 60 | nodes = [] 61 | edges = None 62 | 63 | ### 1. pick nodes 64 | _nodes = {} 65 | for class_value in self.g.class_values: 66 | valid = [(n, d) for n, d in self.g.degree() if d > 0 and 67 | self.g.get_class_value_by_node(n) == class_value] 68 | _nodes[class_value] = sorted(valid, 69 | key=operator.itemgetter(1), 70 | reverse=self.is_descending()) 71 | _nodes[class_value], _ = zip(*_nodes[class_value]) 72 | 73 | while len(nodes) < self.nseeds: 74 | for class_value in self.g.class_values: 75 | _nodes[class_value] = list(_nodes[class_value]) 76 | if len(_nodes[class_value]) > 0: 77 | nodes.append(_nodes[class_value].pop(0)) 78 | 79 | num_classes = self._count_classes(nodes) 80 | 81 | if num_classes < const.MIN_CLASSES: 82 | raise ValueError(f"{num_classes} class(es). Not enough classes in the sample of {self.nseeds} nodes." 
83 | "Try increasing the number of seeds or sampling by DegreeGroupRank.") 84 | 85 | return nodes, edges 86 | -------------------------------------------------------------------------------- /netin/generators/tests/test_directed.py: -------------------------------------------------------------------------------- 1 | from netin import DH 2 | from netin import DPA 3 | from netin import DPAH 4 | from netin.utils import constants as const 5 | 6 | 7 | class TestDiGraph(object): 8 | 9 | def test_case_dpa(self): 10 | n = 200 11 | d = 0.01 12 | f_m = 0.1 13 | plo_M = 2.0 14 | plo_m = 2.0 15 | seed = 5678 16 | g = DPA(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, seed=seed) 17 | g.generate() 18 | c1 = g.is_directed() 19 | c2 = g.number_of_nodes() == n 20 | c3 = g.number_of_edges() == g.get_expected_number_of_edges() 21 | c4 = g.calculate_fraction_of_minority() == f_m 22 | c5 = g.model_name == const.DPA_MODEL_NAME 23 | assert c1 and c2 and c3 and c4 and c5, "Incorrect directed parameters." 24 | 25 | def test_case_dh(self): 26 | n = 200 27 | d = 0.01 28 | f_m = 0.1 29 | h_MM = 0.9 30 | h_mm = 0.9 31 | plo_M = 2.0 32 | plo_m = 2.0 33 | seed = 5678 34 | g = DH(n=n, d=d, f_m=f_m, h_MM=h_MM, h_mm=h_mm, plo_M=plo_M, plo_m=plo_m, seed=seed) 35 | g.generate() 36 | c1 = g.is_directed() 37 | c2 = g.number_of_nodes() == n 38 | c3 = g.number_of_edges() == g.get_expected_number_of_edges() 39 | c4 = g.calculate_fraction_of_minority() == f_m 40 | c5 = g.model_name == const.DH_MODEL_NAME 41 | assert c1 and c2 and c3 and c4 and c5, "Incorrect directed parameters." 
42 | 43 | def test_case_dpah(self): 44 | n = 200 45 | d = 0.01 46 | f_m = 0.1 47 | h_MM = 0.9 48 | h_mm = 0.9 49 | plo_M = 2.0 50 | plo_m = 2.0 51 | seed = 5678 52 | g = DPAH(n=n, d=d, f_m=f_m, h_MM=h_MM, h_mm=h_mm, plo_M=plo_M, plo_m=plo_m, seed=seed) 53 | g.generate() 54 | c1 = g.is_directed() 55 | c2 = g.number_of_nodes() == n 56 | c3 = g.number_of_edges() == g.get_expected_number_of_edges() 57 | c4 = g.calculate_fraction_of_minority() == f_m 58 | c5 = g.model_name == const.DPAH_MODEL_NAME 59 | assert c1 and c2 and c3 and c4 and c5, "Incorrect directed parameters." 60 | 61 | def test_case_all(self): 62 | n = 200 63 | d = 0.01 64 | f_m = 0.1 65 | h_MM = 0.9 66 | h_mm = 0.9 67 | plo_M = 2.0 68 | plo_m = 2.0 69 | seed = 1234 70 | g_dpa = DPA(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, seed=seed) 71 | g_dh = DH(n=n, d=d, f_m=f_m, h_MM=h_MM, h_mm=h_mm, plo_M=plo_M, plo_m=plo_m, seed=seed) 72 | g_dpah = DPAH(n=n, d=d, f_m=f_m, h_MM=h_MM, h_mm=h_mm, plo_M=plo_M, plo_m=plo_m, seed=seed) 73 | 74 | g_dpa.generate() 75 | g_dh.generate() 76 | g_dpah.generate() 77 | 78 | c1 = g_dpa.number_of_nodes() == g_dh.number_of_nodes() == g_dpah.number_of_nodes() == n 79 | c2 = g_dpa.calculate_fraction_of_minority() == g_dh.calculate_fraction_of_minority() == g_dpah.calculate_fraction_of_minority() == f_m 80 | c3 = g_dpa.get_expected_number_of_edges() == g_dh.get_expected_number_of_edges() == g_dpah.get_expected_number_of_edges() 81 | 82 | c4 = g_dpa.model_name == const.DPA_MODEL_NAME 83 | c5 = g_dh.model_name == const.DH_MODEL_NAME 84 | c6 = g_dpah.model_name == const.DPAH_MODEL_NAME 85 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect directed parameters." 
86 | -------------------------------------------------------------------------------- /netin/generators/dpa.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from netin.generators.directed import DiGraph 6 | from netin.utils import constants as const 7 | 8 | 9 | class DPA(DiGraph): 10 | """Creates a new DPA instance. A directed graph with preferential attachment. 11 | 12 | Parameters 13 | ---------- 14 | n: int 15 | number of nodes (minimum=2) 16 | 17 | d: float 18 | edge density (minimum=0, maximum=1) 19 | 20 | f_m: float 21 | fraction of minorities (minimum=1/n, maximum=(n-1)/n) 22 | 23 | plo_M: float 24 | activity (out-degree power law exponent) majority group (minimum=1) 25 | 26 | plo_m: float 27 | activity (out-degree power law exponent) minority group (minimum=1) 28 | 29 | seed: object 30 | seed for random number generator 31 | 32 | Notes 33 | ----- 34 | The initialization is a directed graph with n nodes and no edges. 35 | Source nodes are selected based on their activity given by plo_M (if majority) or plo_m (if minority) 36 | [Espin-Noboa2022]_. Target nodes are selected via preferential attachment [BarabasiAlbert1999]_. 37 | 38 | """ 39 | 40 | ############################################################ 41 | # Constructor 42 | ############################################################ 43 | 44 | def __init__(self, n: int, d: float, f_m: float, plo_M: float, plo_m: float, seed: object = None): 45 | super().__init__(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, seed=seed) 46 | self.model_name = const.DPA_MODEL_NAME 47 | 48 | ############################################################ 49 | # Generation 50 | ############################################################ 51 | 52 | def get_in_degree(self, n: int) -> int: 53 | """ 54 | Returns the in-degree of node `n`. 
55 | This in-degree is not calculated, it is taken from the object `in_degrees` that is populated while 56 | generating the graph. 57 | 58 | Parameters 59 | ---------- 60 | n: int 61 | node id 62 | 63 | Returns 64 | ------- 65 | int 66 | in-degree of node `n` 67 | """ 68 | return self.in_degrees[n] 69 | 70 | def get_target_probabilities(self, source: int, available_nodes: Union[None, list[int], np.array]) -> np.array: 71 | """ 72 | Returns the probabilities for each target node in `available_nodes` to be selected as target node 73 | given source node `source`. 74 | 75 | Parameters 76 | ---------- 77 | source: int 78 | source node id 79 | 80 | available_nodes: Set[int] 81 | set of target node ids 82 | 83 | Returns 84 | ------- 85 | np.array 86 | array of probabilities for each target node. 87 | """ 88 | probs = np.array([self.get_in_degree(n) + const.EPSILON for n in available_nodes]) 89 | probs /= probs.sum() 90 | return probs 91 | 92 | def makecopy(self): 93 | """ 94 | Makes a copy of the current object. 95 | """ 96 | return self.__class__(n=self.n, 97 | d=self.d, 98 | f_m=self.f_m, 99 | plo_M=self.plo_M, 100 | plo_m=self.plo_m, 101 | seed=self.seed) 102 | -------------------------------------------------------------------------------- /netin/generators/pa.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from netin.generators.undirected import UnDiGraph 6 | from netin.utils import constants as const 7 | 8 | 9 | class PA(UnDiGraph): 10 | """Creates a new PA instance. An undirected graph with preferential attachment. 
    def get_target_probabilities(self, source: int, available_nodes: list[int]) -> tuple[np.array, list[int]]:
        """
        Returns the probabilities of the target nodes to be selected given a source node.
        This probability is proportional to the degree of the target node.

        Parameters
        ----------
        source: int
            source node (id)

        available_nodes: list
            list of target node ids

        Returns
        -------
        probs: np.array
            probabilities of the target nodes to be selected

        available_nodes: list
            list of target node ids (returned unchanged)
        """
        # EPSILON keeps zero-degree nodes selectable with a small probability.
        probs = np.array([(self.degree(target) + const.EPSILON) for target in available_nodes])
        probs /= probs.sum()
        return probs, available_nodes
76 | """ 77 | return self.__class__(n=self.n, 78 | k=self.k, 79 | f_m=self.f_m, 80 | seed=self.seed) 81 | 82 | @staticmethod 83 | def fit(g, n=None, k=None, seed=None): 84 | """ 85 | It fits the PA model to the given graph. 86 | 87 | Parameters 88 | ---------- 89 | g: netin.UnDiGraph 90 | graph to fit the model to 91 | 92 | n: int 93 | number of nodes to override (e.g., to generate a smaller network) 94 | 95 | k: int 96 | minimum node degree to override (e.g., to generate a denser network ``k>1``) 97 | 98 | seed: object 99 | seed for random number generator 100 | 101 | Returns 102 | ------- 103 | netin.PA 104 | fitted model 105 | """ 106 | n = n or g.number_of_nodes() 107 | k = k or g.calculate_minimum_degree() 108 | f_m = g.calculate_fraction_of_minority() 109 | 110 | new_g = PA(n=n, 111 | k=k, 112 | f_m=f_m, 113 | seed=seed) 114 | new_g.generate() 115 | 116 | return new_g 117 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/partial_crawls.py: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # System dependencies 3 | ############################################ 4 | import operator 5 | from typing import List 6 | from typing import Tuple 7 | 8 | import numpy as np 9 | 10 | import netin 11 | from . import constants as const 12 | from .sampling import Sampling 13 | 14 | 15 | ############################################ 16 | # Class 17 | ############################################ 18 | class PartialCrawls(Sampling): 19 | """Sampling by partial crawls. 
20 | 21 | Parameters 22 | ---------- 23 | g: netin.Graph | netin.DiGraph 24 | global network 25 | 26 | pseeds: float 27 | fraction of seeds to sample 28 | 29 | max_tries: int 30 | maximum number of tries to sample a subgraph with enough classes and edges 31 | 32 | random_seed: object 33 | seed for random number generator 34 | 35 | kwargs: dict 36 | additional parameters for the sampling method 37 | 38 | References 39 | ---------- 40 | .. [Yang2017] J. Yang, B. Ribeiro, & J. Neville "Should We Be Confident in Peer Effects Estimated From Social Network Crawls?" ICWSM (Vol. 11, No. 1, pp. 708-711), 2017. 41 | """ 42 | 43 | ###################################################### 44 | # Constructor 45 | ###################################################### 46 | def __init__(self, g: netin.Graph, pseeds: float, max_tries: int = const.MAX_TRIES, 47 | random_seed: object = None, **kwargs): 48 | super().__init__(g, pseeds, max_tries, random_seed, **kwargs) 49 | self.snsize = kwargs.get('snsize', const.SNSIZE) 50 | 51 | @property 52 | def method_name(self) -> str: 53 | return const.PARTIAL_CRAWLS 54 | 55 | def sampling(self): 56 | super().sampling() 57 | 58 | def _sample(self): 59 | """ 60 | Creates a subgraph from G based on random edge sampling 61 | """ 62 | num_classes = 0 63 | 64 | ### 1. pick random neighbors including source node both at random 65 | while num_classes < const.MIN_CLASSES: 66 | super_node = self._get_super_node() 67 | edges = self._get_edges_from_tours(super_node) 68 | nodes = set(np.array(list(edges)).flatten().tolist()) 69 | num_classes = self._count_classes(nodes) 70 | 71 | return nodes, edges 72 | 73 | def _get_super_node(self) -> List[int]: 74 | """ 75 | Randomly selects (sn * N) nodes from the graph `g`. 
    def _get_edges_from_tours(self, super_node: List[int]) -> List[Tuple[int, int]]:
        """
        Sorts nodes in super node proportional to their edges outside the super node.
        Performs random walks from/to super node until collecting (pseeds * N) nodes
        returns a list od edges

        Parameters
        ----------
        super_node: list
            list of nodes in the super node

        Returns
        -------
        list
            list of edges
        """
        # proportional to the number of edges out of the super node
        # NOTE(review): the docstring says nodes *in the super node* are sorted,
        # but this comprehension iterates ALL of self.g.node_list — confirm
        # whether `for ni in super_node` was intended.
        sorted_S = {ni: len([nj for nj in self.g.neighbors(ni) if nj not in super_node]) for ni in self.g.node_list}
        sorted_S = sorted(sorted_S.items(), key=operator.itemgetter(1), reverse=True)
        sorted_S = [n[0] for n in sorted_S]

        # tours
        # NOTE(review): despite the List annotation/docstring, a *set* of edge
        # tuples is returned.
        edges = set()
        sampled_nodes = set()
        for vi in sorted_S:
            nbrs_i = list(self.g.neighbors(vi))
            if len(nbrs_i) == 0:
                continue
            vj = np.random.choice(nbrs_i, 1)[0]  # random neighbor
            # Walk until the tour returns to the super node or enough nodes
            # have been sampled overall.
            while vj not in super_node and len(sampled_nodes) < self.nseeds:
                edges.add((vi, vj))
                sampled_nodes |= {vi, vj}
                vi = vj
                nbrs_i = list(self.g.neighbors(vi))
                if len(nbrs_i) == 0:
                    break
                vj = np.random.choice(nbrs_i, 1)[0]  # random neighbor

        return edges
test_patch_case_pa(self): 11 | n = 200 12 | k = 2 13 | f_m = 0.1 14 | seed = 5678 15 | g = PA(n=n, k=k, f_m=f_m, seed=seed) 16 | g.generate() 17 | c1 = not g.is_directed() 18 | c2 = g.number_of_nodes() == n 19 | c3 = g.calculate_minimum_degree() == k 20 | c4 = g.calculate_fraction_of_minority() == f_m 21 | c5 = g.model_name == const.PA_MODEL_NAME 22 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 23 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 24 | 25 | def test_patch_case_pah(self): 26 | n = 200 27 | k = 2 28 | f_m = 0.1 29 | h_MM = 0.9 30 | h_mm = 0.1 31 | seed = 5678 32 | g = PAH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 33 | g.generate() 34 | c1 = not g.is_directed() 35 | c2 = g.number_of_nodes() == n 36 | c3 = g.calculate_minimum_degree() == k 37 | c4 = g.calculate_fraction_of_minority() == f_m 38 | c5 = g.model_name == const.PAH_MODEL_NAME 39 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 40 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 41 | 42 | def test_case_patc(self): 43 | n = 200 44 | k = 2 45 | f_m = 0.1 46 | tc = 0.5 47 | seed = 5678 48 | g = PATC(n=n, k=k, f_m=f_m, tc=tc, seed=seed) 49 | g.generate() 50 | c1 = not g.is_directed() 51 | c2 = g.number_of_nodes() == n 52 | c3 = g.calculate_minimum_degree() == k 53 | c4 = g.calculate_fraction_of_minority() == f_m 54 | c5 = g.model_name == const.PATC_MODEL_NAME 55 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 56 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 
57 | 58 | def test_case_patch(self): 59 | n = 200 60 | k = 2 61 | f_m = 0.1 62 | h_MM = 0.1 63 | h_mm = 0.1 64 | tc = 1.0 65 | seed = 5678 66 | g = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 67 | g.generate() 68 | c1 = not g.is_directed() 69 | c2 = g.number_of_nodes() == n 70 | c3 = g.calculate_minimum_degree() == k 71 | c4 = g.calculate_fraction_of_minority() == f_m 72 | c5 = g.model_name == const.PATCH_MODEL_NAME 73 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 74 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 75 | 76 | def test_case_all(self): 77 | n = 200 78 | k = 2 79 | f_m = 0.1 80 | h_MM = 0.5 81 | h_mm = 0.5 82 | tc = 0.0 83 | seed = 1234 84 | g_pa = PA(n=n, k=k, f_m=f_m, seed=seed) 85 | g_pah = PAH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 86 | g_patc = PATC(n=n, k=k, f_m=f_m, tc=tc, seed=seed) 87 | g_patch = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 88 | 89 | g_pa.generate() 90 | g_pah.generate() 91 | g_patc.generate() 92 | g_patch.generate() 93 | 94 | c1 = not g_pa.is_directed() and not g_pah.is_directed() and not g_patc.is_directed() and not g_patch.is_directed() 95 | c2 = g_pa.number_of_nodes() == g_pah.number_of_nodes() == g_patc.number_of_nodes() == g_patch.number_of_nodes() == n 96 | c3 = g_pa.calculate_minimum_degree() == g_pah.calculate_minimum_degree() == g_patc.calculate_minimum_degree() == g_patch.calculate_minimum_degree() == k 97 | c4 = g_pa.calculate_fraction_of_minority() == g_pah.calculate_fraction_of_minority() == g_patc.calculate_fraction_of_minority() == g_patch.calculate_fraction_of_minority() == f_m 98 | c5 = g_pa.get_expected_number_of_edges() == g_pah.get_expected_number_of_edges() == g_patc.get_expected_number_of_edges() == g_patch.get_expected_number_of_edges() 99 | c6 = g_pa.model_name == const.PA_MODEL_NAME 100 | c7 = g_pah.model_name == const.PAH_MODEL_NAME 101 | c8 = g_patc.model_name == const.PATC_MODEL_NAME 102 | c9 
= g_patch.model_name == const.PATCH_MODEL_NAME 103 | assert c1 and c2 and c3 and c4 and c5 and c6 and c7 and c8 and c9, "Incorrect undirected parameters." 104 | -------------------------------------------------------------------------------- /netin/generators/tests/test_patch.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import networkx as nx 4 | 5 | from netin import PATCH 6 | from netin.utils import constants as const 7 | 8 | 9 | class TestPATCH(object): 10 | 11 | def test_patch_case_1(self): 12 | n = 200 13 | k = 2 14 | f_m = 0.1 15 | h_MM = 0.1 16 | h_mm = 0.1 17 | tc = 0.5 18 | seed = 1234 19 | g = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 20 | g.generate() 21 | c1 = not g.is_directed() 22 | c2 = g.number_of_nodes() == n 23 | c3 = g.calculate_minimum_degree() == k 24 | c4 = g.calculate_fraction_of_minority() == f_m 25 | c5 = g.model_name == const.PATCH_MODEL_NAME 26 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 27 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 28 | 29 | def test_patch_case_2(self): 30 | n = 200 31 | k = 2 32 | f_m = 0.1 33 | h_MM = 0.9 34 | h_mm = 0.1 35 | tc = 0.5 36 | seed = 1234 37 | g = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 38 | g.generate() 39 | c1 = not g.is_directed() 40 | c2 = g.number_of_nodes() == n 41 | c3 = g.calculate_minimum_degree() == k 42 | c4 = g.calculate_fraction_of_minority() == f_m 43 | c5 = g.model_name == const.PATCH_MODEL_NAME 44 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 45 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 
46 | 47 | def test_patch_case_3(self): 48 | n = 200 49 | k = 2 50 | f_m = 0.1 51 | h_MM = 0.1 52 | h_mm = 0.9 53 | tc = 0.5 54 | seed = 1234 55 | g = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 56 | g.generate() 57 | c1 = not g.is_directed() 58 | c2 = g.number_of_nodes() == n 59 | c3 = g.calculate_minimum_degree() == k 60 | c4 = g.calculate_fraction_of_minority() == f_m 61 | c5 = g.model_name == const.PATCH_MODEL_NAME 62 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 63 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 64 | 65 | def test_patch_case_4(self): 66 | n = 200 67 | k = 2 68 | f_m = 0.1 69 | h_MM = 0.1 70 | h_mm = 0.1 71 | tc = 1.0 72 | seed = 1234 73 | g = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, tc=tc, seed=seed) 74 | g.generate() 75 | c1 = not g.is_directed() 76 | c2 = g.number_of_nodes() == n 77 | c3 = g.calculate_minimum_degree() == k 78 | c4 = g.calculate_fraction_of_minority() == f_m 79 | c5 = g.model_name == const.PATCH_MODEL_NAME 80 | c6 = sum(k for _, k in g.degree()) == ((k*(k-1)) + ((n-k)*k*2)) 81 | assert c1 and c2 and c3 and c4 and c5 and c6, "Incorrect undirected parameters." 
82 | 83 | def test_patch_case_5(self): 84 | n = 200 85 | k = 2 86 | f_m = 0.1 87 | h_MM = 0.1 88 | h_mm = 0.1 89 | seed = 1234 90 | with pytest.raises(TypeError, match="missing 1 required positional argument: 'tc'"): 91 | _ = PATCH(n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 92 | 93 | def test_patch_case_6(self): 94 | n = 200 95 | k = 2 96 | f_m = 0.1 97 | tc = 0.2 98 | seed = 1234 99 | with pytest.raises(TypeError, match="missing 2 required positional arguments: 'h_mm' and 'h_MM'"): 100 | _ = PATCH(n=n, k=k, f_m=f_m, tc=tc, seed=seed) 101 | 102 | def test_patch_case_7(self): 103 | n = 200 104 | k = 2 105 | f_m = 0.1 106 | seed = 1234 107 | with pytest.raises(TypeError, match="missing 3 required positional arguments: 'h_mm', 'h_MM', and 'tc'"): 108 | _ = PATCH(n=n, k=k, f_m=f_m, seed=seed) 109 | 110 | def test_patch_ccf_increase(self): 111 | """Test that increasing TC probabilities lead to higher clustering coefficients. 112 | """ 113 | n = 200 114 | k = 2 115 | f_m = 0.1 116 | seed = 1234 117 | h = .5 118 | tc = [0., .25, .5, .75, 1.] 119 | 120 | l_g = [PATCH(n=n, k=k, f_m=f_m, seed=seed, tc=p_tc, h_MM=h, h_mm=h) for p_tc in tc] 121 | l_ccf = [] 122 | for g in l_g: 123 | g.generate() 124 | l_ccf.append(nx.average_clustering(g)) 125 | 126 | assert(all(l_ccf[i] > l_ccf[i-1] for i in range(1, len(l_ccf)))) 127 | -------------------------------------------------------------------------------- /netin/generators/tch.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from netin.generators.h import Homophily 6 | from netin.utils import constants as const 7 | 8 | from .g_tc import GraphTC 9 | 10 | class TCH(GraphTC, Homophily): 11 | """Creates a new TCH graph. An undirected graph with homophily and triadic closure as link formation mechanisms. 
    Parameters
    ----------
    n: int
        number of nodes (minimum=2)

    k: int
        minimum degree of nodes (minimum=1)

    f_m: float
        fraction of minorities (minimum=1/n, maximum=(n-1)/n)

    h_MM: float
        homophily (similarity) between majority nodes (minimum=0, maximum=1.)

    h_mm: float
        homophily (similarity) between minority nodes (minimum=0, maximum=1.)

    tc: float
        probability of a new edge to close a triad (minimum=0, maximum=1.)

    tc_uniform: bool
        specifies whether the triadic closure target is chosen uniformly at random
        or whether it follows the regular link formation mechanism
        (e.g., homophily) (default=True)

    seed: object
        seed for the random number generator (default=None)

    Notes
    -----
    The initialization is an undirected graph with n nodes and no edges.
    Then, every time a node is selected as source, it gets connected to k target nodes.
    Target nodes are selected via homophily (h_**; see :class:`netin.Homophily`) [Karimi2018]_
    with probability ``1-p_{TC}``, and with probability ``p_{TC}`` via triadic closure
    (see :class:`netin.TriadicClosure`) [HolmeKim2002]_.

    Note that this model is still work in progress and not fully implemented yet.
44 | """ 45 | 46 | ############################################################ 47 | # Constructor 48 | ############################################################ 49 | 50 | def __init__(self, n: int, k: int, f_m: float, h_mm: float, h_MM: float, tc: float, tc_uniform: bool = True, 51 | seed: object = None): 52 | GraphTC.__init__(self, n=n, k=k, f_m=f_m, tc=tc, tc_uniform=tc_uniform, seed=seed) 53 | Homophily.__init__(self, n=n, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 54 | self.model_name = const.TCH_MODEL_NAME 55 | 56 | ############################################################ 57 | # Generation 58 | ############################################################ 59 | def get_target_probabilities_regular(self, source: int, target_list: list[int]) -> \ 60 | tuple[np.ndarray, list[int]]: 61 | """ 62 | Returns the probability of nodes to be selected as target nodes using the homophily mechanism. 63 | 64 | Parameters 65 | ---------- 66 | source: int 67 | source node id 68 | 69 | target_list: set 70 | set of target node ids 71 | 72 | special_targets: dict 73 | dictionary of special target node ids to be considered 74 | 75 | Returns 76 | ------- 77 | tuple 78 | probabilities of nodes to be selected as target nodes, and set of target of nodes 79 | """ 80 | return Homophily.get_target_probabilities(self=self, source=source, available_nodes=target_list) 81 | 82 | ############################################################ 83 | # Calculations 84 | ############################################################ 85 | 86 | def info_params(self): 87 | """ 88 | Shows the (input) parameters of the graph. 89 | """ 90 | Homophily.info_params(self) 91 | GraphTC.info_params(self) 92 | 93 | def info_computed(self): 94 | """ 95 | Shows the (computed) properties of the graph. 96 | """ 97 | Homophily.info_computed(self) 98 | GraphTC.info_computed(self) 99 | 100 | def infer_triadic_closure(self) -> float: 101 | """ 102 | Infers analytically the triadic closure value of the graph. 
103 | @TODO: This still needs to be implemented. 104 | Returns 105 | ------- 106 | float 107 | triadic closure probability of the graph 108 | """ 109 | raise NotImplementedError("Inferring triadic closure probability not implemented yet.") 110 | 111 | def makecopy(self): 112 | """ 113 | Makes a copy of the current object. 114 | """ 115 | return self.__class__(n=self.n, 116 | k=self.k, 117 | f_m=self.f_m, 118 | tc=self.tc, 119 | h_MM=self.h_MM, 120 | h_mm=self.h_mm, 121 | seed=self.seed) 122 | -------------------------------------------------------------------------------- /netin/generators/tests/test_dpah.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import pytest 3 | 4 | from netin import DPAH 5 | from netin.utils import constants as const 6 | 7 | 8 | class TestDPAH(object): 9 | 10 | def test_dpah_case_1(self): 11 | n = 200 12 | d = 0.1 13 | f_m = 0.1 14 | plo_M = 2.0 15 | plo_m = 2.0 16 | h_MM = 0.1 17 | h_mm = 0.1 18 | seed = 1234 19 | g = DPAH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 20 | g.generate() 21 | c1 = g.is_directed() 22 | c2 = g.number_of_nodes() == n 23 | c3 = nx.density(g) == d 24 | c4 = g.calculate_fraction_of_minority() == f_m 25 | c5 = g.model_name == const.DPAH_MODEL_NAME 26 | assert c1 and c2 and c3 and c4 and c5, "Incorrect directed parameters." 27 | 28 | def test_dpah_case_2(self): 29 | n = 200 30 | d = 0.1 31 | f_m = 0.1 32 | plo_M = 2.0 33 | plo_m = 2.0 34 | h_MM = 0.9 35 | h_mm = 0.1 36 | seed = 1234 37 | g = DPAH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 38 | g.generate() 39 | c1 = g.is_directed() 40 | c2 = g.number_of_nodes() == n 41 | c3 = nx.density(g) == d 42 | c4 = g.calculate_fraction_of_minority() == f_m 43 | c5 = g.model_name == const.DPAH_MODEL_NAME 44 | assert c1 and c2 and c3 and c4 and c5, "Incorrect directed parameters." 
45 | 46 | def test_dpah_case_3(self): 47 | n = 200 48 | d = 0.1 49 | f_m = 0.1 50 | plo_M = 2.0 51 | plo_m = 2.0 52 | h_MM = 0.1 53 | h_mm = 0.9 54 | seed = 1234 55 | g = DPAH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 56 | g.generate() 57 | c1 = g.is_directed() 58 | c2 = g.number_of_nodes() == n 59 | c3 = nx.density(g) == d 60 | c4 = g.calculate_fraction_of_minority() == f_m 61 | c5 = g.model_name == const.DPAH_MODEL_NAME 62 | assert c1 and c2 and c3 and c4 and c5, "Incorrect directed parameters." 63 | 64 | def test_dpah_case_4(self): 65 | n = 200 66 | d = 0.1 67 | f_m = 0.1 68 | plo_M = 3.0 69 | plo_m = 1.0 70 | h_MM = 0.1 71 | h_mm = 0.1 72 | seed = 1234 73 | with pytest.raises(ValueError, match="Value is out of range."): 74 | g = DPAH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 75 | g.generate() 76 | 77 | def test_dpah_case_5(self): 78 | n = 200 79 | d = 0.1 80 | f_m = 0.1 81 | plo_M = 1.0 82 | plo_m = 3.0 83 | h_MM = 0.1 84 | h_mm = 0.1 85 | seed = 1234 86 | with pytest.raises(ValueError, match="Value is out of range."): 87 | g = DPAH(n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 88 | g.generate() 89 | 90 | def test_dpah_case_6(self): 91 | n = 200 92 | d = 0.1 93 | f_m = 0.1 94 | plo_M = 1.0 95 | plo_m = 3.0 96 | h_MM = 0.1 97 | h_mm = 0.1 98 | seed = 1234 99 | with pytest.raises(TypeError, match="missing 1 required positional argument: 'plo_M'"): 100 | _ = DPAH(n=n, d=d, f_m=f_m, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 101 | 102 | def test_dpah_case_7(self): 103 | n = 200 104 | d = 0.1 105 | f_m = 0.1 106 | plo_M = 1.0 107 | plo_m = 3.0 108 | h_MM = 0.1 109 | h_mm = 0.1 110 | seed = 1234 111 | with pytest.raises(TypeError, match="missing 1 required positional argument: 'f_m'"): 112 | _ = DPAH(n=n, d=d, plo_M=plo_M, plo_m=plo_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 113 | 114 | def test_dpah_case_8(self): 115 | n = 200 116 | d = 0.1 117 | f_m = 0.1 118 | plo_M 
= 1.0 119 | plo_m = 3.0 120 | h_MM = 0.1 121 | h_mm = 0.1 122 | seed = 1234 123 | with pytest.raises(TypeError, match="missing 2 required positional arguments: 'plo_M' and 'h_mm'"): 124 | _ = DPAH(n=n, d=d, f_m=f_m, plo_m=plo_m, h_MM=h_MM, seed=seed) 125 | 126 | def test_dpah_case_9(self): 127 | n = 200 128 | d = 0.1 129 | f_m = 0.1 130 | plo_M = 1.0 131 | plo_m = 3.0 132 | h_MM = 0.1 133 | h_mm = 0.1 134 | seed = 1234 135 | with pytest.raises(TypeError, 136 | match="missing 7 required positional arguments: 'n', 'd', 'f_m', 'plo_M', 'plo_m', 'h_MM', and 'h_mm'"): 137 | _ = DPAH(seed=seed) 138 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 NetInCSH. All rights reserved. 2 | # This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 3 | # License. (https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 4 | 5 | import os 6 | import sys 7 | from glob import glob 8 | 9 | from setuptools import setup 10 | 11 | with open("netin/__init__.py") as fid: 12 | for line in fid: 13 | if line.startswith("__version__"): 14 | version = line.strip().split()[-1][1:-1] 15 | break 16 | 17 | if sys.version_info[:2] < (3, 8): 18 | error = (f"NetIn {version} requires Python 3.9 or later ({sys.version_info[:2]} detected). 
\n") 19 | sys.stderr.write(error + "\n") 20 | sys.exit(1) 21 | 22 | name = "netin" 23 | description = "Python package to study inequalities in social networks" 24 | authors = { 25 | "Karimi": ("Fariba Karimi", "karimi@csh.ac.at"), 26 | "Espín-Noboa": ("Lisette Espín-Noboa", "espin@csh.ac.at"), 27 | "Bachmann": ("Jan Bachmann", "bachmann@csh.ac.at"), 28 | } 29 | maintainer = "NetIn Developers" 30 | maintainer_email = "netin-dev@googlegroups.com" 31 | url = "https://github.com/CSHVienna/NetworkInequalities" 32 | platforms = ["Linux", "Mac OSX", "Windows", "Unix"] 33 | keywords = [ 34 | "Networks", 35 | "Inequalities", 36 | "Social Networks", 37 | "Ranking", 38 | "Inference" 39 | "Graph Theory", 40 | "Mathematics", 41 | "network", 42 | "undirected", 43 | "discrete mathematics", 44 | "math", 45 | ] 46 | classifiers = [ 47 | "Development Status :: 3 - Alpha", 48 | "Intended Audience :: Developers", 49 | "Intended Audience :: Education", 50 | "Intended Audience :: Information Technology", 51 | "Intended Audience :: Science/Research", 52 | "Operating System :: OS Independent", 53 | "Programming Language :: Python :: 3.9", 54 | "Topic :: Software Development :: Libraries :: Python Modules", 55 | "Topic :: Scientific/Engineering", 56 | "Topic :: Scientific/Engineering :: Information Analysis", 57 | "Topic :: Scientific/Engineering :: Mathematics", 58 | "Topic :: Scientific/Engineering :: Physics", 59 | ] 60 | 61 | packages = [ 62 | "netin", 63 | "netin.generators", 64 | "netin.utils", 65 | "netin.stats", 66 | "netin.viz", 67 | "netin.algorithms.sampling", 68 | "netin.generators.tests", 69 | ] 70 | 71 | docdirbase = "share/doc/netin-%s" % version 72 | 73 | # add basic documentation 74 | data = [(docdirbase, glob("*.txt"))] 75 | 76 | # add examples 77 | for d in [ 78 | ".", 79 | "directed", 80 | "undirected", 81 | "notebooks", 82 | "advanced", 83 | "algorithms", 84 | "basic", 85 | "drawing", 86 | "subclass", 87 | ]: 88 | dd = os.path.join(docdirbase, "examples", d) 89 | pp = 
os.path.join("examples", d) 90 | data.append((dd, glob(os.path.join(pp, "*.txt")))) 91 | data.append((dd, glob(os.path.join(pp, "*.py")))) 92 | data.append((dd, glob(os.path.join(pp, "*.bz2")))) 93 | data.append((dd, glob(os.path.join(pp, "*.gz")))) 94 | data.append((dd, glob(os.path.join(pp, "*.mbox")))) 95 | data.append((dd, glob(os.path.join(pp, "*.edgelist")))) 96 | # add js force examples 97 | dd = os.path.join(docdirbase, "examples", "javascript/force") 98 | pp = os.path.join("examples", "javascript/force") 99 | data.append((dd, glob(os.path.join(pp, "*")))) 100 | 101 | # add the tests subpackage(s) 102 | package_data = { 103 | "netin": ["tests/*.py"], 104 | "netin.generators": ["tests/*.py"], 105 | } 106 | 107 | 108 | def parse_requirements_file(filename): 109 | with open(filename) as fid: 110 | requires = [l.strip() for l in fid.readlines() if not l.startswith("#")] 111 | return requires 112 | 113 | 114 | install_requires = parse_requirements_file("requirements/default.txt") 115 | 116 | extras_require = { 117 | dep: parse_requirements_file("requirements/" + dep + ".txt") 118 | for dep in ["default", "test"] # , "developer", "doc", "extra"] 119 | } 120 | 121 | with open("README.rst") as fh: 122 | long_description = fh.read() 123 | 124 | if __name__ == "__main__": 125 | setup( 126 | name=name, 127 | version=version, 128 | maintainer=maintainer, 129 | maintainer_email=maintainer_email, 130 | author=authors["Espín-Noboa"][0], 131 | author_email=authors["Espín-Noboa"][1], 132 | description=description, 133 | keywords=keywords, 134 | long_description=long_description, 135 | long_description_content_type="text/x-rst", 136 | platforms=platforms, 137 | url=url, 138 | classifiers=classifiers, 139 | packages=packages, 140 | data_files=data, 141 | package_data=package_data, 142 | install_requires=install_requires, 143 | extras_require=extras_require, 144 | python_requires=">=3.9", 145 | zip_safe=False, 146 | ) 147 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | .. _code_of_conduct: 2 | 3 | Code of Conduct 4 | =============== 5 | 6 | 7 | Introduction 8 | ------------ 9 | The Network Inequality Group (NetIn) is committed to providing a welcoming and 10 | inclusive environment for everyone. We value the participation of all 11 | members and strive to create an environment where everyone feels respected 12 | and safe. 13 | 14 | This code of conduct applies to all spaces managed by the NetIn group, 15 | including all public and private mailing lists, issue trackers, wikis, 16 | and any other communication channel used by our community. 17 | 18 | This code of conduct should be honored by everyone who participates in 19 | the NetIn community formally or informally, or claims any affiliation with the 20 | project, in any project-related activities and especially when representing the 21 | project, in any role. 22 | 23 | This code is not exhaustive or complete. It serves to distill our common 24 | understanding of a collaborative, shared environment and goals. Please try to 25 | follow this code in spirit as much as in letter, to create a friendly and 26 | productive environment that enriches the surrounding community. 27 | 28 | Specific Guidelines 29 | ------------------- 30 | 31 | We strive to: 32 | 33 | 1. Be open. We invite anyone to participate in our community. We prefer to use 34 | public methods of communication for project-related messages, unless 35 | discussing something sensitive. This applies to messages for help or 36 | project-related support, too; not only is a public support request much more 37 | likely to result in an answer to a question, it also ensures that any 38 | inadvertent mistakes in answering are more easily detected and corrected. 39 | 40 | 2. Be empathetic, welcoming, friendly, and patient. 
We work together to resolve 41 | conflict, and assume good intentions. We may all experience some frustration 42 | from time to time, but we do not allow frustration to turn into a personal 43 | attack. A community where people feel uncomfortable or threatened is not a 44 | productive one. 45 | 46 | 3. Be collaborative. Our work will be used by other people, and in turn we will 47 | depend on the work of others. When we make something for the benefit of the 48 | project, we are willing to explain to others how it works, so that they can 49 | build on the work to make it even better. Any decision we make will affect 50 | users and colleagues, and we take those consequences seriously when making 51 | decisions. 52 | 53 | 4. Be inquisitive. Nobody knows everything! Asking questions early avoids many 54 | problems later, so we encourage questions, although we may direct them to 55 | the appropriate forum. We will try hard to be responsive and helpful. 56 | 57 | 5. Be careful in the words that we choose. We are careful and respectful in 58 | our communication and we take responsibility for our own speech. Be kind to 59 | others. Do not insult or put down other participants. We will not accept 60 | harassment or other exclusionary behaviour, such as: 61 | 62 | - Violent threats or language directed against another person. 63 | - Sexist, racist, or otherwise discriminatory jokes and language. 64 | - Posting sexually explicit or violent material. 65 | - Posting (or threatening to post) other people's personally identifying information ("doxing"). 66 | - Sharing private content, such as emails sent privately or non-publicly, 67 | or unlogged forums such as IRC channel history, without the sender's consent. 68 | - Personal insults, especially those using racist or sexist terms. 69 | - Unwelcome sexual attention. 70 | - Excessive profanity. Please avoid swearwords; people differ greatly in their sensitivity to swearing. 71 | - Repeated harassment of others. 
   In general, if someone asks you to stop, then stop.
   - Advocating for, or encouraging, any of the above behaviour.

Diversity Statement
-------------------
As contributors and maintainers of this project, we pledge to respect all
people who contribute to the project, regardless of their race, ethnicity,
national origin, sexual orientation, gender identity, age, religion, or any
other personal characteristic.

We will not tolerate any form of harassment, intimidation, or discrimination,
whether it is verbal, physical, or online. This includes, but is not limited to:

- Offensive comments related to race, ethnicity, national origin, sexual
  orientation, gender identity, age, religion, or any other personal
  characteristic
- Sexual or discriminatory imagery
- Deliberate intimidation, stalking, or following
- Bullying or trolling
- Spamming or sustained disruption of discussions

We will work to ensure that everyone in our community feels safe and
comfortable participating in our project. We will listen to feedback and
concerns from community members, and we will take appropriate action to
address any violations of this code of conduct.


Reporting Guidelines
--------------------
If you experience or witness any behavior that violates this code of conduct,
please report it to the project maintainers at netin.conduct@gmail.com.
All reports will be taken seriously and investigated promptly.

Thank you for your contributions and your commitment to creating a positive
and inclusive community for all.
95 | 96 | 97 | Endnotes 98 | -------- 99 | 100 | This document is adapted from: 101 | 102 | - `NetworkX Code of Conduct `_ -------------------------------------------------------------------------------- /netin/generators/patc.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List, Dict, Tuple, Any 2 | 3 | import numpy as np 4 | 5 | from netin.utils import constants as const 6 | from .pa import PA 7 | from .g_tc import GraphTC 8 | 9 | 10 | class PATC(GraphTC, PA): 11 | """Creates a new PATC instance. An undirected graph with preferential attachment and triadic closure. 12 | 13 | Parameters 14 | ---------- 15 | n: int 16 | number of nodes (minimum=2) 17 | 18 | k: int 19 | minimum degree of nodes (minimum=1) 20 | 21 | f_m: float 22 | fraction of minorities (minimum=1/n, maximum=(n-1)/n) 23 | 24 | tc: float 25 | probability of a new edge to close a triad (minimum=0, maximum=1.) 26 | 27 | seed: object 28 | seed for random number generator 29 | 30 | Notes 31 | ----- 32 | The initialization is an undirected graph with n nodes and no edges. 33 | Then, everytime a node is selected as source, it gets connected to k target nodes. 34 | Target nodes are selected via preferential attachment `in-degree` [BarabasiAlbert1999]_, or 35 | triadic closure `tc` [HolmeKim2002]_. 
36 | """ 37 | 38 | ############################################################ 39 | # Constructor 40 | ############################################################ 41 | 42 | def __init__(self, n: int, k: int, f_m: float, tc: float, seed: object = None): 43 | PA.__init__(self, n=n, k=k, f_m=f_m, seed=seed) 44 | GraphTC.__init__(self, n=n, k=k, f_m=f_m, tc=tc, seed=seed) 45 | self.model_name = const.PATC_MODEL_NAME 46 | 47 | ############################################################ 48 | # Init 49 | ############################################################ 50 | 51 | def validate_parameters(self): 52 | """ 53 | Validates the parameters of the undirected. 54 | """ 55 | PA.validate_parameters(self) 56 | GraphTC.validate_parameters(self) 57 | 58 | def get_metadata_as_dict(self) -> Dict[str, Any]: 59 | """ 60 | Returns the metadata information (input parameters of the model) of the graph as a dictionary. 61 | 62 | Returns 63 | ------- 64 | dict 65 | Dictionary with the graph's metadata 66 | """ 67 | obj1 = PA.get_metadata_as_dict(self) 68 | obj2 = GraphTC.get_metadata_as_dict(self) 69 | obj1.update(obj2) 70 | return obj1 71 | 72 | ############################################################ 73 | # Generation 74 | ############################################################ 75 | 76 | def info_params(self): 77 | """ 78 | Shows the parameters of the model. 79 | """ 80 | PA.info_params(self) 81 | GraphTC.info_params(self) 82 | 83 | def get_target_probabilities_regular(self, source: int, 84 | target_list: List[int]) -> \ 85 | Tuple[np.array, List[int]]: 86 | """ 87 | Returns the probabilities of selecting a target node from a set of nodes based on the preferential attachment. 
88 | 89 | Parameters 90 | ---------- 91 | source: int 92 | source node 93 | 94 | target_list: set[int] 95 | set of target nodes 96 | 97 | special_targets: object 98 | special available_nodes 99 | 100 | Returns 101 | ------- 102 | Tuple[np.array, set[int]] 103 | probabilities of selecting a target node from a set of nodes, and the set of target nodes 104 | 105 | See Also 106 | -------- 107 | :py:meth:`get_target_probabilities() ` in :class:`netin.PA`. 108 | """ 109 | return PA.get_target_probabilities(self, source, target_list) 110 | 111 | ############################################################ 112 | # Calculations 113 | ############################################################ 114 | 115 | def makecopy(self): 116 | """ 117 | Makes a copy of the current object. 118 | """ 119 | return self.__class__(n=self.n, 120 | k=self.k, 121 | f_m=self.f_m, 122 | tc=self.tc, 123 | seed=self.seed) 124 | 125 | @staticmethod 126 | def fit(g, n=None, k=None, seed=None): 127 | """ 128 | It fits the PATC model to the given graph. 
129 | 130 | Parameters 131 | ---------- 132 | g: netin.UnDiGraph 133 | graph to fit the model to 134 | 135 | n: int 136 | number of nodes to override (e.g., to generate a smaller network) 137 | 138 | k: int 139 | minimum node degree to override (e.g., to generate a denser network ``k>1``) 140 | 141 | seed: object 142 | seed for random number generator 143 | 144 | Returns 145 | ------- 146 | netin.PATC 147 | fitted model 148 | """ 149 | n = n or g.number_of_nodes() 150 | k = k or g.calculate_minimum_degree() 151 | f_m = g.calculate_fraction_of_minority() 152 | tc = infer_triadic_closure(g) 153 | 154 | new_g = PATC(n=n, 155 | k=k, 156 | f_m=f_m, 157 | tc=tc, 158 | seed=seed) 159 | new_g.generate() 160 | 161 | return new_g 162 | 163 | def infer_triadic_closure(g): 164 | import networkx as nx 165 | return nx.average_clustering(g) 166 | -------------------------------------------------------------------------------- /netin/generators/dpah.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from netin.generators.h import Homophily 6 | from netin.utils import constants as const 7 | from .dpa import DPA 8 | 9 | 10 | class DPAH(DPA, Homophily): 11 | """Creates a new DPAH instance. A directed graph with preferential attachment and homophily. 
12 | 13 | Parameters 14 | ---------- 15 | n: int 16 | number of nodes (minimum=2) 17 | 18 | d: float 19 | edge density (minimum=0, maximum=1) 20 | 21 | f_m: float 22 | fraction of minorities (minimum=1/n, maximum=(n-1)/n) 23 | 24 | plo_M: float 25 | activity (out-degree power law exponent) majority group (minimum=1) 26 | 27 | plo_m: float 28 | activity (out-degree power law exponent) minority group (minimum=1) 29 | 30 | h_MM: float 31 | homophily within majority group (minimum=0, maximum=1) 32 | 33 | h_mm: float 34 | homophily within minority group (minimum=0, maximum=1) 35 | 36 | seed: object 37 | seed for random number generator 38 | 39 | Notes 40 | ----- 41 | The initialization is a directed graph with n nodes where f_m are the minority. 42 | Source nodes are selected based on their activity given by plo_M (if majority) or plo_m (if minority). 43 | Target nodes are selected via preferential attachment (in-degree) an homophily (h**). 44 | This model is based on [Espin-Noboa2022]_ which is the directed version of the "BA Homophily" model [Karimi2018]_. 45 | """ 46 | 47 | ############################################################ 48 | # Constructor 49 | ############################################################ 50 | 51 | def __init__(self, n: int, d: float, f_m: float, plo_M: float, plo_m: float, h_MM: float, h_mm: float, 52 | seed: object = None): 53 | DPA.__init__(self, n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, seed=seed) 54 | Homophily.__init__(self, n=n, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 55 | self.model_name = const.DPAH_MODEL_NAME 56 | 57 | ############################################################ 58 | # Generation 59 | ############################################################ 60 | 61 | def initialize(self, class_attribute: str = 'm', class_values: list = None, class_labels: list = None): 62 | """ 63 | Initializes the model. 
64 | 65 | Parameters 66 | ---------- 67 | class_attribute: str 68 | name of the attribute that represents the class 69 | 70 | class_values: list 71 | values of the class attribute 72 | 73 | class_labels: list 74 | labels of the class attribute mapping the class_values. 75 | """ 76 | DPA.initialize(self, class_attribute, class_values, class_labels) 77 | Homophily.initialize(self, class_attribute, class_values, class_labels) 78 | 79 | def get_target_probabilities(self, source: int, available_nodes: Union[None, list[int]]) -> np.array: 80 | """ 81 | Returns the probabilities of selecting a target node from a set of nodes based on 82 | preferential attachment and homophily, i.e., in-degree or target and homophily between source and target. 83 | 84 | Parameters 85 | ---------- 86 | source: int 87 | source node 88 | 89 | available_nodes: Set[int] 90 | set of target nodes 91 | 92 | Returns 93 | ------- 94 | np.array 95 | probabilities of selecting a target node from a set of nodes 96 | """ 97 | probs = np.array([self.get_homophily_between_source_and_target(source, target) * 98 | (self.get_in_degree(target) + const.EPSILON) for target in available_nodes]) 99 | probs /= probs.sum() 100 | return probs 101 | 102 | ############################################################ 103 | # Calculations 104 | ############################################################ 105 | 106 | def info_params(self): 107 | """ 108 | Shows the (input) parameters of the model. 109 | """ 110 | DPA.info_params(self) 111 | Homophily.info_params(self) 112 | 113 | def info_computed(self): 114 | """ 115 | Shows the (computed) properties of the graph. 116 | """ 117 | DPA.info_computed(self) 118 | Homophily.info_computed(self) 119 | 120 | def infer_homophily_values(self) -> tuple[float, float]: 121 | """ 122 | Infers the level of homophily within the majority and minority groups analytically. 
def makecopy(self):
    """
    Makes an ungenerated copy of the current model with identical parameters.
    """
    copy = self.__class__(n=self.n,
                          d=self.d,
                          f_m=self.f_m,
                          plo_M=self.plo_M,
                          plo_m=self.plo_m,
                          h_MM=self.h_MM,
                          h_mm=self.h_mm,
                          seed=self.seed)
    # @TODO: check if initialize() must be called on the copy
    # (class_attribute / class_values / class_labels).
    return copy
36 | When selected as source, a node connects to `k` active target nodes which were chosen as sources before. 37 | Target nodes are selected via any link formation mechanism (to be implemented in sub-classes) with probability ``1-p_{TC}``, 38 | and with probability ``p_{TC}`` via triadic closure (see :class:`netin.TriadicClosure`) [HolmeKim2002]_. 39 | Afterwards, the source node is flagged as being active (so that subsequent sources can connect to it). 40 | 41 | Note that this model is still work in progress and not fully implemented yet. 42 | """ 43 | 44 | ############################################################ 45 | # Constructor 46 | ############################################################ 47 | 48 | def __init__(self, n: int, k: int, f_m: float, tc: float, tc_uniform: bool = True, 49 | seed: object = None): 50 | UnDiGraph.__init__(self, n, k, f_m, seed) 51 | TriadicClosure.__init__(self, n=n, f_m=f_m, tc=tc, seed=seed) 52 | self.tc_uniform = tc_uniform 53 | self.model_name = const.TCH_MODEL_NAME 54 | 55 | def get_metadata_as_dict(self) -> Dict[str, Any]: 56 | """ 57 | Returns the metadata (parameters) of the model as a dictionary. 58 | 59 | Returns 60 | ------- 61 | dict 62 | metadata of the model 63 | """ 64 | obj = UnDiGraph.get_metadata_as_dict(self) 65 | obj.update({ 66 | 'tc_uniform': self.tc_uniform 67 | }) 68 | return obj 69 | 70 | ############################################################ 71 | # Generation 72 | ############################################################ 73 | 74 | def get_target_probabilities(self, source: int, available_nodes: List[int]) -> Tuple[np.array, List[int]]: 75 | """ 76 | Returns the probabilities of nodes to be selected as target nodes. 
77 | 78 | Parameters 79 | ---------- 80 | source: int 81 | source node id 82 | 83 | available_nodes: List[int] 84 | list of available nodes to connect to 85 | 86 | Returns 87 | ------- 88 | Tuple[np.ndarray, List[int]] 89 | probabilities of nodes to be selected as target nodes, and list of target of nodes 90 | 91 | """ 92 | tc_prob = np.random.random() 93 | 94 | if source != self._node_source_curr: 95 | self.init_special_targets(source) 96 | 97 | if tc_prob < self.tc and len(self._tc_candidates) > 0: 98 | if not self.tc_uniform: 99 | # Triadic closure is uniform 100 | return self.get_target_probabilities_regular( 101 | source, 102 | list(self._tc_candidates.keys())) 103 | return TriadicClosure\ 104 | .get_target_probabilities(self, source, available_nodes) 105 | 106 | # Edge is added based on regular mechanism (not triadic closure) 107 | return self.get_target_probabilities_regular(source, available_nodes) 108 | 109 | @abstractmethod 110 | def get_target_probabilities_regular(self, source: int, target_list: List[int]) -> \ 111 | Tuple[np.ndarray, List[int]]: 112 | raise NotImplementedError 113 | 114 | ############################################################ 115 | # Calculations 116 | ############################################################ 117 | 118 | def info_params(self): 119 | """ 120 | Shows the (input) parameters of the graph. 121 | """ 122 | print('tc_uniform: {}'.format(self.tc_uniform)) 123 | TriadicClosure.info_params(self) 124 | 125 | def info_computed(self): 126 | """ 127 | Shows the (computed) properties of the graph. 128 | """ 129 | TriadicClosure.info_computed(self) 130 | 131 | 132 | def infer_triadic_closure(self) -> float: 133 | """ 134 | Infers analytically the triadic closure value of the graph. 135 | @TODO: This still needs to be implemented. 
136 | Returns 137 | ------- 138 | float 139 | triadic closure probability of the graph 140 | """ 141 | raise NotImplementedError("Inferring triadic closure probability not implemented yet.") 142 | 143 | def makecopy(self): 144 | """ 145 | Makes a copy of the current object. 146 | """ 147 | return self.__class__(n=self.n, 148 | k=self.k, 149 | f_m=self.f_m, 150 | tc=self.tc, 151 | seed=self.seed) 152 | -------------------------------------------------------------------------------- /netin/stats/distributions.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Set, List, Tuple 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import powerlaw 6 | 7 | 8 | def get_pdf(df: pd.DataFrame, x: str, total: float) -> Tuple[np.ndarray, np.ndarray]: 9 | """Computes the probability density of the input data. 10 | 11 | Parameters 12 | ---------- 13 | df : pd.DataFrame 14 | DataFrame that contains the data. 15 | x : str 16 | The column name of the data. 17 | total : float 18 | The total amount by which to normalize the data. 19 | 20 | Returns 21 | ------- 22 | Tuple[np.ndarray, np.ndarray] 23 | Two arrays holding the x values and y values (their probability). 24 | """ 25 | values = df.groupby(x).size() 26 | xs = values.index.values 27 | ys = values.values / total 28 | return xs, ys 29 | 30 | 31 | def get_cdf(df: pd.DataFrame, x: str, total: float = None) -> (np.ndarray, np.ndarray): 32 | """Computes the cumulative distribution CDF of the input data. 33 | 34 | Parameters 35 | ---------- 36 | df : pd.DataFrame 37 | DataFrame that contains the data. 38 | x : str 39 | The column name of the data. 40 | total : float 41 | The total amount by which to normalize the data. 
(not used here) 42 | 43 | Returns 44 | ------- 45 | Tuple[np.ndarray, np.ndarray] 46 | Two arrays holding the x values and the y values (CDF) 47 | """ 48 | xs = np.sort(df[x].values) 49 | n = xs.size 50 | ys = np.arange(1, n + 1) / n 51 | return xs, ys 52 | 53 | 54 | def get_ccdf(df: pd.DataFrame, x: str, total: float = None) -> (np.ndarray, np.ndarray): 55 | """Computes the complementary cumulative distribution CCDF of the input data. 56 | 57 | Parameters 58 | ---------- 59 | df : pd.DataFrame 60 | DataFrame that contains the data. 61 | x : str 62 | The column name of the data. 63 | total : float 64 | The total amount by which to normalize the data. 65 | 66 | Returns 67 | ------- 68 | Tuple[np.ndarray, np.ndarray] 69 | Two arrays holding the x values and the y values (CCDF) 70 | """ 71 | xs, ys = get_cdf(df, x, total) 72 | return xs, 1 - ys 73 | 74 | 75 | def get_disparity(df: pd.DataFrame, x: str, total: float = None) -> (np.ndarray, np.ndarray): 76 | """Computes the disparity of the input data given by the column `x`. 77 | 78 | Parameters 79 | ---------- 80 | df: pd.DataFrame 81 | DataFrame that contains the data. 82 | 83 | x: str 84 | The column name of the data. 85 | 86 | total: float 87 | The total amount by which to normalize the data. 
(not used here) 88 | 89 | Returns 90 | ------- 91 | Tuple[np.ndarray, np.ndarray] 92 | Two arrays holding the x values (ranking) and the y values (disparity) 93 | """ 94 | from netin.stats import ranking 95 | from netin.utils import constants as const 96 | 97 | gx, gy = get_gini_coefficient(df, x, total) 98 | fx, fy = get_fraction_of_minority(df, x, total) 99 | f_m = df.query("class_label == @const.MINORITY_LABEL").shape[0] / df.shape[0] 100 | 101 | inequality_y = ranking.get_ranking_inequality(gy) 102 | inequity_x = ranking.get_ranking_inequity(f_m, fy) 103 | return inequity_x, inequality_y 104 | 105 | 106 | def get_fraction_of_minority(df: pd.DataFrame, x: str, total: float = None) -> (np.ndarray, np.ndarray): 107 | """Computes the fraction of minority in each top-k rank. 108 | 109 | Parameters 110 | ---------- 111 | df: pd.DataFrame 112 | DataFrame that contains the data. 113 | 114 | x: str 115 | The column name of the data. 116 | 117 | total: float 118 | The total amount by which to normalize the data. (not used here) 119 | 120 | Returns 121 | ------- 122 | Tuple[np.ndarray, np.ndarray] 123 | Two arrays holding the x values (ranking) and the y values (fraction of minority) 124 | """ 125 | from netin.stats import ranking 126 | xs, ys = ranking.get_fraction_of_minority_in_ranking(df, x) 127 | return xs, ys 128 | 129 | 130 | def get_gini_coefficient(df: pd.DataFrame, x: str, total: float = None) -> (np.ndarray, np.ndarray): 131 | """Computes the Gini coefficient of the distribution in each top-k rank. 132 | 133 | Parameters 134 | ---------- 135 | df: pd.DataFrame 136 | DataFrame that contains the data. 137 | 138 | x: str 139 | The column name of the data. 140 | 141 | total: float 142 | The total amount by which to normalize the data. 
(not used here) 143 | 144 | Returns 145 | ------- 146 | Tuple[np.ndarray, np.ndarray] 147 | Two arrays holding the x values (ranking) and the y values (Gini coefficient) 148 | """ 149 | from netin.stats import ranking 150 | xs, ys = ranking.get_gini_in_ranking(df, x) 151 | return xs, ys 152 | 153 | 154 | def fit_power_law(data: Union[np.array, Set, List], discrete: bool = True, 155 | xmin: Union[None, int, float] = None, xmax: Union[None, int, float] = None, **kwargs) -> powerlaw.Fit: 156 | """Fits a power-law of a given distribution. 157 | 158 | Parameters 159 | ---------- 160 | data: Union[np.array, Set, List] 161 | The data to fit. 162 | 163 | discrete: bool 164 | Whether the data is discrete or not. 165 | 166 | xmin: Union[None, int, float] 167 | The minimum value of the data. 168 | 169 | xmax: Union[None, int, float] 170 | The maximum value of the data. 171 | 172 | kwargs: dict 173 | Additional arguments to pass to the powerlaw.Fit constructor. 174 | 175 | Returns 176 | ------- 177 | powerlaw.Fit 178 | The fitted power-law. 179 | """ 180 | fit = powerlaw.Fit(data, discrete=discrete, xmax=xmax, xmin=xmin, **kwargs) 181 | return fit 182 | -------------------------------------------------------------------------------- /netin/generators/dh.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | from netin.generators.directed import DiGraph 6 | from netin.generators.h import Homophily 7 | from netin.utils import constants as const 8 | 9 | 10 | class DH(DiGraph, Homophily): 11 | """ Creates a new DH instance. A directed graph with homophily. 
12 | 13 | Parameters 14 | ---------- 15 | n: int 16 | number of nodes (minimum=2) 17 | 18 | d: float 19 | edge density (minimum=0, maximum=1) 20 | 21 | f_m: float 22 | fraction of minorities (minimum=1/n, maximum=(n-1)/n) 23 | 24 | plo_M: float 25 | activity (out-degree power law exponent) majority group (minimum=1) 26 | 27 | plo_m: float 28 | activity (out-degree power law exponent) minority group (minimum=1) 29 | 30 | h_MM: float 31 | homophily within majority group (minimum=0, maximum=1) 32 | 33 | h_mm: float 34 | homophily within minority group (minimum=0, maximum=1) 35 | 36 | seed: object 37 | seed for random number generator 38 | 39 | Notes 40 | ----- 41 | The initialization is a directed graph with n nodes and no edges. 42 | Source nodes are selected based on their activity given by plo_M (if majority) or plo_m (if minority). 43 | Target nodes are selected via homophily, see [Espin-Noboa2022]_. 44 | """ 45 | 46 | ############################################################ 47 | # Constructor 48 | ############################################################ 49 | 50 | def __init__(self, n: int, d: float, f_m: float, plo_M: float, plo_m: float, h_MM: float, h_mm: float, 51 | seed: object = None): 52 | DiGraph.__init__(self, n=n, d=d, f_m=f_m, plo_M=plo_M, plo_m=plo_m, seed=seed) 53 | Homophily.__init__(self, n=n, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 54 | self.model_name = const.DH_MODEL_NAME 55 | 56 | ############################################################ 57 | # Generation 58 | ############################################################ 59 | 60 | def initialize(self, class_attribute: str = 'm', class_values: list = None, class_labels: list = None): 61 | """ 62 | Initializes the model. 63 | 64 | Parameters 65 | ---------- 66 | class_attribute: str 67 | name of the attribute that represents the class 68 | 69 | class_values: list 70 | values of the class attribute 71 | 72 | class_labels: list 73 | labels of the class attribute mapping the class_values. 
74 | """ 75 | DiGraph.initialize(self, class_attribute, class_values, class_labels) 76 | Homophily.initialize(self, class_attribute, class_values, class_labels) 77 | 78 | def get_target_probabilities(self, source: int, available_nodes: Union[None, list[int], np.array]) -> np.array: 79 | """ 80 | Returns the probabilities of the target nodes to be selected given a source node. 81 | 82 | Parameters 83 | ---------- 84 | source: int 85 | source node (id) 86 | 87 | available_nodes: Union[None, list[int], np.array] 88 | available target nodes (ids) to connect to; 89 | probabilities are delegated to 90 | Homophily.get_target_probabilities (the returned 91 | target list is discarded, only probs are kept) 92 | 93 | Returns 94 | ------- 95 | probs: np.array 96 | probabilities of the target nodes to be selected 97 | 98 | """ 99 | probs, ts = Homophily.get_target_probabilities(self, source, available_nodes) 100 | return probs 101 | 102 | ############################################################ 103 | # Calculations 104 | ############################################################ 105 | 106 | def info_params(self): 107 | """ 108 | Shows the parameters of the model. 109 | """ 110 | DiGraph.info_params(self) 111 | Homophily.info_params(self) 112 | 113 | def info_computed(self): 114 | """ 115 | Shows the computed properties of the graph. 116 | """ 117 | Homophily.info_computed(self) 118 | 119 | def infer_homophily_values(self) -> tuple[float, float]: 120 | """ 121 | Infers analytically the homophily values for the majority and minority classes.
122 | 123 | Returns 124 | ------- 125 | h_MM: float 126 | homophily within majority group 127 | 128 | h_mm: float 129 | homophily within minority group 130 | """ 131 | from sympy import symbols 132 | from sympy import Eq 133 | from sympy import solve 134 | 135 | f_m = self.calculate_fraction_of_minority() 136 | f_M = 1 - f_m 137 | 138 | e = self.calculate_edge_type_counts() 139 | e_MM = e['MM'] 140 | e_mm = e['mm'] 141 | e_Mm = e['Mm'] 142 | e_mM = e['mM'] 143 | 144 | p_MM = e_MM / (e_MM + e_Mm) 145 | p_mm = e_mm / (e_mm + e_mM) 146 | 147 | # equations 148 | hmm, hMM, hmM, hMm = symbols('hmm hMM hmM hMm') 149 | eq1 = Eq((f_m * hmm) / ((f_m * hmm) + (f_M * hmM)), p_mm) 150 | eq2 = Eq(hmm + hmM, 1) 151 | 152 | eq3 = Eq((f_M * hMM) / ((f_M * hMM) + (f_m * hMm)), p_MM) 153 | eq4 = Eq(hMM + hMm, 1) 154 | 155 | solution = solve((eq1, eq2, eq3, eq4), (hmm, hmM, hMM, hMm)) 156 | h_MM, h_mm = solution[hMM], solution[hmm] 157 | return h_MM, h_mm 158 | 159 | def makecopy(self): 160 | """ 161 | Makes a copy of the current object. 162 | """ 163 | return self.__class__(n=self.n, 164 | d=self.d, 165 | f_m=self.f_m, 166 | plo_M=self.plo_M, 167 | plo_m=self.plo_m, 168 | h_MM=self.h_MM, 169 | h_mm=self.h_mm, 170 | seed=self.seed) 171 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | NetIn 2 | ======== 3 | 4 | .. image:: https://github.com/CSHVienna/NetworkInequalities/blob/main/docs/source/netin-logo.png?raw=true 5 | :width: 100 6 | :alt: NetworkInequality 7 | 8 | NetIn is a python package for network inference. 9 | It is based on the NetworkX package and provides a set of methods to study network inequalities. 10 | The package is currently under development and will be updated regularly. 11 | 12 | .. 
image:: https://github.com/CSHVienna/NetworkInequalities/actions/workflows/python-app.yml/badge.svg 13 | :target: https://github.com/CSHVienna/NetworkInequalities/actions/workflows/python-app.yml 14 | 15 | .. image:: https://img.shields.io/badge/python-3.9-blue.svg 16 | :target: https://www.python.org/downloads/release/python-3916/ 17 | 18 | .. image:: https://img.shields.io/badge/NetworkX-3.1-blue.svg 19 | :target: https://networkx.org/ 20 | 21 | .. image:: https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg 22 | :target: https://creativecommons.org/licenses/by-nc-sa/4.0/ 23 | 24 | .. image:: https://static.pepy.tech/personalized-badge/netin?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads 25 | :target: https://pepy.tech/project/netin 26 | 27 | - **Website:** https://www.networkinequality.com 28 | - **Documentation:** https://cshvienna.github.io/NetworkInequalities 29 | - **Source:** https://github.com/CSHVienna/NetworkInequalities 30 | - **Bug reports:** https://github.com/CSHVienna/NetworkInequalities/issues 31 | - **GitHub Discussions:** https://github.com/CSHVienna/NetworkInequalities/discussions 32 | - **Mailing list:** https://groups.google.com/forum/#!forum/netin-dev 33 | 34 | Simple examples 35 | --------------- 36 | 37 | Create an undirected network with preferential attachment and homophily. 38 | 39 | .. code:: pycon 40 | 41 | >>> from netin import PAH 42 | >>> G = PAH(n=200, k=2, f_m=0.2, h_MM=0.1, h_mm=0.9, seed=42) 43 | >>> G.generate() 44 | >>> G.info() 45 | 46 | 47 | Create a directed network with preferential attachment and homophily. 48 | 49 | .. 
code:: pycon 50 | 51 | >>> from netin import DPAH 52 | >>> G = DPAH(n=200, f_m=0.2, d=0.02, h_MM=0.1, h_mm=0.6, plo_M=2.0, plo_m=2.0, seed=42) 53 | >>> G.generate() 54 | >>> G.info() 55 | 56 | Install 57 | ------- 58 | 59 | Install the latest version of NetIn:: 60 | 61 | $ pip install netin 62 | 63 | 64 | Install from source:: 65 | 66 | $ git clone https://github.com/CSHVienna/NetworkInequalities 67 | $ cd NetworkInequalities 68 | $ pip install -e . 69 | 70 | 71 | Bugs 72 | ---- 73 | 74 | Please report any bugs that you find `here `_. 75 | Or, even better, fork the repository on `GitHub `_ 76 | and create a pull request (PR). We welcome all changes, big or small, and we 77 | will help you make the PR if you are new to `git`. 78 | 79 | License 80 | ------- 81 | 82 | Released under Creative Commons by-nc-sa 4.0 (see `LICENSE`):: 83 | 84 | Copyright (C) 2023-2024 NetIn Developers 85 | Fariba Karimi 86 | Lisette Espin-Noboa 87 | Jan Bachmann 88 | 89 | How to cite 90 | ----------- 91 | 92 | If you use any implementation from this repository, please cite both the repository and the corresponding paper as follows: 93 | 94 | 1. Citing the GitHub repository 95 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 96 | 97 | .. code-block:: bibtex 98 | 99 | @software{pynetin, 100 | author = {{CSH Algorithmic Fairness and Network Inequality Group}}, 101 | title = {{NetworkInequalities}}, 102 | year = {2023}, 103 | publisher = {GitHub}, 104 | journal = {GitHub repository}, 105 | howpublished = {\url{https://github.com/CSHVienna/NetworkInequalities}} 106 | } 107 | 108 | 109 | 2. Citing the models from the original papers 110 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 111 | 112 | PAH model 113 | ^^^^^^^^^ 114 | 115 | .. 
code-block:: bibtex 116 | 117 | @article{karimi2018homophily, 118 | title={Homophily influences ranking of minorities in social networks}, 119 | author={Karimi, Fariba and G{\'e}nois, Mathieu and Wagner, Claudia and Singer, Philipp and Strohmaier, Markus}, 120 | journal={Scientific reports}, 121 | volume={8}, 122 | number={1}, 123 | pages={11077}, 124 | year={2018}, 125 | publisher={Nature Publishing Group UK London} 126 | } 127 | 128 | 129 | DPAH, DPA, DH models 130 | ^^^^^^^^^^^^^^^^^^^^^ 131 | 132 | .. code-block:: bibtex 133 | 134 | @article{espin2022inequality, 135 | title={Inequality and inequity in network-based ranking and recommendation algorithms}, 136 | author={Esp{\'\i}n-Noboa, Lisette and Wagner, Claudia and Strohmaier, Markus and Karimi, Fariba}, 137 | journal={Scientific reports}, 138 | volume={12}, 139 | number={1}, 140 | pages={2012}, 141 | year={2022}, 142 | publisher={Nature Publishing Group UK London} 143 | } 144 | 145 | PATCH model 146 | ^^^^^^^^^^^ 147 | 148 | .. code-block:: bibtex 149 | 150 | @unpublished{bachmann2025patch, 151 | author = {Bachmann, Jan and Esp{\'i}n-Noboa, Lisette and Cinardi, Nicola and Martin-Gutierrez, Samuel and Karimi, Fariba}, 152 | title = {PATCH: Network Inequality through Preferential Attachment, Triadic Closure and Homophily}, 153 | year = {2025}, 154 | note = {Work in progress}, 155 | } 156 | 157 | Thank you for citing our work! 🚀 158 | 159 | 160 | 161 | Note on multidimensional interactions 162 | ---------------------------------------------------- 163 | Provisionally, the code to simulate and analyze networks with multidimensional interactions is hosted in the `repository `_ associated with the `paper `_ [Martin-Gutierrez et al. 2024]. 
164 | 165 | -------------------------------------------------------------------------------- /netin/generators/patch.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Dict, Any, List, Tuple 2 | 3 | import numpy as np 4 | 5 | from netin.utils import constants as const 6 | from .pah import PAH 7 | from .g_tc import GraphTC 8 | 9 | 10 | class PATCH(GraphTC, PAH): 11 | """Creates a new PATCH instance. An undirected graph with preferential attachment, homophily, and triadic closure. 12 | 13 | Parameters 14 | ---------- 15 | n: int 16 | number of nodes (minimum=2) 17 | 18 | k: int 19 | minimum degree of nodes (minimum=1) 20 | 21 | f_m: float 22 | fraction of minorities (minimum=1/n, maximum=(n-1)/n) 23 | 24 | h_MM: float 25 | homophily (similarity) between majority nodes (minimum=0, maximum=1.) 26 | 27 | h_mm: float 28 | homophily (similarity) between minority nodes (minimum=0, maximum=1.) 29 | 30 | tc: float 31 | probability of a new edge to close a triad (minimum=0, maximum=1.) 32 | 33 | tc_uniform: bool 34 | specifies whether the triadic closure target is chosen uniform at random or if it follows the regular link formation mechanisms (e.g., homophily) (default=True) 35 | 36 | Notes 37 | ----- 38 | The initialization is an undirected graph with n nodes and no edges. 39 | Then, everytime a node is selected as source, it gets connected to k target nodes. 40 | Target nodes are selected via preferential attachment (in-degree) [BarabasiAlbert1999]_ and 41 | homophily (h_**; see :class:`netin.Homophily`) [Karimi2018]_ with probability ``1-p_{TC}``, 42 | and with probability ``p_{TC}`` via triadic closure (see :class:`netin.GraphTC`) [HolmeKim2002]_. 43 | 44 | Note that this model is still work in progress and not fully implemented yet. 
45 | """ 46 | 47 | ############################################################ 48 | # Constructor 49 | ############################################################ 50 | 51 | def __init__(self, n: int, k: int, f_m: float, h_mm: float, h_MM: float, tc: float, tc_uniform: bool = True, seed: object = None): 52 | PAH.__init__(self, n=n, k=k, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed) 53 | GraphTC.__init__(self, n=n, k=k, f_m=f_m, tc=tc, tc_uniform=tc_uniform, seed=seed) 54 | self.model_name = const.PATCH_MODEL_NAME 55 | 56 | ############################################################ 57 | # Init 58 | ############################################################ 59 | 60 | def validate_parameters(self): 61 | """ 62 | Validates the parameters of the undirected. 63 | """ 64 | PAH.validate_parameters(self) 65 | GraphTC.validate_parameters(self) 66 | 67 | def get_metadata_as_dict(self) -> Dict[str, Any]: 68 | """ 69 | Returns a dictionary with the metadata of the PATCH graph. 70 | 71 | Returns 72 | ------- 73 | dict 74 | the graph metadata as a dictionary 75 | """ 76 | obj1 = PAH.get_metadata_as_dict(self) 77 | obj2 = GraphTC.get_metadata_as_dict(self) 78 | obj1.update(obj2) 79 | return obj1 80 | 81 | ############################################################ 82 | # Generation 83 | ############################################################ 84 | def get_target_probabilities_regular(self, source: int, 85 | target_list: List[int]) -> Tuple[np.ndarray, List[int]]: 86 | """ 87 | Returns the probability of nodes to be selected as target nodes using the 88 | preferential attachment with homophily mechanism. 
89 | 90 | Parameters 91 | ---------- 92 | source: int 93 | source node id 94 | 95 | available_nodes: set 96 | set of target node ids 97 | 98 | special_targets: dict 99 | dictionary of special target node ids to be considered 100 | 101 | Returns 102 | ------- 103 | tuple 104 | probabilities of nodes to be selected as target nodes, and set of target of nodes 105 | """ 106 | return PAH.get_target_probabilities(self, source, target_list) 107 | 108 | def get_target_probabilities(self, source: int, available_nodes: List[int]) -> Tuple[Any, List[int]]: 109 | return GraphTC.get_target_probabilities(self, source, available_nodes) 110 | 111 | ############################################################ 112 | # Calculations 113 | ############################################################ 114 | 115 | def info_params(self): 116 | """ 117 | Shows the (input) parameters of the graph. 118 | """ 119 | PAH.info_params(self) 120 | GraphTC.info_params(self) 121 | 122 | def info_computed(self): 123 | """ 124 | Shows the (computed) properties of the graph. 125 | """ 126 | PAH.info_computed(self) 127 | GraphTC.info_computed(self) 128 | 129 | def infer_homophily_values(self) -> tuple[float, float]: 130 | """ 131 | Infers analytically the homophily values of the graph. 132 | 133 | Returns 134 | ------- 135 | tuple 136 | homophily values of the graph (majority, minority) 137 | """ 138 | h_MM = None 139 | h_mm = None 140 | return h_MM, h_mm 141 | 142 | def infer_triadic_closure(self) -> float: 143 | """ 144 | Infers analytically the triadic closure value of the graph. 145 | 146 | Returns 147 | ------- 148 | float 149 | triadic closure probability of the graph 150 | """ 151 | # @TODO: To be implemented 152 | raise NotImplementedError("Inferring triadic closure not implemented yet.") 153 | 154 | def makecopy(self): 155 | """ 156 | Makes a copy of the current object. 
157 | """ 158 | return self.__class__(n=self.n, 159 | k=self.k, 160 | f_m=self.f_m, 161 | tc=self.tc, 162 | h_MM=self.h_MM, 163 | h_mm=self.h_mm, 164 | seed=self.seed) 165 | -------------------------------------------------------------------------------- /netin/stats/ranking.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Set, List, Tuple 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | from netin.utils import constants as const 7 | 8 | 9 | ### Inequity (fraction of minority in ranking) ### 10 | 11 | def get_ranking_inequity(f_m: float, ys: np.array) -> float: 12 | """Computes ME: mean error distance between the fraction of minority in each top-k rank `f_m^k` and 13 | the fraction of minority of the entire graph `f_m`. ME is the ranking inequity of the rank. 14 | 15 | Parameters 16 | ---------- 17 | f_m: float 18 | The fraction of minority in the entire graph. 19 | 20 | ys: np.array 21 | The fraction of minority in each top-k rank. 22 | 23 | Returns 24 | ------- 25 | me: float 26 | The ranking inequity of the rank. 27 | """ 28 | me = np.mean([efm - f_m for efm in ys if not np.isnan(efm)]) 29 | return me 30 | 31 | 32 | def get_ranking_inequity_class(me: float, beta: float = None) -> str: 33 | """ 34 | Infers the inequity class (label) given the inequity measure (ME). 35 | 36 | Parameters 37 | ---------- 38 | me: float 39 | The inequity measure (ME). 40 | 41 | beta: float 42 | The threshold to determine the inequity class. 43 | 44 | Returns 45 | ------- 46 | label: str 47 | The inequity class label (i.e., fair, over-represented, under-represented). 48 | 49 | Notes 50 | ----- 51 | See :func:`get_ranking_inequity` for more details on `me`. 52 | 53 | By default, `beta=0.05`, see [Espin-Noboa2022]. 
54 | """ 55 | beta = const.INEQUITY_BETA if beta is None else beta 56 | label = const.INEQUITY_OVER if me > beta else const.INEQUITY_UNDER if me < -beta else const.INEQUITY_FAIR 57 | return label 58 | 59 | 60 | def get_fraction_of_minority_in_ranking(df: pd.DataFrame, x: str) -> \ 61 | Union[Tuple[np.ndarray, np.ndarray], Tuple[list, list]]: 62 | """ 63 | Computes the fraction of minority in each top-k rank. 64 | 65 | Parameters 66 | ---------- 67 | df: pd.DataFrame 68 | DataFrame that contains the data. 69 | 70 | x: str 71 | The column name of the data. 72 | 73 | Returns 74 | ------- 75 | xs: np.ndarray 76 | The x values (ranking). 77 | 78 | ys: np.ndarray 79 | The y values (fraction of minority). 80 | """ 81 | xs = const.RANK_RANGE 82 | ys = [] 83 | for rank in xs: 84 | column = f"{x}_rank" 85 | tmp = df.query(f"{column} <= @rank").copy() 86 | total = tmp.shape[0] 87 | efm = np.nan if total == 0 else tmp.query("class_label == @const.MINORITY_LABEL").shape[0] / total 88 | ys.append(efm) 89 | return xs, ys 90 | 91 | 92 | ### Inequality (Gini coefficient) ### 93 | 94 | def get_ranking_inequality(ys: np.array) -> float: 95 | """ 96 | Returns the Gini coefficient of the entire distribution (at op-100%). 97 | 98 | Parameters 99 | ---------- 100 | ys: np.array 101 | The y values (Gini coefficients in each top-k rank). 102 | 103 | Returns 104 | ------- 105 | float 106 | The Gini coefficient of the entire distribution (at op-100%). 107 | """ 108 | gini_global = ys[-1] # top-100% 109 | return gini_global 110 | 111 | 112 | def get_ranking_inequality_class(gini_global: float, cuts: Set[float] = const.INEQUALITY_CUTS) -> str: 113 | """ 114 | Infers the inequality class label given the Gini coefficient of the entire distribution. 115 | 116 | Parameters 117 | ---------- 118 | gini_global: float 119 | The Gini coefficient of the entire distribution. 120 | 121 | cuts: Set[float] 122 | The cuts to determine the inequality class. 
123 | 124 | Returns 125 | ------- 126 | label: str 127 | The inequality class label (i.e., equality, moderate, skewed) 128 | 129 | Notes 130 | ----- 131 | By default, `cuts={0.3, 0.6}`, see [Espin-Noboa2022]. 132 | """ 133 | cuts = const.INEQUALITY_CUTS if cuts is None else cuts 134 | if len(cuts) != 2 or len(set(cuts)) == 1: 135 | raise Exception("There must be two cuts for the inequality class") 136 | 137 | label = const.INEQUALITY_HIGH if gini_global >= max(cuts) \ 138 | else const.INEQUALITY_LOW if gini_global <= min(cuts) \ 139 | else const.INEQUALITY_MODERATE 140 | return label 141 | 142 | 143 | def get_gini_in_ranking(df: pd.DataFrame, x: str) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[list, list]]: 144 | """ 145 | Computes the Gini coefficient of a distribution `df[x]` in each top-k rank. 146 | 147 | Parameters 148 | ---------- 149 | df: pd.DataFrame 150 | Dataframe that contains the data. 151 | 152 | x: str 153 | The column name of the data. 154 | 155 | Returns 156 | ------- 157 | xs: np.ndarray 158 | The x values (ranking). 159 | ys: np.ndarray 160 | The y values (Gini coefficients). 161 | """ 162 | xs = const.RANK_RANGE 163 | ys = [] 164 | for rank in xs: 165 | column = f"{x}_rank" 166 | tmp = df.query(f"{column} <= @rank").copy() 167 | g = gini(tmp.loc[:, x].values) 168 | ys.append(g) 169 | return xs, ys 170 | 171 | 172 | def gini(data: np.array) -> float: 173 | """ 174 | Calculates the Gini coefficient of a distribution. 175 | 176 | Parameters 177 | ---------- 178 | data: np.array 179 | The data. 180 | 181 | Returns 182 | ------- 183 | float 184 | The Gini coefficient of the distribution. 
185 | 186 | References 187 | ---------- 188 | `Gini coefficient `_ 189 | `Implementation `_ 190 | """ 191 | # https://github.com/oliviaguest/gini/blob/master/gini.py 192 | # based on bottom eq: 193 | # http://www.statsdirect.com/help/generatedimages/equations/equation154.svg 194 | # from: 195 | # http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm 196 | # All values are treated equally, arrays must be 1d: 197 | X = data.flatten().astype(np.float64) 198 | if np.amin(X) < 0: 199 | # Values cannot be negative: 200 | X -= np.amin(X) 201 | # Values cannot be 0: 202 | X += 0.0000001 203 | # Values must be sorted: 204 | X = np.sort(X) 205 | # Index per array element: 206 | index = np.arange(1, X.shape[0] + 1) 207 | # Number of array elements: 208 | n = X.shape[0] 209 | # Gini coefficient: 210 | return (np.sum((2 * index - n - 1) * X)) / (n * np.sum(X)) 211 | -------------------------------------------------------------------------------- /netin/stats/networks.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from typing import Tuple 3 | import warnings 4 | 5 | import numpy as np 6 | import networkx as nx 7 | from collections import Counter 8 | 9 | from netin.utils import constants as const 10 | from netin.utils import validator as val 11 | 12 | 13 | def get_min_degree(g: Union[nx.Graph, nx.DiGraph]) -> int: 14 | """ 15 | Returns the minimum degree of nodes in the graph. 16 | 17 | Parameters 18 | ---------- 19 | g: Union[nx.Graph, nx.DiGraph] 20 | Graph to compute the minimum degree 21 | 22 | Returns 23 | ------- 24 | int 25 | Minimum degree of nodes in the graph 26 | """ 27 | degrees = [d for n, d in g.degree] 28 | return min(degrees) if len(degrees) > 0 else -1 29 | 30 | 31 | def get_minority_fraction(g: Union[nx.Graph, nx.DiGraph], class_attribute: str = None) -> float: 32 | """ 33 | Computes the fraction of the minority class in the graph. 
34 | 35 | Parameters 36 | ---------- 37 | g: Union[nx.Graph, nx.DiGraph] 38 | Graph to compute the fraction of the minority class 39 | 40 | """ 41 | n = g.number_of_nodes() 42 | majority, minority, class_attribute = _get_class_labels(g, class_attribute) 43 | minority_count = sum([1 for n, obj in g.nodes(data=True) if obj[class_attribute] == minority]) 44 | f_m = minority_count / n 45 | 46 | return f_m 47 | 48 | 49 | def get_edge_type_counts(g: Union[nx.Graph, nx.DiGraph], fractions: bool = False, 50 | class_attribute: str = None) -> Counter: 51 | """ 52 | Computes the edge type counts of the graph using the `class_attribute` of each node. 53 | 54 | Parameters 55 | ---------- 56 | g: Union[nx.Graph, nx.DiGraph] 57 | Graph to compute the edge type counts 58 | 59 | fractions: bool 60 | If True, the counts are returned as fractions of the total number of edges. 61 | 62 | class_attribute: str 63 | The name of the attribute that holds the class label of each node. 64 | 65 | Returns 66 | ------- 67 | Counter 68 | Counter holding the edge type counts 69 | 70 | Notes 71 | ----- 72 | Class labels are assumed to be binary. The minority class is assumed to be labeled as 1. 73 | """ 74 | majority, minority, class_attribute = _get_class_labels(g, class_attribute) 75 | class_values = [majority, minority] 76 | class_labels = [const.MAJORITY_LABEL, const.MINORITY_LABEL] 77 | 78 | counts = Counter([f"{class_labels[class_values.index(g.nodes[e[0]][class_attribute])]}" 79 | f"{class_labels[class_values.index(g.nodes[e[1]][class_attribute])]}" 80 | for e in g.edges if g.nodes[e[0]][class_attribute] in class_values and 81 | g.nodes[e[1]][class_attribute] in class_values]) 82 | 83 | if fractions: 84 | total = sum(counts.values()) 85 | counts = Counter({k: v / total for k, v in counts.items()}) 86 | 87 | return counts 88 | 89 | 90 | def get_average_degree(g: Union[nx.Graph, nx.DiGraph]) -> float: 91 | """ 92 | Returns the average node degree of the graph. 
93 | 94 | Parameters 95 | ---------- 96 | g: Union[nx.Graph, nx.DiGraph] 97 | Graph to compute the average degree for 98 | 99 | Returns 100 | ------- 101 | float 102 | Average degree of the graph 103 | """ 104 | k = sum([d for n, d in g.degree]) / g.number_of_nodes() 105 | return k 106 | 107 | 108 | def get_average_degrees(g: Union[nx.Graph, nx.DiGraph], class_attribute: str = None) -> Tuple[float, float, float]: 109 | """ 110 | Computes and returns the average degree of the graph, the average degree of the majority and the minority class. 111 | 112 | Parameters 113 | ---------- 114 | g: Union[nx.Graph, nx.DiGraph] 115 | Graph to compute the average degree for 116 | 117 | class_attribute: str 118 | Name of the class attribute in the graph 119 | 120 | Returns 121 | ------- 122 | Tuple[float, float, float] 123 | Average degree of the graph, the average degree of the majority and the minority class 124 | """ 125 | k = get_average_degree(g) 126 | 127 | majority, minority, class_attribute = _get_class_labels(g, class_attribute) 128 | kM = np.mean([d for n, d in g.degree if g.nodes[n][class_attribute] == majority]) 129 | km = np.mean([d for n, d in g.degree if g.nodes[n][class_attribute] == minority]) 130 | 131 | return k, kM, km 132 | 133 | 134 | def get_similitude(g: Union[nx.Graph, nx.DiGraph], class_attribute: str = None) -> float: 135 | """ 136 | Computes and returns the fraction of same-class edges in the graph. 
137 | 138 | Parameters 139 | ---------- 140 | g: Union[nx.Graph, nx.DiGraph] 141 | Graph to compute the similitude for 142 | 143 | class_attribute: str 144 | Name of the class attribute in the graph 145 | 146 | Returns 147 | ------- 148 | float 149 | Fraction of same-class edges in the graph 150 | """ 151 | majority, minority, class_attribute = _get_class_labels(g, class_attribute) 152 | 153 | tmp = [int(g.nodes[e[0]][class_attribute] == g.nodes[e[1]][class_attribute]) for e in g.edges] 154 | total = len(tmp) 155 | sim = sum(tmp) / total 156 | 157 | return sim 158 | 159 | 160 | def get_node_attributes(g: Union[nx.Graph, nx.DiGraph]) -> list: 161 | """ 162 | Returns the values of the class attribute for all nodes in the graph. 163 | 164 | Parameters 165 | ---------- 166 | g: Union[nx.Graph, nx.DiGraph] 167 | Graph to get the node attributes from 168 | 169 | Returns 170 | ------- 171 | list 172 | List of node attributes 173 | """ 174 | val.validate_graph_metadata(g) 175 | l = [a for n, a in nx.get_node_attributes(g, g.graph['class_attribute']).items()] 176 | return l 177 | 178 | 179 | def _get_graph_metadata_value(g: Union[nx.Graph, nx.DiGraph], key: str, default: object = None) -> Union[object, iter]: 180 | value = default if key not in g.graph or g.graph[key] is None else g.graph[key] 181 | return value 182 | 183 | 184 | def _get_class_labels(g: Union[nx.Graph, nx.DiGraph], class_attribute: str = None) -> Tuple[str, str, str]: 185 | if class_attribute: 186 | counter = Counter([obj[class_attribute] for n, obj in g.nodes(data=True)]) 187 | else: 188 | val.validate_graph_metadata(g) 189 | class_attribute = _get_graph_metadata_value(g, 'class_attribute', const.CLASS_ATTRIBUTE) 190 | counter = Counter([obj[class_attribute] for n, obj in g.nodes(data=True)]) 191 | 192 | if len(counter) > 2: 193 | warnings.warn(f'Graph contains more than two classes: {counter}') 194 | 195 | majority = counter.most_common()[0][0] 196 | minority = counter.most_common()[1][0] 197 | 198 | 
return majority, minority, class_attribute 199 | -------------------------------------------------------------------------------- /netin/algorithms/sampling/sampling.py: -------------------------------------------------------------------------------- 1 | ############################################ 2 | # System dependencies 3 | ############################################ 4 | from typing import List 5 | import networkx as nx 6 | import numpy as np 7 | import gc 8 | 9 | ############################################ 10 | # Local dependencies 11 | ############################################ 12 | import netin 13 | from netin.utils import validator as val 14 | from netin.stats import networks as net 15 | from . import constants as const 16 | 17 | 18 | ############################################ 19 | # Class 20 | ############################################ 21 | class Sampling(object): 22 | """Base class for sampling methods. 23 | 24 | Parameters 25 | ---------- 26 | g: netin.Graph | netin.DiGraph 27 | global network 28 | 29 | pseeds: float 30 | fraction of seeds to sample 31 | 32 | max_tries: int 33 | maximum number of tries to sample a subgraph with enough classes and edges 34 | 35 | random_seed: object 36 | seed for random number generator 37 | 38 | kwargs: dict 39 | additional parameters for the sampling method 40 | 41 | Notes 42 | ----- 43 | - The original graph ``g`` (passed as parameter) is not modified. 44 | The sampling method creates a copy of it, and stores it in ``self.g``. 45 | - This class does not create a subgraph. 
class Sampling(object):
    """Base class for sampling methods.

    Parameters
    ----------
    g: netin.Graph | netin.DiGraph
        global network

    pseeds: float
        fraction of seeds to sample

    max_tries: int
        maximum number of tries to sample a subgraph with enough classes and edges

    random_seed: object
        seed for random number generator

    kwargs: dict
        additional parameters for the sampling method

    Notes
    -----
    - The original graph ``g`` (passed as parameter) is not modified.
      The sampling method creates a copy of it, and stores it in ``self.g``.
    - This class does not create a subgraph.
    """

    def __init__(self, g: netin.Graph, pseeds: float, max_tries: int = const.MAX_TRIES,
                 random_seed: object = None, **kwargs):
        self.g = g.copy()  # work on a copy; the caller's graph is never modified
        self.pseeds = pseeds
        self.max_tries = max_tries
        self.random_seed = random_seed
        self.nseeds = int(pseeds * self.g.number_of_nodes())
        self.sample = None          # sampled subgraph, set by sampling()
        self.nodes = None           # node order used for the feature matrix
        self.train_index = None     # indices of sampled (seed) nodes
        self.test_index = None      # indices of non-sampled nodes
        self.membership_y = None    # class index per node (aligned with self.nodes)
        self.feature_x = None       # dense adjacency matrix of self.g
        self.test_nodes = None
        self.kwargs = kwargs
        np.random.seed(self.random_seed)

    def sampling(self):
        """
        Creates a new instance of the respective sampling method, and calls its
        respective extract_subgraph method.
        """
        val.validate_float(self.pseeds, 0, 1)
        val.validate_more_than_one(net.get_node_attributes(self.g))
        self.sample = nx.DiGraph() if self.g.is_directed() else nx.Graph()
        self._extract_subgraph()
        self._set_graph_metadata()

    @property
    def method_name(self) -> str:
        """
        Name of sampling method (overridden by subclasses).
        """
        return ''

    def _count_classes(self, nodes: List) -> int:
        """
        Counts the number of distinct classes in a given set of nodes.

        Parameters
        ----------
        nodes: list
            list of nodes

        Returns
        -------
        int
            number of classes
        """
        class_attribute = self.g.graph['class_attribute']
        return len({self.g.nodes[n][class_attribute] for n in nodes})

    def _extract_subgraph(self):
        """
        Creates a subgraph from G based on the sampling technique.

        Retries up to ``max_tries`` times until the sample contains at least
        ``const.MIN_EDGES`` edges; raises RuntimeWarning when the tries are exhausted.
        """
        max_tries = self.max_tries or const.MAX_TRIES
        num_edges = 0
        tries = 0

        while num_edges < const.MIN_EDGES:
            # BUG FIX: the original raised immediately on the first sparse sample
            # (and had an unreachable `return` after a raise), which made the
            # documented max_tries retry loop dead code. Now we actually retry.
            if tries >= max_tries:
                raise RuntimeWarning(f"The sample has not enough edges ({num_edges}), and max_tries has exceeded. "
                                     "Try increasing the number of tries or increasing the number of seeds.")
            tries += 1

            nodes, edges = self._sample()

            # recreate the induced subgraph from the sampled nodes/edges
            sample = self.g.copy()

            if edges:
                edges_to_remove = set(sample.edges()) - edges
                sample.remove_edges_from(edges_to_remove)

            if nodes:
                nodes_to_remove = [n for n in self.g.node_list if n not in nodes]
                sample.remove_nodes_from(nodes_to_remove)

            num_edges = sample.number_of_edges()

        self.sample = sample.copy()
        # BUG FIX: the original filtered node_list against itself (a no-op);
        # keep only the nodes that actually survived the sampling.
        self.sample.node_list = [n for n in self.sample.node_list if n in self.sample]
        self.sample.node_class_values = {n: c for n, c in self.sample.node_class_values.items()
                                         if n in self.sample.node_list}
        gc.collect()

    def _set_graph_metadata(self):
        """
        Updates the training sample subgraph metadata.
        """
        self.sample.graph['method'] = self.method_name
        self.sample.graph['pseeds'] = self.pseeds
        nx.set_node_attributes(G=self.g, name='seed',
                               values={n: int(n in self.sample) for n in self.g.node_list})
        self.sample.graph['m'] = net.get_min_degree(self.sample)
        self.sample.graph['d'] = nx.density(self.sample)
        self.sample.graph['n'] = self.sample.number_of_nodes()
        self.sample.graph['f_m'] = net.get_minority_fraction(self.sample)
        self.sample.graph['similarity'] = net.get_similitude(self.sample)
        self.sample.graph['e'] = self.sample.number_of_edges()
        # BUG FIX: net.get_average_degrees returns (k, k_majority, k_minority);
        # the original unpacked it as (k, km, kM), swapping the group averages.
        k, kM, km = net.get_average_degrees(self.sample)
        self.sample.graph['k'] = k
        self.sample.graph['km'] = km
        self.sample.graph['kM'] = kM
        self.sample.graph['random_seed'] = self.random_seed
        self.sample.graph['original_graph'] = self.sample.graph['model']
        del self.sample.graph['model']
        self.sample.model_name = f"{self.sample.model_name}\n{self.method_name}"

        # for LINK: working with matrices
        self.nodes = list(self.g.node_list)
        self.train_index = np.array([i for i, n in enumerate(self.nodes) if n in self.sample])
        self.test_nodes, self.test_index = zip(*[(n, i) for i, n in enumerate(self.nodes)
                                                 if n not in self.sample])
        self.test_index = np.array(self.test_index)
        self.feature_x = nx.adjacency_matrix(self.g, self.nodes).toarray()
        self.membership_y = np.array(
            [self.g.graph['class_values'].index(self.g.nodes[n][self.g.graph['class_attribute']])
             for n in self.nodes])

    def info(self):
        """
        Prints a summary of the training sample subgraph, including its attributes.
        """
        print(nx.info(self.sample))
        print(self.sample.graph)
class TriadicClosure(Graph):
    """Class to model triadic closure as a mechanism of edge formation given a source
    and a target node.

    Parameters
    ----------
    n: int
        number of nodes (minimum=2)

    f_m: float
        fraction of minorities (minimum=1/n, maximum=(n-1)/n)

    tc: float
        triadic closure probability (minimum=0, maximum=1)

    seed: object
        seed for random number generator

    Notes
    -----
    This class does not generate a graph.
    """

    ############################################################
    # Constructor
    ############################################################

    def __init__(self, n: int, f_m: float, tc: float, seed: object = None):
        Graph.__init__(self, n=n, f_m=f_m, seed=seed)
        self.tc = tc
        self.model_name = const.TC_MODEL_NAME
        # candidate -> number of distinct two-hop paths from the current source
        self._tc_candidates = defaultdict(int)
        self._node_source_curr = -1

    ############################################################
    # Init
    ############################################################

    def validate_parameters(self):
        """
        Validates the parameters of the undirected.
        """
        Graph.validate_parameters(self)
        val.validate_float(self.tc, minimum=0., maximum=1.)

    def get_metadata_as_dict(self) -> Dict[str, Any]:
        """
        Returns the metadata (parameters) of the model as a dictionary.

        Returns
        -------
        dict
            metadata of the model
        """
        meta = Graph.get_metadata_as_dict(self)
        meta.update({'tc': self.tc})
        return meta

    ############################################################
    # Getters & Setters
    ############################################################

    def set_triadic_closure(self, tc: float):
        """
        Sets the triadic closure probability `tc`.

        Parameters
        ----------
        tc: float
            triadic closure probability (minimum=0, maximum=1)
        """
        assert 0. <= tc <= 1.,\
            f"Triadic closure probability should be between 0. and 1. but is {tc}"
        self.tc = tc

    def get_triadic_closure(self) -> float:
        """
        Returns the triadic closure probability `tc`.

        Returns
        -------
        tc: float
            triadic closure probability (minimum=0, maximum=1)
        """
        return self.tc

    ############################################################
    # Generation
    ############################################################

    def initialize(self,
                   class_attribute: str = 'm',
                   class_values: List[Any] = None,
                   class_labels: List[str] = None):
        """
        Initializes the model.

        Parameters
        ----------
        class_attribute: str
            name of the attribute that represents the class

        class_values: list
            values of the class attribute

        class_labels: list
            labels of the class attribute mapping the class_values.
        """
        Graph.initialize(self, class_attribute, class_values, class_labels)

    def init_special_targets(self, source: int) -> object:
        """
        Resets the triadic-closure candidate bookkeeping for a newly added source node.

        Parameters
        ----------
        source : int
            Newly added node
        """
        self._node_source_curr = source
        self._tc_candidates = defaultdict(int)

    def get_target_probabilities(self, source: int,
                                 available_nodes: List[int]) -> Tuple[np.array, List[int]]:
        """Returns the probabilities of selecting a target node based on triadic closure.

        The candidates and their weights come from ``self._tc_candidates``
        (two-hop neighbors of earlier targets, weighted by how many common
        neighbors they share with the source).

        Parameters
        ----------
        source : int
            source node
        available_nodes : List[int]
            list of available target nodes (unused; candidates come from bookkeeping)

        Returns
        -------
        Tuple[np.array, List[int]]
            Tuple of two equally sized sequences: normalized probabilities and
            the corresponding candidate nodes.
        """
        # Triadic closure is not uniform (biased towards common neighbors)
        candidates, counts = zip(*list(self._tc_candidates.items()))
        weights = np.asarray(counts, dtype=np.float32)
        weights /= weights.sum()
        return weights, candidates

    def on_edge_added(self, source: int, target: int):
        """
        Updates the set of triadic-closure candidates after an edge is created.

        Once ``source -- target`` exists, every neighbor of ``target`` that is not
        already adjacent to ``source`` becomes (or stays) a candidate, weighted by
        the number of such two-hop paths.

        Parameters
        ----------
        source: int
            source node

        target: int
            target node
        """
        if target in self._tc_candidates:
            del self._tc_candidates[target]
        for common in self.neighbors(target):
            # self[source] gives direct O(1) access to source's adjacency,
            # unlike the iterator returned by self.neighbors(source)
            if common not in self[source]:
                self._tc_candidates[common] += 1
        return super().on_edge_added(source, target)

    ############################################################
    # Calculations
    ############################################################

    def info_params(self):
        """
        Shows the parameters of the model.
        """
        print('tc: {}'.format(self.tc))

    def info_computed(self):
        """
        Shows the computed properties of the graph.
        """
        inferred_tc = self.infer_triadic_closure()
        print("- Empirical triadic closure: {}".format(inferred_tc))

    def infer_triadic_closure(self) -> float:
        """
        Infers analytically the triadic closure value of the graph.

        Returns
        -------
        float
            triadic closure probability of the graph
        """
        # @TODO: To be implemented
        raise NotImplementedError("Inferring triadic closure not implemented yet.")
class PAH(PA, Homophily):
    """Creates a new PAH instance. An undirected graph with preferential attachment
    and homophily.

    Parameters
    ----------
    n: int
        number of nodes (minimum=2)

    k: int
        minimum degree of nodes (minimum=1)

    f_m: float
        fraction of minorities (minimum=1/n, maximum=(n-1)/n)

    h_MM: float
        homophily (similarity) between majority nodes (minimum=0, maximum=1.)

    h_mm: float
        homophily (similarity) between minority nodes (minimum=0, maximum=1.)

    seed: object
        seed for random number generator

    Notes
    -----
    The initialization is an undirected graph with n nodes, where f_m are the minority.
    Then, everytime a node is selected as source, it gets connected to k target nodes.
    Target nodes are selected via preferential attachment (in-degree) and homophily (h_**).
    This model is based on [Karimi2018]_ known as the "Barabasi model with homophily"
    or "BA Homophily".
    """

    ############################################################
    # Constructor
    ############################################################

    def __init__(self, n: int, k: int, f_m: float, h_MM: float, h_mm: float, seed: object = None):
        PA.__init__(self, n=n, k=k, f_m=f_m, seed=seed)
        Homophily.__init__(self, n=n, f_m=f_m, h_MM=h_MM, h_mm=h_mm, seed=seed)
        self.model_name = const.PAH_MODEL_NAME

    ############################################################
    # Init
    ############################################################

    def validate_parameters(self):
        """
        Validates the parameters of the undirected.
        """
        PA.validate_parameters(self)
        Homophily.validate_parameters(self)

    def get_metadata_as_dict(self) -> dict:
        """
        Returns the metadata (parameters) of the model as a dictionary.

        Returns
        -------
        dict
            metadata of the model
        """
        meta = PA.get_metadata_as_dict(self)
        meta.update(Homophily.get_metadata_as_dict(self))
        return meta

    ############################################################
    # Generation
    ############################################################

    def initialize(self, class_attribute: str = 'm', class_values: list = None, class_labels: list = None):
        """
        Initializes the model.

        Parameters
        ----------
        class_attribute: str
            name of the attribute that represents the class

        class_values: list
            values of the class attribute

        class_labels: list
            labels of the class attribute mapping the class_values.
        """
        PA.initialize(self, class_attribute, class_values, class_labels)
        Homophily.initialize(self, class_attribute, class_values, class_labels)

    def get_target_probabilities(self, source: int, available_nodes: list[int]) -> tuple[np.array, list[int]]:
        """
        Returns the probabilities of selecting a target node from a set of nodes,
        combining preferential attachment (degree) and homophily.

        Parameters
        ----------
        source: int
            source node

        available_nodes: list[int]
            list of candidate target nodes

        Returns
        -------
        tuple[np.array, list[int]]
            normalized probabilities, and the candidate target nodes
        """
        weights = []
        for target in available_nodes:
            similarity = self.get_homophily_between_source_and_target(source, target)
            # EPSILON keeps zero-degree nodes reachable
            weights.append(similarity * (self.degree(target) + const.EPSILON))
        probs = np.asarray(weights)
        probs /= probs.sum()
        return probs, available_nodes

    ############################################################
    # Calculations
    ############################################################

    def info_params(self):
        """
        Shows the parameters of the model.
        """
        PA.info_params(self)
        Homophily.info_params(self)

    def info_computed(self):
        """
        Shows the computed properties of the graph.
        """
        PA.info_computed(self)
        Homophily.info_computed(self)

    def infer_homophily_values(self) -> tuple[float, float]:
        """
        Infers the level of homophily using the analytical solution of the model.

        Returns
        -------
        tuple[float, float]
            homophily between majority nodes, and homophily between minority nodes

        Notes
        -----
        See derivations in [Karimi2018]_.
        """
        return infer_homophily(self)

    def makecopy(self):
        """
        Makes a copy of the current object.
        """
        return self.__class__(n=self.n,
                              k=self.k,
                              f_m=self.f_m,
                              h_MM=self.h_MM,
                              h_mm=self.h_mm,
                              seed=self.seed)

    @staticmethod
    def fit(g, n=None, k=None, seed=None):
        """
        It fits the PAH model to the given graph.

        Parameters
        ----------
        g: netin.UnDiGraph
            graph to fit the model to

        n: int
            number of nodes to override (e.g., to generate a smaller network)

        k: int
            minimum node degree to override (e.g., to generate a denser network ``k>1``)

        seed: object
            seed for random number generator

        Returns
        -------
        netin.PAH
            fitted model
        """
        n = n or g.number_of_nodes()
        k = k or g.calculate_minimum_degree()
        f_m = g.calculate_fraction_of_minority()
        h_MM, h_mm = infer_homophily(g)

        fitted = PAH(n=n,
                     k=k,
                     f_m=f_m,
                     h_MM=float(h_MM),
                     h_mm=float(h_mm),
                     seed=seed)
        fitted.generate()
        return fitted


def infer_homophily(g) -> tuple[float, float]:
    """
    Infers (h_MM, h_mm) analytically from the observed edge-type mix and the
    degree power-law exponents, by solving the PAH mean-field equations with sympy.
    """
    f_m = g.calculate_fraction_of_minority()
    f_M = 1 - f_m

    counts = g.calculate_edge_type_counts()
    total = counts['MM'] + counts['mm'] + counts['Mm'] + counts['mM']
    p_MM = counts['MM'] / total
    p_mm = counts['mm'] / total

    pl_M, pl_m = g.calculate_degree_powerlaw_exponents()
    b_M = -1 / (pl_M + 1)
    b_m = -1 / (pl_m + 1)

    # unknowns: within/between group homophily values
    hmm, hMM, hmM, hMm = symbols('hmm hMM hmM hMm')
    eq1 = Eq((f_m * f_m * hmm * (1 - b_M)) / ((f_m * hmm * (1 - b_M)) + (f_M * hmM * (1 - b_m))), p_mm)
    eq2 = Eq(hmm + hmM, 1)
    eq3 = Eq((f_M * f_M * hMM * (1 - b_m)) / ((f_M * hMM * (1 - b_m)) + (f_m * hMm * (1 - b_M))), p_MM)
    eq4 = Eq(hMM + hMm, 1)

    solution = solve((eq1, eq2, eq3, eq4), (hmm, hmM, hMM, hMm))
    return solution[hMM], solution[hmm]
class Homophily(Graph):
    """Class to model homophily as a mechanism of edge formation given a source
    and a target node.

    Parameters
    ----------
    n: int
        number of nodes (minimum=2)

    f_m: float
        fraction of minorities (minimum=1/n, maximum=(n-1)/n)

    h_MM: float
        homophily (similarity) between majority nodes (minimum=0, maximum=1.)

    h_mm: float
        homophily (similarity) between minority nodes (minimum=0, maximum=1.)

    Notes
    -----
    This class does not generate a graph.
    """

    ############################################################
    # Constructor
    ############################################################

    def __init__(self, n: int, f_m: float, h_MM: float, h_mm: float, seed: object = None):
        Graph.__init__(self, n=n, f_m=f_m, seed=seed)
        self.h_MM = h_MM
        self.h_mm = h_mm
        self.mixing_matrix = None  # built in initialize() from (h_MM, h_mm)
        self.model_name = const.H_MODEL_NAME

    ############################################################
    # Init
    ############################################################

    def validate_parameters(self):
        """
        Validates the parameters of the graph.
        """
        Graph.validate_parameters(self)
        val.validate_float(self.h_MM, minimum=0., maximum=1.)
        val.validate_float(self.h_mm, minimum=0., maximum=1.)

    def get_metadata_as_dict(self) -> dict:
        """
        Returns the metadata info (input parameters of the model) of the graph
        as a dictionary.

        Returns
        -------
        obj dict
            dictionary with the metadata info of the graph.
        """
        meta = Graph.get_metadata_as_dict(self)
        meta.update({
            'h_MM': self.h_MM,
            'h_mm': self.h_mm,
        })
        return meta

    ############################################################
    # Getters & Setters
    ############################################################

    def set_homophily_majority(self, h_MM: float):
        """
        Sets the homophily value between majority nodes.

        Parameters
        ----------
        h_MM: float
            homophily (similarity) between majority nodes (minimum=0, maximum=1.)
        """
        self.h_MM = h_MM

    def get_homophily_majority(self) -> float:
        """
        Returns the homophily value between majority nodes.

        Returns
        -------
        h_MM: float
            homophily (similarity) between majority nodes (minimum=0, maximum=1.)
        """
        return self.h_MM

    def set_homophily_minority(self, h_mm: float):
        """
        Sets the homophily value between minority nodes.

        Parameters
        ----------
        h_mm: float
            homophily (similarity) between minority nodes (minimum=0, maximum=1.)
        """
        self.h_mm = h_mm

    def get_homophily_minority(self) -> float:
        """
        Returns the homophily value between minority nodes.

        Returns
        -------
        h_mm: float
            homophily (similarity) between minority nodes (minimum=0, maximum=1.)
        """
        return self.h_mm

    def get_homophily_between_source_and_target(self, source: int, target: int) -> float:
        """
        Returns the homophily value between a source and a target node based on
        their class values, looked up in the mixing matrix.

        Parameters
        ----------
        source: int
            Source node id

        target: int
            Target node id

        Returns
        -------
        h: float
            homophily (similarity) between source and target nodes (minimum=0, maximum=1.)
        """
        return self.mixing_matrix[self.node_class_values[source], self.node_class_values[target]]

    ############################################################
    # Generation
    ############################################################

    def initialize(self, class_attribute: str = 'm', class_values: list = None, class_labels: list = None):
        """
        Initializes the model.

        Parameters
        ----------
        class_attribute: str
            name of the attribute that represents the class

        class_values: list
            values of the class attribute

        class_labels: list
            labels of the class attribute mapping the class_values.
        """
        Graph.initialize(self, class_attribute, class_values, class_labels)
        self.h_MM = val.calibrate_null_probabilities(self.h_MM)
        self.h_mm = val.calibrate_null_probabilities(self.h_mm)
        # rows/cols indexed by class value: 0 = majority, 1 = minority
        self.mixing_matrix = np.array([[self.h_MM, 1 - self.h_MM],
                                       [1 - self.h_mm, self.h_mm]])

    def get_target_probabilities(self, source: int, available_nodes: Union[list[int], np.array]) -> tuple[np.array, list[int]]:
        """
        Returns the probabilities of selecting a target node from a set of nodes
        based on homophily (inferred from the mixing matrix).

        Parameters
        ----------
        source: int
            source node

        available_nodes: list[int]
            candidate target nodes

        Returns
        -------
        tuple[np.array, list[int]]
            normalized probabilities, and the candidate target nodes
        """
        weights = [self.get_homophily_between_source_and_target(source, target)
                   for target in available_nodes]
        probs = np.asarray(weights)
        probs /= probs.sum()
        return probs, available_nodes

    def get_target(self, source: int, available_nodes: list[int],
                   special_targets: Union[None, object, iter]) -> int:
        """
        Picks a random target node based on the homophily dynamic.

        Parameters
        ----------
        source: int
            Newly added node

        available_nodes: list[int]
            Potential target nodes in the graph

        special_targets: object
            Special target nodes in the graph

        Returns
        -------
        int: Target node that an edge should be added to from `source`
        """
        # Collect probabilities to connect to each node in available_nodes
        available_nodes = self.get_potential_nodes_to_connect(source, available_nodes)
        probs, candidates = self.get_target_probabilities(source, available_nodes)
        return np.random.choice(a=candidates, size=1, replace=False, p=probs)[0]

    ############################################################
    # Calculations
    ############################################################

    def info_params(self):
        """
        Shows the parameters of the model.
        """
        print('h_MM: {}'.format(self.h_MM))
        print('h_mm: {}'.format(self.h_mm))
        print('mixing matrix: \n{}'.format(self.mixing_matrix))

    def info_computed(self):
        """
        Shows the computed properties of the graph.
        """
        inferred_h_MM, inferred_h_mm = self.infer_homophily_values()
        print("- Empirical homophily within majority: {}".format(inferred_h_MM))
        print("- Empirical homophily within minority: {}".format(inferred_h_mm))

    def infer_homophily_values(self) -> tuple[float, float]:
        """
        Infers analytically the homophily values for the majority and minority classes
        from the observed edge-type counts.

        Returns
        -------
        h_MM: float
            homophily within majority group

        h_mm: float
            homophily within minority group
        """
        counts = self.calculate_edge_type_counts()
        if self.is_directed():
            h_MM = counts['MM'] / (counts['MM'] + counts['Mm'])
            h_mm = counts['mm'] / (counts['mm'] + counts['mM'])
        else:
            # undirected: cross-group edges may be recorded as either 'Mm' or 'mM'
            h_MM = counts['MM'] / (counts['MM'] + counts['Mm'] + counts['mM'])
            h_mm = counts['mm'] / (counts['mm'] + counts['mM'] + counts['Mm'])

        return h_MM, h_mm
class UnDiGraph(Graph):
    """Undirected graph base model.

    Parameters
    ----------
    n: int
        number of nodes (minimum=2)

    k: int
        minimum degree of nodes (minimum=1)

    f_m: float
        fraction of minorities (minimum=1/n, maximum=(n-1)/n)

    seed: object
        seed for random number generator

    Notes
    -----
    The initialization is an undirected graph with n nodes and no edges.
    Then, everytime a node is selected as source, it gets connected to k target nodes.
    Target nodes are selected depending on the chosen mechanism of edge formation:

    - PA: Preferential attachment (in-degree), see :class:`netin.PA` [BarabasiAlbert1999]_
    - PAH: Preferential attachment (in-degree) with homophily, see :class:`netin.PAH` [Karimi2018]_
    - PATC: Preferential attachment (in-degree) with triadic closure, see :class:`netin.PATC` [HolmeKim2002]_
    - PATCH: Preferential attachment (in-degree) with homophily and triadic closure, see :class:`netin.PATCH`

    References
    ----------
    .. [BarabasiAlbert1999] A. L. Barabasi and R. Albert "Emergence of scaling in random networks", Science 286, pp 509-512, 1999.
    .. [Karimi2018] F. Karimi, M. Génois, C. Wagner, P. Singer, & M. Strohmaier, M "Homophily influences ranking of minorities in social networks", Scientific reports 8(1), 11077, 2018.
    .. [HolmeKim2002] P. Holme and B. J. Kim “Growing scale-free networks with tunable clustering” Phys. Rev. E 2002.
    """

    ############################################################
    # Constructor
    ############################################################

    def __init__(self, n: int, k: int, f_m: float, seed: object = None):
        Graph.__init__(self, n=n, f_m=f_m, seed=seed)
        self.k = k  # minimum degree of nodes
        self.model_name = const.UNDIRECTED_MODEL_NAME

    ############################################################
    # Init
    ############################################################

    def validate_parameters(self):
        """
        Validates the parameters of the undirected.
        """
        super().validate_parameters()
        val.validate_int(self.k, minimum=1)

    def get_metadata_as_dict(self) -> dict:
        """
        Returns metadata for a undirected.
        """
        meta = super().get_metadata_as_dict()
        meta.update({'k': self.k})
        return meta

    ############################################################
    # Getters & setters
    ############################################################

    @property
    def k(self):
        return self._k

    @k.setter
    def k(self, k: int):
        self._k = k

    ############################################################
    # Generation
    ############################################################

    def get_target(self,
                   source: int,
                   available_nodes: list[int]) -> int:
        """
        Picks a random target node based on preferential attachment.

        Parameters
        ----------
        source: int
            Newly added node

        available_nodes: list[int]
            Potential (available) target nodes to connect to

        Returns
        -------
        int
            Target node that an edge should be added to
        """
        # Collect probabilities to connect to each node in available_nodes
        available_nodes = self.get_potential_nodes_to_connect(source, available_nodes)
        probs, candidates = self.get_target_probabilities(source, available_nodes)
        return np.random.choice(a=candidates, size=1, replace=False, p=probs)[0]

    def _link_init_nodes(self):
        # fully connect the first k nodes so every later source has targets
        initial = self.node_list[:self.k]
        for source in initial:
            for target in initial:
                if source != target and target not in self[source]:
                    self.add_edge(source, target)
                    self.on_edge_added(source, target)

    def generate(self):
        """
        An undirected graph of n nodes is grown by attaching new nodes each with k edges.
        Each edge is either drawn by preferential attachment, homophily, and/or triadic closure.

        For triadic closure, a candidate is chosen uniformly at random from all
        triad-closing edges (of the new node). Otherwise, or if there are no triads
        to close, edges are connected via preferential attachment and/or homophily.

        Homophily varies ranges from 0 (heterophilic) to 1 (homophilic), where 0.5 is neutral.
        Similarly, triadic closure varies from 0 (no triadic closure) to 1 (full triadic closure).

        - PA: An undirected graph with h_mm = h_MM in [0.5, None] and tc = 0 is a BA preferential attachment model.
        - PAH: An undirected graph with h_mm not in [0.5, None] and h_MM not in [0.5, None] and tc = 0 is a PA model with homophily.
        - PATC: An undirected graph with h_mm = h_MM in [0.5, None] and tc > 0 is a PA model with triadic closure.
        - PATCH: An undirected graph with h_mm not in [0.5, None] and h_MM not in [0.5, None] and tc > 0 is a PA model with homophily and triadic closure.
        """
        # 1. Init an undirected graph and nodes (assign class labels)
        super().generate()
        self._link_init_nodes()

        # 2. Iterate until n nodes are added (starts with k pre-existing, unconnected nodes)
        for source in self.node_list[self.k:]:
            # every already-added node is a potential target
            available_nodes = list(range(source))

            for _ in range(self.k):
                target = self.get_target(source, available_nodes)
                self.add_edge(source, target)
                # Call event handlers if present
                self.on_edge_added(source, target)

        self.terminate()

    ############################################################
    # Calculations
    ############################################################

    def info_params(self):
        """
        Shows the parameters of the model.
        """
        print('k: {}'.format(self.k))
174 | """ 175 | fit_M, fit_m = self.fit_degree_powerlaw() 176 | print(f"- Powerlaw fit (degree):") 177 | print(f"- {self.get_majority_label()}: alpha={fit_M.power_law.alpha}, " 178 | f"sigma={fit_M.power_law.sigma}, " 179 | f"min={fit_M.power_law.xmin}, max={fit_M.power_law.xmax}") 180 | print(f"- {self.get_minority_label()}: alpha={fit_m.power_law.alpha}, " 181 | f"sigma={fit_m.power_law.sigma}, " 182 | f"min={fit_m.power_law.xmin}, max={fit_m.power_law.xmax}") 183 | 184 | def get_expected_number_of_edges(self) -> int: 185 | """ 186 | Computes and returns the expected number of edges based on minimum degree `k` and number of nodes `n` 187 | 188 | Returns 189 | ------- 190 | int 191 | Expected number of edges 192 | """ 193 | return (self.n * self.k) - (self.k ** self.k) 194 | 195 | def fit_degree_powerlaw(self) -> tuple[powerlaw.Fit, powerlaw.Fit]: 196 | """ 197 | Returns the powerlaw fit of the degree distribution to a powerlaw for the majority and minority class. 198 | 199 | Returns 200 | ------- 201 | fit_M : powerlaw.Fit 202 | Powerlaw fit for the majority class 203 | 204 | fit_m: powerlaw.Fit 205 | Powerlaw fit for the minority class 206 | """ 207 | fit_M, fit_m = self.fit_powerlaw(metric='degree') 208 | return fit_M, fit_m 209 | 210 | def calculate_degree_powerlaw_exponents(self) -> tuple[float, float]: 211 | """ 212 | Returns the powerlaw exponents for the majority and minority class. 213 | 214 | Returns 215 | ------- 216 | pl_M : float 217 | Powerlaw exponent for the majority class 218 | 219 | pl_m: float 220 | Powerlaw exponent for the minority class 221 | """ 222 | pl_M, pl_m = self.calculate_powerlaw_exponents(metric='degree') 223 | return pl_M, pl_m 224 | 225 | def makecopy(self): 226 | """ 227 | Makes a copy of the current object. 
228 | """ 229 | return self.__class__(n=self.n, 230 | k=self.k, 231 | f_m=self.f_m, 232 | seed=self.seed) 233 | -------------------------------------------------------------------------------- /netin/generators/directed.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Union 3 | 4 | import networkx as nx 5 | import numpy as np 6 | import powerlaw 7 | 8 | from netin.utils import constants as const 9 | from netin.utils import validator as val 10 | from .graph import Graph 11 | 12 | 13 | class DiGraph(nx.DiGraph, Graph): 14 | """Directed graph base model. 15 | 16 | Parameters 17 | ---------- 18 | n: int 19 | number of nodes (minimum=2) 20 | 21 | d: float 22 | edge density (minimum=0, maximum=1) 23 | 24 | f_m: float 25 | fraction of minorities (minimum=1/n, maximum=(n-1)/n) 26 | 27 | plo_M: float 28 | activity (out-degree power law exponent) majority group (minimum=1) 29 | 30 | plo_m: float 31 | activity (out-degree power law exponent) minority group (minimum=1) 32 | 33 | seed: object 34 | seed for random number generator 35 | 36 | Notes 37 | ----- 38 | The initialization is a directed graph with n nodes and no edges. 39 | Source nodes are selected based on their activity given by plo_M (if majority) or plo_m (if minority). 40 | Target nodes are selected depending on the chosen mechanism of edge formation. 41 | 42 | - DPAH: preferential attachment (in-degree) and homophily (h**), see :class:`netin.DPAH` 43 | - DPA: preferential attachment (in-degree), see :class:`netin.DPA` 44 | - DH: homophily (h**), see :class:`netin.DH` 45 | 46 | References 47 | ---------- 48 | .. [Espin-Noboa2022] L. Espín-Noboa, C. Wagner, M. Strohmaier, & F. Karimi "Inequality and inequity in network-based ranking and recommendation algorithms" Scientific reports 12(1), 1-14, 2022. 49 | .. [Karimi2018] F. Karimi, M. Génois, C. Wagner, P. Singer, & M. 
Strohmaier, M "Homophily influences ranking of minorities in social networks", Scientific reports 8(1), 11077, 2018. 50 | .. [BarabasiAlbert1999] A. L. Barabasi and R. Albert "Emergence of scaling in random networks", Science 286, pp 509-512, 1999. 51 | """ 52 | 53 | ############################################################ 54 | # Constructor 55 | ############################################################ 56 | 57 | def __init__(self, n: int, f_m: float, d: float, plo_M: float, plo_m: float, seed: object = None): 58 | nx.DiGraph.__init__(self) 59 | Graph.__init__(self, n=n, f_m=f_m, seed=seed) 60 | self.d = d 61 | self.plo_M = plo_M 62 | self.plo_m = plo_m 63 | self.in_degrees = None 64 | self.out_degrees = None 65 | self.activity = None 66 | self.expected_number_of_edges = None 67 | self.model_name = const.DIRECTED_MODEL_NAME 68 | 69 | ############################################################ 70 | # Init 71 | ############################################################ 72 | 73 | def validate_parameters(self): 74 | """ 75 | Validates the parameters of the directed. 76 | """ 77 | Graph.validate_parameters(self) 78 | val.validate_float(self.d, minimum=1. / (self.n * (self.n - 1)), maximum=1.) 79 | val.validate_float(self.plo_M, minimum=1. + const.EPSILON) 80 | val.validate_float(self.plo_m, minimum=1. + const.EPSILON) 81 | 82 | def get_metadata_as_dict(self) -> dict: 83 | """ 84 | Returns metadata for a directed. 85 | """ 86 | obj = super().get_metadata_as_dict() 87 | obj.update({ 88 | 'd': self.d, 89 | 'plo_M': self.plo_M, 90 | 'plo_m': self.plo_m, 91 | }) 92 | return obj 93 | 94 | ############################################################ 95 | # Generation 96 | ############################################################ 97 | 98 | def initialize(self, class_attribute: str = 'm', class_values: list = None, class_labels: list = None): 99 | """ 100 | Initializes the model. 
101 | 102 | Parameters 103 | ---------- 104 | class_attribute: str 105 | name of the attribute that represents the class 106 | 107 | class_values: list 108 | values of the class attribute 109 | 110 | class_labels: list 111 | labels of the class attribute mapping the class_values. 112 | """ 113 | Graph.initialize(self, class_attribute, class_values, class_labels) 114 | self.init_edges() 115 | self.init_activity() 116 | 117 | def init_edges(self): 118 | """ 119 | Initializes the expected number of edges based on the number of nodes and density of the graph (input param). 120 | It also initializes the in- and out-degrees of the nodes. 121 | """ 122 | self.expected_number_of_edges = int(round(self.d * self.n * (self.n - 1))) 123 | self.in_degrees = np.zeros(self.n) 124 | self.out_degrees = np.zeros(self.n) 125 | 126 | def init_activity(self): 127 | """ 128 | Initializes the level of activity for each node based on the power law exponents (input param). 129 | """ 130 | act_M = powerlaw.Power_Law(parameters=[self.plo_M], discrete=True).generate_random(self.n_M) 131 | act_m = powerlaw.Power_Law(parameters=[self.plo_m], discrete=True).generate_random(self.n_m) 132 | self.activity = np.append(act_M, act_m) 133 | if np.inf in self.activity: 134 | self.activity[self.activity == np.inf] = 0.0 135 | self.activity += 1 136 | self.activity /= self.activity.sum() 137 | 138 | def get_sources(self) -> np.array: 139 | """ 140 | Returns a random sample with replacement of nodes to be used as source nodes. 141 | The sample has the length of the expected number of edges, and the probability of each node to be selected is 142 | based on its activity. 
143 | 144 | Returns 145 | ------- 146 | np.array 147 | array of source nodes 148 | """ 149 | return np.random.choice(a=np.arange(self.n), size=self.expected_number_of_edges, replace=True, p=self.activity) 150 | 151 | def get_target(self, source: int, edge_list: dict, **kwargs) -> Union[None, int]: 152 | """ 153 | Returns a target node for a given source node. 154 | 155 | Parameters 156 | ---------- 157 | source: int 158 | source node 159 | 160 | edge_list: dict 161 | dictionary of edges 162 | 163 | kwargs: dict 164 | additional parameters 165 | 166 | Returns 167 | ------- 168 | Union[None, int] 169 | target node 170 | 171 | Notes 172 | ----- 173 | The target node must have out_degree > 0 (the older the node in the network, the more likely to get more links) 174 | """ 175 | one_percent = self.n * 1 / 100. 176 | if np.count_nonzero(self.out_degrees) > one_percent: 177 | # if there are enough edges, then select only nodes with out_degree > 0 that are not already 178 | # connected to the source. 179 | # Having out_degree > 0 means they are nodes that have been in the network for at least one time step 180 | targets = [n for n in np.arange(self.n) if n not in edge_list[source] and self.out_degrees[n] > 0] 181 | else: 182 | # if there are no enough edges, then select all nodes that are not already connected to the source. 183 | targets = [n for n in np.arange(self.n) if n not in edge_list[source]] 184 | targets = np.delete(targets, np.where(targets == source)) 185 | 186 | if targets.shape[0] == 0: 187 | return None 188 | 189 | probs = self.get_target_probabilities(source, targets, **kwargs) 190 | return np.random.choice(a=targets, size=1, replace=False, p=probs)[0] 191 | 192 | def generate(self): 193 | """ 194 | A directed graph of n nodes is grown by attaching new nodes. 195 | Source nodes are selected randomly with replacement based on their activity. 196 | Each target node drawn based on the chosen mechanism of edge formation. 
197 | 198 | - DPA: A graph with h_mm = h_MM in [0.5, None] is a directed BA preferential attachment model, see :class:`netin.DPA`. 199 | - DH: A graph with h_mm not in [0.5, None] and h_MM not in [0.5, None] is a directed Erdos-Renyi with homophily, see :class:`netin.DPH`. 200 | - DPAH: A graph with h_mm not in [0.5, None] and h_MM not in [0.5, None] is a DPA model with homophily, see :class:`netin.DPAH`. 201 | """ 202 | # 1. Init directed and nodes (assign class labels) 203 | Graph.generate(self) 204 | 205 | # 2. Iterate until reaching desired number of edges (edge density) 206 | tries = 0 207 | edge_list = defaultdict(list) 208 | while self.number_of_edges() < self.expected_number_of_edges: 209 | tries += 1 210 | for source in self.get_sources(): 211 | target = self.get_target(source, edge_list) 212 | 213 | if target is None: 214 | continue 215 | 216 | if not self.has_edge(source, target): 217 | self.add_edge(source, target) 218 | self.in_degrees[target] += 1 219 | self.out_degrees[source] += 1 220 | edge_list[source].append(target) 221 | 222 | if self.number_of_edges() >= self.expected_number_of_edges: 223 | break 224 | 225 | # if no more edges can be added, break 226 | if tries > const.MAX_TRIES_EDGE and self.number_of_edges() < self.expected_number_of_edges: 227 | print(f">> Edge density ({nx.density(self)}) might differ from {self.d:.5f} (n={self.n}, f_m={self.f_m}" 228 | f"seed={self.seed}, plo_M={self.plo_M}, plo_m={self.plo_m}") 229 | break 230 | 231 | self.terminate() 232 | 233 | ############################################################ 234 | # Calculations 235 | ############################################################ 236 | 237 | def info_params(self): 238 | """ 239 | Shows the (input) parameters of the graph. 
240 | """ 241 | print(f'd: {self.d} (expected edges: {self.expected_number_of_edges})') 242 | print(f'plo_M: {self.plo_M}') 243 | print(f'plo_m: {self.plo_m}') 244 | 245 | def info_computed(self): 246 | """ 247 | Shows the computer properties of the graph. 248 | """ 249 | for metric in ['in_degree', 'out_degree']: 250 | fit_M, fit_m = self.fit_powerlaw(metric) 251 | print(f"- Powerlaw fit ({metric}):") 252 | print(f"- {self.get_majority_label()}: alpha={fit_M.power_law.alpha}, sigma={fit_M.power_law.sigma}, " 253 | f"min={fit_M.power_law.xmin}, max={fit_M.power_law.xmax}") 254 | print(f"- {self.get_minority_label()}: alpha={fit_m.power_law.alpha}, sigma={fit_m.power_law.sigma}, " 255 | f"min={fit_m.power_law.xmin}, max={fit_m.power_law.xmax}") 256 | 257 | def calculate_in_degree_powerlaw_exponents(self) -> tuple[float, float]: 258 | """ 259 | Returns the power law exponents for the in-degree distribution of the majority and minority class. 260 | 261 | Returns 262 | ------- 263 | Tuple[float, float] 264 | power law exponents for the in-degree distribution of the majority and minority class 265 | """ 266 | pl_M, pl_m = self.calculate_powerlaw_exponents(metric='in_degree') 267 | 268 | # fit_M, fit_m = self.fit_powerlaw(metric='in_degree') 269 | # pl_M = fit_M.power_law.alpha 270 | # pl_m = fit_m.power_law.alpha 271 | 272 | return pl_M, pl_m 273 | 274 | def calculate_out_degree_powerlaw_exponents(self) -> tuple[float, float]: 275 | """ 276 | Returns the power law exponents for the out-degree distribution of the majority and minority class. 
277 | 278 | Returns 279 | ------- 280 | Tuple[float, float] 281 | power law exponents for the out-degree distribution of the majority and minority class 282 | """ 283 | pl_M, pl_m = self.calculate_powerlaw_exponents(metric='out_degree') 284 | 285 | # fit_M, fit_m = self.fit_powerlaw(metric='out_degree') 286 | # pl_M = fit_M.power_law.alpha 287 | # pl_m = fit_m.power_law.alpha 288 | 289 | return pl_M, pl_m 290 | 291 | ############################################################ 292 | # Getters and setters 293 | ############################################################ 294 | 295 | def get_expected_number_of_edges(self) -> int: 296 | """ 297 | Returns the expected number of edges based on number of nodes and edge density. 298 | 299 | Returns 300 | ------- 301 | int 302 | expected number of edges 303 | """ 304 | return self.expected_number_of_edges 305 | 306 | def get_expected_density(self) -> float: 307 | """ 308 | Returns the expected edge density (d, the input parameter). 309 | 310 | Returns 311 | ------- 312 | float 313 | expected edge density 314 | """ 315 | return self.d 316 | 317 | def get_expected_powerlaw_out_degree_majority(self) -> float: 318 | """ 319 | Returns the expected power law exponent for the out-degree distribution of the majority class 320 | (plo_M, the input parameter). 321 | 322 | Returns 323 | ------- 324 | float 325 | expected power law exponent for the out-degree distribution of the majority class 326 | """ 327 | return self.plo_M 328 | 329 | def get_expected_powerlaw_out_degree_minority(self): 330 | """ 331 | Returns the expected power law exponent for the out-degree distribution of the minority class 332 | (plo_m, the input parameter). 
333 | 334 | Returns 335 | ------- 336 | float 337 | expected power law exponent for the out-degree distribution of the minority class 338 | """ 339 | return self.plo_m 340 | 341 | def get_activity_distribution(self) -> np.array: 342 | """ 343 | Returns the activity distribution of all the nodes in the graph. 344 | 345 | Returns 346 | ------- 347 | np.array 348 | activity distribution of all the nodes in the graph 349 | """ 350 | return self.activity 351 | 352 | def makecopy(self): 353 | """ 354 | Makes a copy of the current object. 355 | """ 356 | obj = self.__class__(n=self.n, 357 | d=self.d, 358 | f_m=self.f_m, 359 | plo_M=self.plo_M, 360 | plo_m=self.plo_m, 361 | seed=self.seed) 362 | return obj 363 | 364 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. 
The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. 
More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. 
Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. 
For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. 
For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. 
Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. 
identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. 
You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. 
THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 
371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. 
Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 436 | 437 | Creative Commons may be contacted at creativecommons.org. --------------------------------------------------------------------------------