├── .gitignore ├── LICENSE ├── README.md ├── images └── ct.jpeg └── lessons ├── week01 ├── NetworkElementsI.ipynb ├── lesson01.pdf └── lesson02.pdf ├── week02 ├── lesson03.ipynb └── lesson03.pdf ├── week04 ├── Assortativity.ipynb ├── Assortativity.pdf └── NetworkX.ipynb ├── week06 ├── Clustering_Coefficient.ipynb ├── Connected_Components.ipynb ├── Walks_Paths_and_Distances.ipynb ├── Week_06.pdf ├── dijsktra.ipynb ├── dijsktra.pdf └── dijsktra_min_heap.ipynb ├── week08 ├── MinHeap.ipynb └── MinHeap.pdf └── week09 ├── Astar.ipynb └── Week09.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .DS_Store 132 | 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ivanovitch Silva 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 | # Federal University of Rio Grande do Norte 5 | ## Technology Center 6 | ### Department of Computer Engineering and Automation 7 | #### Algorithms and Data Structures II (DCA3702) 8 | 9 | This is the repository for the Algorithms and Data Structures II course, offered by the Department of Computer Engineering and Automation ([DCA](https://www.dca.ufrn.br)) of the Technology Center ([CT](https://www.ct.ufrn.br/)) at the Federal University of Rio Grande do Norte ([UFRN](https://www.ufrn.br)). 10 | 11 | The DCA aims to train professionals capable of designing and developing computer systems for industrial automation, embedded systems, software systems, distributed systems, computer networks, and information systems. The CT offers undergraduate and graduate courses in Engineering. This course is part of the curriculum of the Computer Engineering program at DCA/UFRN. 12 | 13 | 14 | #### References 15 | 16 | - :books: [Coscia, Michele. The Atlas for the Aspiring Network Scientist](https://www.networkatlas.eu/) 17 | - :books: [Newman, Mark. Networks](https://global.oup.com/academic/product/networks-9780198805090?cc=br&lang=en&) 18 | - :books: [Grimmer, Justin; Roberts, Margaret E.; Stewart, Brandon M. Text as Data: A new framework for Machine Learning and the Social Sciences](https://press.princeton.edu/books/paperback/9780691207551/text-as-data) 19 | - :books: [Knickerbocker, David.
Network Science with Python](https://www.packtpub.com/product/network-science-with-python/9781801073691) 20 | 21 | 22 | | Tool | Link | 23 | |------|------| 24 | | :smiley: Networkx | [networkx.org](https://networkx.org/) | 25 | | :gear: Gephi | [gephi.org](https://gephi.org/) | 26 | | :rocket: OSMnx | [github.com/gboeing/osmnx](https://github.com/gboeing/osmnx) | 27 | | :floppy_disk: Dataset | [snap.stanford.edu/data](https://snap.stanford.edu/data/) | 28 | 29 | 30 | ### Lessons 31 | 32 | **Week 01** 33 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/tree/main/lessons/week01/lesson01.pdf) **Course Outline:** Provides an overview of the course structure and topics covered. 34 | - 🎉 GitHub Education Benefits 35 | - GitHub Education Pro: Get access to the GitHub Education Pro pack by visiting [GitHub Education](https://education.github.com/pack) 36 | - 📖 Learning Resources 37 | - GitHub Learning Game: Check out the interactive Git learning game at [GitHub Learning Game](https://learngitbranching.js.org/) 38 | - Basic Python: Enhance your Python skills through the [Kaggle Python course](https://www.kaggle.com/learn/python). 39 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week01/lesson02.pdf) **Network Fundamentals I**: Outline, applications, math and graph theory. 40 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/tree/main/lessons/week01/NetworkElementsI.ipynb) Network elements using networkx tool. 41 | - :books: Further reading: chapters 2, 3, 6 and 7 of the book [The Atlas For The Aspiring Network Scientist](https://www.networkatlas.eu/). 
42 | 43 | **Week 02** 44 | 45 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week02/lesson03.pdf) **Network Fundamentals II**: Probability, extended graphs, matrices, degree and representation. 46 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/tree/main/lessons/week02/lesson03.ipynb) Extended graphs and representation using networkx tool. 47 | - :books: Further reading: chapters 7, 8, 9 of the book [The Atlas For The Aspiring Network Scientist](https://www.networkatlas.eu/). 48 | 49 | **Week 03** 50 | 51 | - Project: Authorship Temporal Network Analysis 52 | 53 | **Week 04** 54 | 55 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week04/Assortativity.pdf) **Small World**: This week’s content focuses on core concepts such as Small World Networks, Homophily, and Assortativity. You will analyze how these properties shape the topology and dynamics of real-world networks. 56 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week04/Assortativity.ipynb) **Hands on assortativity**: A hands-on notebook for computing and interpreting assortativity in real datasets. 57 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week04/NetworkX.ipynb) **The art of seeing networks**: Explore key NetworkX functionalities for analyzing and visualizing complex networks. 58 | - :books: Further reading: chapters 30 to 31 from [The Atlas For The Aspiring Network Scientist](https://www.networkatlas.eu/), including the exercises. 
59 | 60 | **Week 05** 61 | - Project unit 01 62 | 63 | **Week 06** 64 | 65 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/Week_06.pdf) **Small World Cont.**: Paths, Distances, Connected Components, Clustering Coefficient, Social Distance and Six Degrees of Separation 66 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/Walks_Paths_and_Distances.ipynb) Paths, Walks and Distances 67 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/Connected_Components.ipynb) Connected Components 68 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/Clustering_Coefficient.ipynb) Clustering Coefficient 69 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/dijsktra.pdf) **Classical Algorithms: Dijkstra**: Shortest path algorithm 70 | - You will learn to: a) explain how Dijkstra's algorithm works; b) understand the algorithm’s time complexity. 71 | - Ready to practice? 72 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/dijsktra.ipynb): **Dijkstra**: Implement the algorithm both with and without path reconstruction.
73 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week06/dijsktra_min_heap.ipynb): **Min-Heap**: an implementation of Dijkstra's algorithm using a min-heap, with and without path reconstruction. 74 | 75 | **Week 07** 76 | 77 | - U2T1 Evaluating Algorithms for the Shortest Path in Urban Graphs 78 | 79 | **Week 08** 80 | 81 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week08/MinHeap.pdf) **Heap Structures and Time Complexity**: This lesson explores Min-Heap data structures, focusing on their array representation and the behavior of the main operations such as insert, remove, and peek. 82 | - Topics covered include: 83 | - Properties of a Min-Heap and array-based implementation 84 | - Understanding parent and child index calculations 85 | - Explanation of `siftDown` and `siftUp` mechanisms 86 | - Efficient heap construction from unsorted arrays using `buildHeap` 87 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week08/MinHeap.ipynb) **MinHeap Implementation and Testing**: Interactive Jupyter Notebook that includes the full implementation of a Min-Heap class in Python, along with detailed unit tests to validate correctness and ensure that the min-heap property is preserved after each operation. 88 | 89 | **Week 09** 90 | 91 | - [![Open in PDF](https://img.shields.io/badge/-PDF-EC1C24?style=flat-square&logo=adobeacrobatreader)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week09/Week09.pdf) **A\* Algorithm**: Introduction to the A* algorithm, combining Dijkstra's search with heuristic-based guidance. Key concepts: 92 | - Combines **Dijkstra** (guarantees cheapest path) and **Greedy search** (guides exploration). 
93 | - Heuristics: 94 | - **Euclidean**: for local planar maps. 95 | - **Great-circle**: for large geographical distances. 96 | - **Manhattan**: for grid-like networks. 97 | - A* process: 98 | 1. Start at source node. 99 | 2. For each neighbor, compute: 100 | - \( g \): actual cost. 101 | - \( h \): heuristic estimate. 102 | - \( f = g + h \). 103 | 3. Use priority queue (lowest \( f \) first). 104 | 4. Repeat until reaching the goal. 105 | - Visual flow: 106 | `Start → [g + h] → expand node → update queue → repeat → Goal` 107 | 108 | - [![Jupyter](https://img.shields.io/badge/-Notebook-191A1B?style=flat-square&logo=jupyter)](https://github.com/ivanovitchm/datastructure/blob/main/lessons/week09/Astar.ipynb) **A\* Implementation**: Jupyter Notebook with A* implementation using NetworkX and OSMnx, applying different heuristics to real urban graphs. 109 | -------------------------------------------------------------------------------- /images/ct.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/images/ct.jpeg -------------------------------------------------------------------------------- /lessons/week01/NetworkElementsI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.3" 21 | }, 22 | "colab": { 23 | "provenance": [], 24 | "toc_visible": true 25 | } 26 | }, 27 | "cells": [ 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": 
"QG9npHtLmgm7" 32 | }, 33 | "source": [ 34 | "# Introduction" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "id": "N_Ot13Qv9Bhd" 41 | }, 42 | "source": [ 43 | "\n", 44 | "You can use NetworkX to construct and draw graphs that are undirected or directed, with weighted or unweighted edges. An array of functions to analyze graphs is available. This tutorial takes you through a few basic examples and exercises.\n", 45 | "\n", 46 | "Note that many exercises are followed by a block with some `assert` statements. These assertions may be preceded by some setup code. They are provided to give you feedback that you are on the right path -- receiving an `AssertionError` probably means you've done something wrong.\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "source": [ 52 | "\n", 53 | "**Official documentation** for version used in this tutorial: https://networkx.org\n", 54 | "\n", 55 | "**Official tutorial** for version used in this tutorial: https://networkx.org/documentation/stable/tutorial.html\n", 56 | "\n", 57 | "**nx-Guide** educational materials officially developed and curated by the NetworkX community https://networkx.org/nx-guides/index.html" 58 | ], 59 | "metadata": { 60 | "id": "2IuRRlj825O9" 61 | } 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": { 66 | "id": "MyLeqcSLmgm8" 67 | }, 68 | "source": [ 69 | "## The `import` statement\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "id": "k4gPM3vy9UMs" 76 | }, 77 | "source": [ 78 | "\n", 79 | "Recall that `import` statements go at the top of your code, telling Python to load an external module. In this case we want to load NetworkX, but give it a short alias `nx` since we'll have to type it repeatedly, hence the `as` statement.\n", 80 | "\n", 81 | "Lines starting with the `%` character are not Python code, they are \"magic\" directives for Jupyter notebook. 
The `%matplotlib inline` magic tells Jupyter Notebook to draw graphics inline, i.e. in the notebook. This magic should be used right after the import statement." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "id": "Qwj-Cuetmgm8" 88 | }, 89 | "source": [ 90 | "import networkx as nx\n", 91 | "%matplotlib inline" 92 | ], 93 | "execution_count": null, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "id": "S5e8Cvwbmgm8" 100 | }, 101 | "source": [ 102 | "Let's check the installed version of NetworkX. Version 2 is incompatible with v1, so we want to make sure we're not using an out-of-date package." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "metadata": { 108 | "id": "NDm6CKkImgm9" 109 | }, 110 | "source": [ 111 | "nx.__version__" 112 | ], 113 | "execution_count": null, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "id": "qh0OemKjmgm9" 120 | }, 121 | "source": [ 122 | "## Creating and drawing undirected graphs" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "metadata": { 128 | "id": "JY9ew9PTmgm9" 129 | }, 130 | "source": [ 131 | "# a \"plain\" graph is undirected\n", 132 | "G = nx.Graph()\n", 133 | "\n", 134 | "# give each node a 'name', which is a letter in this case.\n", 135 | "G.add_node('a')\n", 136 | "\n", 137 | "# the add_nodes_from method allows adding nodes from a sequence, in this case a list\n", 138 | "nodes_to_add = ['b', 'c', 'd']\n", 139 | "G.add_nodes_from(nodes_to_add)\n", 140 | "\n", 141 | "# add edge from 'a' to 'b'\n", 142 | "# since this graph is undirected, the order doesn't matter here\n", 143 | "G.add_edge('a', 'b')\n", 144 | "\n", 145 | "# just like add_nodes_from, we can add edges from a sequence\n", 146 | "# edges should be specified as 2-tuples\n", 147 | "edges_to_add = [('a', 'c'), ('b', 'c'), ('c', 'd')]\n", 148 | "G.add_edges_from(edges_to_add)\n", 149 | "\n", 150 | "# draw the graph\n", 151 |
"nx.draw(G, with_labels=True)" 152 | ], 153 | "execution_count": null, 154 | "outputs": [] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": { 159 | "id": "XPJ5hkEPmgm9" 160 | }, 161 | "source": [ 162 | "There are many optional arguments to the draw function to customize the appearance." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "metadata": { 168 | "id": "bQUnoCy9mgm-" 169 | }, 170 | "source": [ 171 | "import matplotlib.pyplot as plt\n", 172 | "\n", 173 | "fig, ax = plt.subplots(1,1,figsize=(10,8))\n", 174 | "nx.draw(G,\n", 175 | " with_labels=True,\n", 176 | " node_color='blue',\n", 177 | " node_size=1600,\n", 178 | " font_color='white',\n", 179 | " font_size=16,ax=ax\n", 180 | " )" 181 | ], 182 | "execution_count": null, 183 | "outputs": [] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "YEa3CBUEmgm-" 189 | }, 190 | "source": [ 191 | "### A note on naming conventions\n" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "id": "z9D-PUHo9XTC" 198 | }, 199 | "source": [ 200 | "\n", 201 | "Usually in Python, variables are named in `snake_case`, i.e. lowercase with underscores separating words. Classes are conventionally named in `CamelCase`, i.e. with the first letter of each word capitalized.\n", 202 | "\n", 203 | "Obviously NetworkX doesn't use this convention, often using single capital letters for the names of graphs. This is an example of convention leaking from the world of discrete mathematics. Since most of the documentation you will find online uses this convention, we will follow it as well." 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "id": "4bPMI-u5mgm-" 210 | }, 211 | "source": [ 212 | "## Graph methods\n", 213 | "\n", 214 | "The graph object has some properties and methods giving data about the whole graph." 
215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "metadata": { 220 | "id": "ERBk73jVmgm-" 221 | }, 222 | "source": [ 223 | "# List all of the nodes\n", 224 | "G.nodes()" 225 | ], 226 | "execution_count": null, 227 | "outputs": [] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "metadata": { 232 | "id": "-28h383Cmgm-" 233 | }, 234 | "source": [ 235 | "# List all of the edges\n", 236 | "G.edges()" 237 | ], 238 | "execution_count": null, 239 | "outputs": [] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "id": "EKDWMPk7mgm-" 245 | }, 246 | "source": [ 247 | "NodeView and EdgeView objects have iterators, so we can use them in `for` loops:" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "n-VUFHDFmgm-" 254 | }, 255 | "source": [ 256 | "for node in G.nodes:\n", 257 | " print(node)" 258 | ], 259 | "execution_count": null, 260 | "outputs": [] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "metadata": { 265 | "scrolled": true, 266 | "id": "nlljjdqEmgm_" 267 | }, 268 | "source": [ 269 | "for edge in G.edges:\n", 270 | " print(edge)" 271 | ], 272 | "execution_count": null, 273 | "outputs": [] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": { 278 | "id": "3bdiWAWJmgm_" 279 | }, 280 | "source": [ 281 | "Note that the edges are given as 2-tuples, the same way we entered them.\n", 282 | "\n", 283 | "We can get the number of nodes and edges in a graph using the `number_of_` methods." 
284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "metadata": { 289 | "id": "MJ6j7vf_mgm_" 290 | }, 291 | "source": [ 292 | "G.number_of_nodes()" 293 | ], 294 | "execution_count": null, 295 | "outputs": [] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "metadata": { 300 | "id": "zTnGX7Hbmgm_" 301 | }, 302 | "source": [ 303 | "G.number_of_edges()" 304 | ], 305 | "execution_count": null, 306 | "outputs": [] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": { 311 | "id": "bqNH0CaWmgm_" 312 | }, 313 | "source": [ 314 | "Some graph methods take an edge or node as argument. These provide the graph properties of the given edge or node. For example, the `.neighbors()` method gives the nodes linked to the given node:" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "metadata": { 320 | "id": "RZKxKFdvmgm_" 321 | }, 322 | "source": [ 323 | "# list of neighbors of node 'b'\n", 324 | "G.neighbors('b')" 325 | ], 326 | "execution_count": null, 327 | "outputs": [] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": { 332 | "id": "-yPztVU1mgm_" 333 | }, 334 | "source": [ 335 | "For performance reasons, many graph methods return iterators instead of lists. 
They are convenient to loop over:" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "metadata": { 341 | "id": "CPAE8-Z8mgm_" 342 | }, 343 | "source": [ 344 | "for neighbor in G.neighbors('b'):\n", 345 | " print(neighbor)" 346 | ], 347 | "execution_count": null, 348 | "outputs": [] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": { 353 | "id": "wQ7ah7ocmgm_" 354 | }, 355 | "source": [ 356 | "and you can always use the `list` constructor to make a list from an iterator:" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "metadata": { 362 | "id": "v7AVsRJrmgnA" 363 | }, 364 | "source": [ 365 | "list(G.neighbors('b'))" 366 | ], 367 | "execution_count": null, 368 | "outputs": [] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "source": [ 373 | "### An advanced example using data visualization" 374 | ], 375 | "metadata": { 376 | "id": "wIwV0_8D2xaa" 377 | } 378 | }, 379 | { 380 | "cell_type": "code", 381 | "source": [ 382 | "import plotly.graph_objects as go\n", 383 | "\n", 384 | "pos = nx.spring_layout(G)\n", 385 | "edge_x = []\n", 386 | "edge_y = []\n", 387 | "\n", 388 | "for edge in G.edges():\n", 389 | " x0, y0 = pos[edge[0]]\n", 390 | " x1, y1 = pos[edge[1]]\n", 391 | " edge_x += [x0, x1, None]\n", 392 | " edge_y += [y0, y1, None]\n", 393 | "\n", 394 | "edge_trace = go.Scatter(x=edge_x, y=edge_y,\n", 395 | " line=dict(width=0.5, color=\"#888\"),\n", 396 | " hoverinfo=\"none\",\n", 397 | " mode=\"lines\")\n", 398 | "\n", 399 | "node_x = [pos[node][0] for node in G.nodes()]\n", 400 | "node_y = [pos[node][1] for node in G.nodes()]\n", 401 | "\n", 402 | "node_trace = go.Scatter(x=node_x, y=node_y,\n", 403 | " mode=\"markers+text\",\n", 404 | " text=list(G.nodes()),\n", 405 | " textposition=\"bottom center\",\n", 406 | " hoverinfo=\"text\",\n", 407 | " marker=dict(showscale=False,\n", 408 | " colorscale='YlGnBu',\n", 409 | " reversescale=True,\n", 410 | " color=[],\n", 411 | " size=10,line=dict(width=2))\n", 412 | " )\n", 413 |
"\n", 414 | "fig = go.Figure(data=[edge_trace, node_trace],\n", 415 | " layout=go.Layout(\n", 416 | " title=\"Networkx Graph Visualization with Plotly\",\n", 417 | " showlegend=False,\n", 418 | " hovermode='closest',\n", 419 | " margin=dict(b=20, l=5, r=5, t=40),\n", 420 | " xaxis=dict(showgrid=False,\n", 421 | " zeroline=False,\n", 422 | " showticklabels=False),\n", 423 | " yaxis=dict(showgrid=False,\n", 424 | " zeroline=False,\n", 425 | " showticklabels=False)\n", 426 | " )\n", 427 | " )\n", 428 | "fig.show()" 429 | ], 430 | "metadata": { 431 | "id": "y3E8mXCp3XD1" 432 | }, 433 | "execution_count": null, 434 | "outputs": [] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": { 439 | "id": "LxcVIHh2mgnA" 440 | }, 441 | "source": [ 442 | "# NetworkX functions vs. Graph methods\n" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": { 448 | "id": "p_wy_aLz9Z1J" 449 | }, 450 | "source": [ 451 | "\n", 452 | "The previous data are available via graph *methods*, *i.e.* they are called from the graph object:\n", 453 | "\n", 454 | " G.METHOD_NAME(ARGUMENTS)\n", 455 | "\n", 456 | "While several of the most-used NetworkX functions are provided as methods, many more of them are module functions and are called like this:\n", 457 | "\n", 458 | " nx.FUNCTION_NAME(G, ARGUMENTS)\n", 459 | "\n", 460 | "that is, with the graph provided as the first, and maybe only, argument.
Here are a couple of examples of NetworkX module functions that provide information about a graph:" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "metadata": { 466 | "id": "VIoTGFH3mgnA" 467 | }, 468 | "source": [ 469 | "nx.is_tree(G)" 470 | ], 471 | "execution_count": null, 472 | "outputs": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "metadata": { 477 | "id": "gbIuMKNJmgnA" 478 | }, 479 | "source": [ 480 | "nx.is_connected(G)" 481 | ], 482 | "execution_count": null, 483 | "outputs": [] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": { 488 | "id": "3L7uuaSPmgnA" 489 | }, 490 | "source": [ 491 | "## Node and edge existence\n" 492 | ] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": { 497 | "id": "m_hW0aPg9car" 498 | }, 499 | "source": [ 500 | "\n", 501 | "To check if a node is present in a graph, you can use the `has_node()` method:" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "metadata": { 507 | "id": "WfiSCgkJmgnA" 508 | }, 509 | "source": [ 510 | "G.has_node('a')" 511 | ], 512 | "execution_count": null, 513 | "outputs": [] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "metadata": { 518 | "id": "GqZntbp4mgnA" 519 | }, 520 | "source": [ 521 | "G.has_node('x')" 522 | ], 523 | "execution_count": null, 524 | "outputs": [] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": { 529 | "id": "hwz9zJThmgnA" 530 | }, 531 | "source": [ 532 | "Additionally, the loop syntax used above: `for n in G.nodes` suggests another way we can check if a node is in a graph:" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "metadata": { 538 | "id": "FYfjvghMmgnB" 539 | }, 540 | "source": [ 541 | "'d' in G.nodes" 542 | ], 543 | "execution_count": null, 544 | "outputs": [] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": { 549 | "id": "rh0IEZxmmgnB" 550 | }, 551 | "source": [ 552 | "Likewise we can check if two nodes are connected by an edge:" 553 | ] 554 | }, 555 | { 556 | 
"cell_type": "code", 557 | "metadata": { 558 | "id": "yoEeHW2vmgnB" 559 | }, 560 | "source": [ 561 | "G.has_edge('a', 'b')" 562 | ], 563 | "execution_count": null, 564 | "outputs": [] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "metadata": { 569 | "id": "xJnFSISSmgnB" 570 | }, 571 | "source": [ 572 | "G.has_edge('a', 'd')" 573 | ], 574 | "execution_count": null, 575 | "outputs": [] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "metadata": { 580 | "id": "r9nCt42kmgnB" 581 | }, 582 | "source": [ 583 | "('c', 'd') in G.edges" 584 | ], 585 | "execution_count": null, 586 | "outputs": [] 587 | }, 588 | { 589 | "cell_type": "markdown", 590 | "metadata": { 591 | "id": "bZCoiuVdmgnB" 592 | }, 593 | "source": [ 594 | "## Node degree\n" 595 | ] 596 | }, 597 | { 598 | "cell_type": "markdown", 599 | "metadata": { 600 | "id": "tgJM-8O_9eSd" 601 | }, 602 | "source": [ 603 | "\n", 604 | "One of the most important questions we can ask about a node in a graph is how many other nodes it connects to. 
Using the `.neighbors()` method from above, we could formulate this question as so:" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "metadata": { 610 | "id": "MDtjYBvJmgnB" 611 | }, 612 | "source": [ 613 | "len(list(G.neighbors('a')))" 614 | ], 615 | "execution_count": null, 616 | "outputs": [] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": { 621 | "id": "l8ftq28RmgnB" 622 | }, 623 | "source": [ 624 | "but this is such a common task that NetworkX provides us a graph method to do this in a much clearer way:" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "metadata": { 630 | "id": "IHuE22BpmgnB" 631 | }, 632 | "source": [ 633 | "G.degree('a')" 634 | ], 635 | "execution_count": null, 636 | "outputs": [] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": { 641 | "id": "OEbvlDyImgnB" 642 | }, 643 | "source": [ 644 | "## EXERCISE 1\n", 645 | "\n" 646 | ] 647 | }, 648 | { 649 | "cell_type": "markdown", 650 | "metadata": { 651 | "id": "SuO9vOYd9gFV" 652 | }, 653 | "source": [ 654 | "\n", 655 | "Often in the context of trees, a node with degree 1 is called a *leaf*. Write a function named `get_leaves` that takes a graph as an argument, loops through the nodes, and returns a list of nodes with degree 1." 
656 | ] 657 | }, 658 | { 659 | "cell_type": "code", 660 | "metadata": { 661 | "scrolled": true, 662 | "id": "FlGKqf0MmgnB" 663 | }, 664 | "source": [ 665 | "def get_leaves(G):\n", 666 | " # PUT YOUR CODE HERE" 667 | ], 668 | "execution_count": null, 669 | "outputs": [] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "metadata": { 674 | "scrolled": true, 675 | "id": "mVBLr5_xmgnB" 676 | }, 677 | "source": [ 678 | "G = nx.Graph()\n", 679 | "G.add_edges_from([\n", 680 | " ('a', 'b'),\n", 681 | " ('a', 'd'),\n", 682 | " ('c', 'd'),\n", 683 | " ])\n", 684 | "assert set(get_leaves(G)) == {'c', 'b'}" 685 | ], 686 | "execution_count": null, 687 | "outputs": [] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "metadata": { 692 | "id": "3ZJS8XwomgnC" 693 | }, 694 | "source": [ 695 | "# Aside: comprehensions\n" 696 | ] 697 | }, 698 | { 699 | "cell_type": "markdown", 700 | "metadata": { 701 | "id": "UHiOy9Uc9hMQ" 702 | }, 703 | "source": [ 704 | "\n", 705 | "Often we have one sequence of values and we want to generate a new sequence by applying an operation to each item in the first. List comprehensions and generator expressions are compact ways to do this.\n", 706 | "\n", 707 | "List comprehensions are specified inside square brackets, and immediately produce a list of the result." 
708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "metadata": { 713 | "id": "PI4AO291mgnC" 714 | }, 715 | "source": [ 716 | "items = ['spider', 'y', 'banana']\n", 717 | "[item.upper() for item in items]" 718 | ], 719 | "execution_count": null, 720 | "outputs": [] 721 | }, 722 | { 723 | "cell_type": "markdown", 724 | "metadata": { 725 | "id": "0DiBW7LQmgnC" 726 | }, 727 | "source": [ 728 | "In the context of NetworkX, this is often used to do something with the node or edge lists:" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "metadata": { 734 | "id": "OtB8bHjOmgnC" 735 | }, 736 | "source": [ 737 | "print(G.nodes())\n", 738 | "print([G.degree(n) for n in G.nodes()])" 739 | ], 740 | "execution_count": null, 741 | "outputs": [] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": { 746 | "id": "k5mqjsNbmgnC" 747 | }, 748 | "source": [ 749 | "Generator expressions are slightly different as they are evaluated [lazily](https://en.wikipedia.org/wiki/Lazy_evaluation). These are written inside parentheses, and when a generator expression is the only argument of a function call, the extra pair of parentheses can be omitted.
These are most often used in the context of aggregations like the `max` function:" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "metadata": { 755 | "id": "bmlcYEMKmgnC" 756 | }, 757 | "source": [ 758 | "g = (len(item) for item in items)\n", 759 | "list(g)" 760 | ], 761 | "execution_count": null, 762 | "outputs": [] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "metadata": { 767 | "id": "fwrnavpvmgnC" 768 | }, 769 | "source": [ 770 | "max(len(item) for item in items)" 771 | ], 772 | "execution_count": null, 773 | "outputs": [] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "metadata": { 778 | "id": "IQiLeXAmmgnC" 779 | }, 780 | "source": [ 781 | "sorted(item.upper() for item in items)" 782 | ], 783 | "execution_count": null, 784 | "outputs": [] 785 | }, 786 | { 787 | "cell_type": "markdown", 788 | "metadata": { 789 | "id": "pOHp5rgxmgnC" 790 | }, 791 | "source": [ 792 | "## Node names\n" 793 | ] 794 | }, 795 | { 796 | "cell_type": "markdown", 797 | "metadata": { 798 | "id": "xUhXjx4D9imG" 799 | }, 800 | "source": [ 801 | "\n", 802 | "The node names don't have to be single characters -- they can be strings or integers or any hashable object, and the types can be mixed. The example below uses strings and integers for names."
803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "metadata": { 808 | "id": "2FM2ZUsImgnC" 809 | }, 810 | "source": [ 811 | "import matplotlib.pyplot as plt\n", 812 | "\n", 813 | "fig, ax = plt.subplots(1,1,figsize=(8,6))\n", 814 | "\n", 815 | "G = nx.Graph()\n", 816 | "G.add_nodes_from(['cat','dog','virus',13])\n", 817 | "G.add_edge('cat','dog')\n", 818 | "nx.draw(G, with_labels=True, font_color='white', node_size=1000, ax = ax)\n", 819 | "plt.show()" 820 | ], 821 | "execution_count": null, 822 | "outputs": [] 823 | }, 824 | { 825 | "cell_type": "markdown", 826 | "metadata": { 827 | "id": "np4_nOMVmgnC" 828 | }, 829 | "source": [ 830 | "# Adjacency lists\n" 831 | ] 832 | }, 833 | { 834 | "cell_type": "markdown", 835 | "metadata": { 836 | "id": "Fi2GXDKb9k8N" 837 | }, 838 | "source": [ 839 | "\n", 840 | "One compact way to represent a graph is an adjacency list. This is most useful for unweighted graphs, directed or undirected. In an adjacency list, each line contains some number of node names. The first node name is the \"source\" and each other node name on the line is a \"target\". For instance, given the following adjacency list:\n", 841 | "```\n", 842 | "a d e\n", 843 | "b c\n", 844 | "c\n", 845 | "d\n", 846 | "e\n", 847 | "```\n", 848 | "the edges are as follows:\n", 849 | "```\n", 850 | "(a, d)\n", 851 | "(a, e)\n", 852 | "(b, c)\n", 853 | "```\n", 854 | "The nodes on their own line exist so that we are sure to include any singleton nodes. Note that if our graph is undirected, we only need to specify one direction for each edge. Importantly, whether the graph is directed or undirected is often not contained in the file itself -- you have to infer it. This is one limitation of the format.\n", 855 | "\n", 856 | "There is a file called `friends.adjlist`. 
It's a plain text file, so you can open it on your computer or in GitHub, but here are its contents:" 857 | ] 858 | }, 859 | { 860 | "cell_type": "code", 861 | "source": [ 862 | "import gdown\n", 863 | "\n", 864 | "url = 'https://drive.google.com/uc?id=1-D5a3ells1jprz0cNMJ3eEHhW2RrnJDB'\n", 865 | "output = 'friends.adjlist'\n", 866 | "gdown.download(url, output, quiet=False)" 867 | ], 868 | "metadata": { 869 | "id": "gpwIjkhcznOp" 870 | }, 871 | "execution_count": null, 872 | "outputs": [] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "metadata": { 877 | "id": "Yet6Ll9wmgnD" 878 | }, 879 | "source": [ 880 | "print(open('friends.adjlist').read())" 881 | ], 882 | "execution_count": null, 883 | "outputs": [] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": { 888 | "id": "Nx-3UM19mgnD" 889 | }, 890 | "source": [ 891 | "NetworkX provides a way to read a graph from an adjacency list: `nx.read_adjlist()`. We will name this graph SG, for social graph." 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "metadata": { 897 | "id": "ZHZefiTWmgnD" 898 | }, 899 | "source": [ 900 | "SG = nx.read_adjlist('friends.adjlist')" 901 | ], 902 | "execution_count": null, 903 | "outputs": [] 904 | }, 905 | { 906 | "cell_type": "markdown", 907 | "metadata": { 908 | "id": "HPYvh9tbmgnD" 909 | }, 910 | "source": [ 911 | "We know how to draw this graph:" 912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "metadata": { 917 | "id": "Xqvx4sJRmgnD" 918 | }, 919 | "source": [ 920 | "import matplotlib.pyplot as plt\n", 921 | "\n", 922 | "fig,ax = plt.subplots(1,1,figsize=(10,8))\n", 923 | "nx.draw(SG, node_size=2000, node_color='lightblue', with_labels=True,ax=ax)\n", 924 | "plt.show()" 925 | ], 926 | "execution_count": null, 927 | "outputs": [] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "metadata": { 932 | "id": "sJkPbCgfmgnD" 933 | }, 934 | "source": [ 935 | "And we know how to get information such as the number of friends linked from a node:" 936 | ] 
937 | }, 938 | { 939 | "cell_type": "code", 940 | "metadata": { 941 | "id": "3XEmjoTBmgnD" 942 | }, 943 | "source": [ 944 | "SG.degree('Alice')" 945 | ], 946 | "execution_count": null, 947 | "outputs": [] 948 | }, 949 | { 950 | "cell_type": "markdown", 951 | "metadata": { 952 | "id": "TcYdyCnEmgnD" 953 | }, 954 | "source": [ 955 | "## EXERCISE 2\n" 956 | ] 957 | }, 958 | { 959 | "cell_type": "markdown", 960 | "metadata": { 961 | "id": "MsE6DTbI9m4t" 962 | }, 963 | "source": [ 964 | "\n", 965 | "Write a function `max_degree` that takes a graph as its argument, and returns a 2-tuple with the name and degree of the node with the highest degree." 966 | ] 967 | }, 968 | { 969 | "cell_type": "code", 970 | "metadata": { 971 | "scrolled": false, 972 | "id": "DLufczZQmgnD" 973 | }, 974 | "source": [ 975 | "def max_degree(G):\n", 976 | " # PUT YOUR CODE HERE" 977 | ], 978 | "execution_count": null, 979 | "outputs": [] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "metadata": { 984 | "scrolled": false, 985 | "id": "aEQapPYNmgnD" 986 | }, 987 | "source": [ 988 | "SG = nx.read_adjlist('friends.adjlist')\n", 989 | "assert max_degree(SG) == ('Claire', 4)" 990 | ], 991 | "execution_count": null, 992 | "outputs": [] 993 | }, 994 | { 995 | "cell_type": "markdown", 996 | "metadata": { 997 | "id": "MCDbNldnmgnD" 998 | }, 999 | "source": [ 1000 | "## EXERCISE 3\n" 1001 | ] 1002 | }, 1003 | { 1004 | "cell_type": "markdown", 1005 | "metadata": { 1006 | "id": "RWup3uDm9n5I" 1007 | }, 1008 | "source": [ 1009 | "\n", 1010 | "Write a function `mutual_friends` that takes a graph and two nodes as arguments, and returns a list of nodes that are linked to both given nodes (return a list rather than a set: the checks below compare the result against lists). For example, in the graph `SG` drawn above,\n", 1011 | "\n", 1012 | " mutual_friends(SG, 'Alice', 'Claire') == ['Frank']\n", 1013 | "\n", 1014 | "An empty list should be returned in the case where two nodes have no mutual friends, e.g. George and Bob in `SG` drawn above."
1015 | ] 1016 | }, 1017 | { 1018 | "cell_type": "code", 1019 | "metadata": { 1020 | "scrolled": false, 1021 | "id": "hpnX9YeKmgnD" 1022 | }, 1023 | "source": [ 1024 | "def mutual_friends(G, node_1, node_2):\n", 1025 | " # PUT YOUR CODE HERE" 1026 | ], 1027 | "execution_count": null, 1028 | "outputs": [] 1029 | }, 1030 | { 1031 | "cell_type": "code", 1032 | "metadata": { 1033 | "scrolled": true, 1034 | "id": "B2n0vCRYmgnE" 1035 | }, 1036 | "source": [ 1037 | "SG = nx.read_adjlist('friends.adjlist')\n", 1038 | "assert mutual_friends(SG, 'Alice', 'Claire') == ['Frank']\n", 1039 | "assert mutual_friends(SG, 'George', 'Bob') == []\n", 1040 | "assert sorted(mutual_friends(SG, 'Claire', 'George')) == ['Dennis', 'Frank']" 1041 | ], 1042 | "execution_count": null, 1043 | "outputs": [] 1044 | }, 1045 | { 1046 | "cell_type": "markdown", 1047 | "metadata": { 1048 | "id": "1Y9kUzI5mgnE" 1049 | }, 1050 | "source": [ 1051 | "# Directed graphs\n" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "markdown", 1056 | "metadata": { 1057 | "id": "vGwKHPk49pjQ" 1058 | }, 1059 | "source": [ 1060 | "\n", 1061 | "Unless otherwise specified, we assume graph edges are undirected -- they are symmetric and go both ways. But some relationships, e.g. predator-prey relationships, are asymmetric and best represented as directed graphs. NetworkX provides the `DiGraph` class for directed graphs." 
1062 | ] 1063 | }, 1064 | { 1065 | "cell_type": "code", 1066 | "metadata": { 1067 | "id": "U-aYvHw2mgnE" 1068 | }, 1069 | "source": [ 1070 | "D = nx.DiGraph()\n", 1071 | "\n", 1072 | "D.add_edges_from([(1,2),(2,3),(3,2),(3,4),(3,5),(4,5),(4,6),(5,6),(6,4),(4,2)])\n", 1073 | "\n", 1074 | "nx.draw(D, with_labels=True)" 1075 | ], 1076 | "execution_count": null, 1077 | "outputs": [] 1078 | }, 1079 | { 1080 | "cell_type": "markdown", 1081 | "metadata": { 1082 | "id": "cJh2UMM1mgnE" 1083 | }, 1084 | "source": [ 1085 | "Note the asymmetry in graph methods dealing with edges such as `has_edge()`:" 1086 | ] 1087 | }, 1088 | { 1089 | "cell_type": "code", 1090 | "metadata": { 1091 | "id": "qR3qxAmPmgnE" 1092 | }, 1093 | "source": [ 1094 | "D.has_edge(1,2)" 1095 | ], 1096 | "execution_count": null, 1097 | "outputs": [] 1098 | }, 1099 | { 1100 | "cell_type": "code", 1101 | "metadata": { 1102 | "id": "GMOpNUIfmgnE" 1103 | }, 1104 | "source": [ 1105 | "D.has_edge(2,1)" 1106 | ], 1107 | "execution_count": null, 1108 | "outputs": [] 1109 | }, 1110 | { 1111 | "cell_type": "markdown", 1112 | "metadata": { 1113 | "id": "1Sr0z0FQmgnE" 1114 | }, 1115 | "source": [ 1116 | "Instead of the symmetric relationship \"neighbors\", nodes in directed graphs have predecessors (\"in-neighbors\") and successors (\"out-neighbors\"):" 1117 | ] 1118 | }, 1119 | { 1120 | "cell_type": "code", 1121 | "metadata": { 1122 | "id": "xgGAG8LCmgnE" 1123 | }, 1124 | "source": [ 1125 | "print('Successors of 2:', list(D.successors(2)))\n", 1126 | "\n", 1127 | "print('Predecessors of 2:', list(D.predecessors(2)))" 1128 | ], 1129 | "execution_count": null, 1130 | "outputs": [] 1131 | }, 1132 | { 1133 | "cell_type": "markdown", 1134 | "metadata": { 1135 | "id": "rsj-OkL8mgnE" 1136 | }, 1137 | "source": [ 1138 | "Directed graphs have in-degree and out-degree, giving the number of edges pointing to and from the given node, respectively:" 1139 | ] 1140 | }, 1141 | { 1142 | "cell_type": "code", 1143 | "metadata": { 1144 
| "id": "D_Wz7lDYmgnE" 1145 | }, 1146 | "source": [ 1147 | "D.in_degree(2)" 1148 | ], 1149 | "execution_count": null, 1150 | "outputs": [] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "metadata": { 1155 | "id": "PRY7wcGpmgnE" 1156 | }, 1157 | "source": [ 1158 | "D.out_degree(2)" 1159 | ], 1160 | "execution_count": null, 1161 | "outputs": [] 1162 | }, 1163 | { 1164 | "cell_type": "markdown", 1165 | "metadata": { 1166 | "id": "axgp9QB1mgnF" 1167 | }, 1168 | "source": [ 1169 | "### Caveat\n", 1170 | "\n", 1171 | "Since NetworkX 2, the `.degree()` method on a directed graph gives the total degree: in-degree plus out-degree. However, in a bit of confusing nomenclature, the `neighbors` method is a synonym for `successors`, giving only the edges originating from the given node. This makes sense if you consider `neighbors` to be all the nodes reachable from the given node by following links, but it's easy to make the mistake of writing `.neighbors()` in your code when you really want both predecessors and successors." 
1172 | ] 1173 | }, 1174 | { 1175 | "cell_type": "code", 1176 | "metadata": { 1177 | "id": "OkMlHO7KmgnF" 1178 | }, 1179 | "source": [ 1180 | "D.degree(2)" 1181 | ], 1182 | "execution_count": null, 1183 | "outputs": [] 1184 | }, 1185 | { 1186 | "cell_type": "code", 1187 | "metadata": { 1188 | "scrolled": false, 1189 | "id": "1pUW9bzqmgnF" 1190 | }, 1191 | "source": [ 1192 | "print('Successors of 2:', list(D.successors(2)))\n", 1193 | "print('\"Neighbors\" of 2:', list(D.neighbors(2)))" 1194 | ], 1195 | "execution_count": null, 1196 | "outputs": [] 1197 | } 1198 | ] 1199 | } -------------------------------------------------------------------------------- /lessons/week01/lesson01.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week01/lesson01.pdf -------------------------------------------------------------------------------- /lessons/week01/lesson02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week01/lesson02.pdf -------------------------------------------------------------------------------- /lessons/week02/lesson03.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Part 1" 21 | ], 22 | "metadata": { 23 | "id": "o5NjK5PE2wZW" 24 | } 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "id": "cGIQKdJqIDhz" 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import networkx 
as nx\n", 35 | "import matplotlib.pyplot as plt\n", 36 | "\n", 37 | "# Create an undirected graph (NOTE: nx.Graph stores at most one edge per node pair, so the repeated bridges below collapse and G ends up with 5 edges; nx.MultiGraph would keep all 7)\n", 38 | "G = nx.Graph()\n", 39 | "\n", 40 | "# Add nodes (land masses)\n", 41 | "G.add_nodes_from(['A', 'B', 'C', 'D'])\n", 42 | "\n", 43 | "# Add edges (bridges between the land masses)\n", 44 | "edges = [\n", 45 | " ('A', 'B'), ('A', 'B'), # Two bridges between A and B\n", 46 | " ('A', 'C'), ('A', 'C'), # Two bridges between A and C\n", 47 | " ('A', 'D'), # One bridge between A and D\n", 48 | " ('B', 'D'), # One bridge between B and D\n", 49 | " ('C', 'D') # One bridge between C and D\n", 50 | "]\n", 51 | "\n", 52 | "G.add_edges_from(edges)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "source": [ 58 | "# Define node positions to reflect the geographical layout\n", 59 | "pos = {\n", 60 | " 'A': (0, 1), # North bank\n", 61 | " 'B': (2, 1), # Northeast island\n", 62 | " 'C': (0, -1), # Southeast island\n", 63 | " 'D': (2, -1) # South bank\n", 64 | "}\n", 65 | "\n", 66 | "# Draw the graph\n", 67 | "plt.figure(figsize=(8, 6))\n", 68 | "nx.draw(G, pos, with_labels=True, node_size=1000, node_color='lightblue', font_size=14, font_weight='bold')\n", 69 | "nx.draw_networkx_edges(G, pos, edgelist=edges, width=2)\n", 70 | "plt.title(\"Seven Bridges of Königsberg\", fontsize=16)\n", 71 | "plt.axis('off')\n", 72 | "plt.show()" 73 | ], 74 | "metadata": { 75 | "id": "NsWcCZazIq5V" 76 | }, 77 | "execution_count": null, 78 | "outputs": [] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "source": [ 83 | "import networkx as nx\n", 84 | "import matplotlib.pyplot as plt\n", 85 | "\n", 86 | "# Create an undirected graph\n", 87 | "G = nx.Graph()\n", 88 | "\n", 89 | "# Add nodes\n", 90 | "G.add_nodes_from([1, 2, 3, 4])\n", 91 | "\n", 92 | "# Add edges (unweighted)\n", 93 | "edges = [(1, 2), (1, 3), (2, 3), (3, 4)]\n", 94 | "G.add_edges_from(edges)\n", 95 | "\n", 96 | "# Draw the graph\n", 97 | "nx.draw(G, with_labels=True, node_color='lightblue', node_size=500)\n", 98 |
"plt.title(\"Undirected and Unweighted Graph\")\n", 99 | "plt.show()" 100 | ], 101 | "metadata": { 102 | "id": "ZvevMKVyMo0w" 103 | }, 104 | "execution_count": null, 105 | "outputs": [] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "source": [ 110 | "import networkx as nx\n", 111 | "import matplotlib.pyplot as plt\n", 112 | "\n", 113 | "# Create an undirected graph\n", 114 | "G = nx.Graph()\n", 115 | "\n", 116 | "# Add nodes\n", 117 | "G.add_nodes_from([1, 2, 3, 4])\n", 118 | "\n", 119 | "# Add weighted edges\n", 120 | "edges = [\n", 121 | " (1, 2, {'weight': 2.5}),\n", 122 | " (1, 3, {'weight': 1.2}),\n", 123 | " (2, 3, {'weight': 0.75}),\n", 124 | " (3, 4, {'weight': 1.8})\n", 125 | "]\n", 126 | "G.add_edges_from(edges)\n", 127 | "\n", 128 | "# Get edge weights\n", 129 | "edge_labels = nx.get_edge_attributes(G, 'weight')\n", 130 | "\n", 131 | "# Draw the graph\n", 132 | "pos = nx.spring_layout(G)\n", 133 | "nx.draw(G, pos, with_labels=True, node_color='lightgreen', node_size=500)\n", 134 | "nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)\n", 135 | "plt.title(\"Undirected and Weighted Graph\")\n", 136 | "plt.show()" 137 | ], 138 | "metadata": { 139 | "id": "JZpetnhkOUMU" 140 | }, 141 | "execution_count": null, 142 | "outputs": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "source": [ 147 | "edge_labels" 148 | ], 149 | "metadata": { 150 | "id": "kEV2AVg8j2GZ" 151 | }, 152 | "execution_count": null, 153 | "outputs": [] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "source": [ 158 | "import networkx as nx\n", 159 | "import matplotlib.pyplot as plt\n", 160 | "\n", 161 | "# Create a directed graph\n", 162 | "G = nx.DiGraph()\n", 163 | "\n", 164 | "# Add nodes\n", 165 | "G.add_nodes_from(['1', '2', '3', '4'])\n", 166 | "\n", 167 | "# Add edges (unweighted)\n", 168 | "edges = [('1', '2'), ('2', '3'), ('3', '1'), ('3', '4')]\n", 169 | "G.add_edges_from(edges)\n", 170 | "\n", 171 | "# Draw the graph\n", 172 | "pos = nx.circular_layout(G)\n", 
173 | "nx.draw(G, pos, with_labels=True, node_color='orange', node_size=500, arrowsize=20)\n", 174 | "plt.title(\"Directed and Unweighted Graph\")\n", 175 | "plt.show()" 176 | ], 177 | "metadata": { 178 | "id": "q7xBCCxYP0EA" 179 | }, 180 | "execution_count": null, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "source": [ 186 | "import networkx as nx\n", 187 | "import matplotlib.pyplot as plt\n", 188 | "\n", 189 | "fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n", 190 | "\n", 191 | "# --- Undirected and Unweighted Graph ---\n", 192 | "G1 = nx.Graph()\n", 193 | "G1.add_nodes_from([1, 2, 3, 4])\n", 194 | "edges1 = [(1, 2), (1, 3), (2, 3), (3, 4)]\n", 195 | "G1.add_edges_from(edges1)\n", 196 | "nx.draw(G1, ax=axes[0, 0], with_labels=True, node_color='lightblue', node_size=500)\n", 197 | "axes[0, 0].set_title(\"Undirected and Unweighted\")\n", 198 | "\n", 199 | "# --- Undirected and Weighted Graph ---\n", 200 | "G2 = nx.Graph()\n", 201 | "G2.add_nodes_from([1, 2, 3, 4])\n", 202 | "edges2 = [\n", 203 | " (1, 2, {'weight': 2.5}),\n", 204 | " (3, 1, {'weight': 1.2}),\n", 205 | " (2, 3, {'weight': 0.75}),\n", 206 | " (3, 4, {'weight': 1.8})\n", 207 | "]\n", 208 | "G2.add_edges_from(edges2)\n", 209 | "pos2 = nx.spring_layout(G2)\n", 210 | "edge_labels2 = nx.get_edge_attributes(G2, 'weight')\n", 211 | "nx.draw(G2, pos2, ax=axes[0, 1], with_labels=True, node_color='lightgreen', node_size=500)\n", 212 | "nx.draw_networkx_edge_labels(G2, pos2, edge_labels=edge_labels2, ax=axes[0, 1])\n", 213 | "axes[0, 1].set_title(\"Undirected and Weighted\")\n", 214 | "\n", 215 | "# --- Directed and Unweighted Graph ---\n", 216 | "G3 = nx.DiGraph()\n", 217 | "G3.add_nodes_from(['A', 'B', 'C', 'D'])\n", 218 | "edges3 = [('A', 'B'), ('B', 'C'), ('C', 'A'), ('C', 'D')]\n", 219 | "G3.add_edges_from(edges3)\n", 220 | "pos3 = nx.circular_layout(G3)\n", 221 | "nx.draw(G3, pos3, ax=axes[1, 0], with_labels=True, node_color='orange', node_size=500, arrowsize=20)\n", 222 | 
"axes[1, 0].set_title(\"Directed and Unweighted\")\n", 223 | "\n", 224 | "# --- Directed and Weighted Graph ---\n", 225 | "G4 = nx.DiGraph()\n", 226 | "G4.add_nodes_from(['X', 'Y', 'Z', 'W'])\n", 227 | "edges4 = [\n", 228 | " ('X', 'Y', {'weight': 3}),\n", 229 | " ('Y', 'Z', {'weight': 2}),\n", 230 | " ('Z', 'X', {'weight': 4}),\n", 231 | " ('Z', 'W', {'weight': 1})\n", 232 | "]\n", 233 | "G4.add_edges_from(edges4)\n", 234 | "pos4 = nx.spring_layout(G4)\n", 235 | "edge_labels4 = nx.get_edge_attributes(G4, 'weight')\n", 236 | "nx.draw(G4, pos4, ax=axes[1, 1], with_labels=True, node_color='pink', node_size=500, arrowsize=20)\n", 237 | "nx.draw_networkx_edge_labels(G4, pos4, edge_labels=edge_labels4, ax=axes[1, 1])\n", 238 | "axes[1, 1].set_title(\"Directed and Weighted\")\n", 239 | "\n", 240 | "# Adjust layout\n", 241 | "plt.tight_layout()\n", 242 | "plt.show()\n" 243 | ], 244 | "metadata": { 245 | "id": "zxEl67qxRjzX" 246 | }, 247 | "execution_count": null, 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "source": [ 253 | "del G" 254 | ], 255 | "metadata": { 256 | "id": "BzrHnj4EUH2-" 257 | }, 258 | "execution_count": null, 259 | "outputs": [] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "source": [ 264 | "import networkx as nx\n", 265 | "import matplotlib.pyplot as plt\n", 266 | "\n", 267 | "# Create a directed graph\n", 268 | "G = nx.DiGraph()\n", 269 | "\n", 270 | "# Add nodes\n", 271 | "G.add_nodes_from([1, 2, 3, 4])\n", 272 | "\n", 273 | "# Add weighted edges\n", 274 | "edges = [\n", 275 | " (1, 2, {'weight': 2.5}),\n", 276 | " (3, 1, {'weight': 1.2}),\n", 277 | " (2, 3, {'weight': 0.75}),\n", 278 | " (3, 4, {'weight': 1.8})\n", 279 | "]\n", 280 | "G.add_edges_from(edges)\n", 281 | "\n", 282 | "# Get edge weights\n", 283 | "edge_labels = nx.get_edge_attributes(G, 'weight')\n", 284 | "\n", 285 | "# Draw the graph\n", 286 | "pos = nx.spring_layout(G)\n", 287 | "nx.draw(G, pos, with_labels=True, node_color='pink', node_size=500, 
arrowsize=20)\n", 288 | "nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)\n", 289 | "plt.title(\"Directed and Weighted Graph\")\n", 290 | "plt.show()" 291 | ], 292 | "metadata": { 293 | "id": "o37iNevQShuG" 294 | }, 295 | "execution_count": null, 296 | "outputs": [] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "source": [ 301 | "import networkx as nx\n", 302 | "import matplotlib.pyplot as plt\n", 303 | "\n", 304 | "# Create a directed graph\n", 305 | "G = nx.DiGraph()\n", 306 | "\n", 307 | "# Add nodes\n", 308 | "G.add_nodes_from(['X', 'Y', 'Z', 'W'])\n", 309 | "\n", 310 | "# Add weighted edges\n", 311 | "edges = [\n", 312 | " ('X', 'Y', {'weight': 3}),\n", 313 | " ('Y', 'Z', {'weight': 2}),\n", 314 | " ('Z', 'X', {'weight': 4}),\n", 315 | " ('Z', 'W', {'weight': 1})\n", 316 | "]\n", 317 | "G.add_edges_from(edges)\n", 318 | "\n", 319 | "# Get edge weights\n", 320 | "edge_labels = nx.get_edge_attributes(G, 'weight')\n", 321 | "\n", 322 | "# Draw the graph\n", 323 | "pos = nx.spring_layout(G)\n", 324 | "nx.draw(G, pos, with_labels=True, node_color='pink', node_size=500, arrowsize=20)\n", 325 | "nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)\n", 326 | "plt.title(\"Directed and Weighted Graph\")\n", 327 | "plt.show()\n" 328 | ], 329 | "metadata": { 330 | "id": "0avFk369TyJc" 331 | }, 332 | "execution_count": null, 333 | "outputs": [] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "source": [ 338 | "import networkx as nx\n", 339 | "import matplotlib.pyplot as plt\n", 340 | "\n", 341 | "# Step 1: Create an empty graph\n", 342 | "B = nx.Graph()\n", 343 | "\n", 344 | "# Step 2: Define the two node sets\n", 345 | "U = {'A', 'B', 'C'}\n", 346 | "V = {1, 2, 3, 4}\n", 347 | "\n", 348 | "# Step 3: Add nodes with the bipartite attribute\n", 349 | "B.add_nodes_from(U, bipartite=0)\n", 350 | "B.add_nodes_from(V, bipartite=1)\n", 351 | "\n", 352 | "# Step 4: Add edges between nodes from U to V\n", 353 | "edges = [('A', 1), ('A', 2), ('B', 2), 
('B', 3), ('C', 3), ('C', 4)]\n", 354 | "B.add_edges_from(edges)\n", 355 | "\n", 356 | "# Step 5: Visualize the bipartite graph\n", 357 | "pos = nx.bipartite_layout(B, U)\n", 358 | "nx.draw_networkx_nodes(B, pos, nodelist=U, node_color='lightblue', node_size=500, label='Set U')\n", 359 | "nx.draw_networkx_nodes(B, pos, nodelist=V, node_color='lightgreen', node_size=500, label='Set V')\n", 360 | "nx.draw_networkx_edges(B, pos)\n", 361 | "nx.draw_networkx_labels(B, pos)\n", 362 | "plt.legend(scatterpoints=1)\n", 363 | "plt.title(\"Bipartite Graph\")\n", 364 | "plt.axis('off')\n", 365 | "plt.show()" 366 | ], 367 | "metadata": { 368 | "id": "1KpaiyhrUAra" 369 | }, 370 | "execution_count": null, 371 | "outputs": [] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "source": [ 376 | "import networkx as nx\n", 377 | "import matplotlib.pyplot as plt\n", 378 | "from matplotlib.lines import Line2D # Import Line2D for custom legend handles\n", 379 | "\n", 380 | "# Step 1: Create an empty graph\n", 381 | "G = nx.Graph()\n", 382 | "\n", 383 | "# Step 2: Define the two node sets\n", 384 | "U = {'A', 'B', 'C'}\n", 385 | "V = {1, 2, 3, 4}\n", 386 | "\n", 387 | "# Step 3: Add nodes with the bipartite attribute\n", 388 | "G.add_nodes_from(U, bipartite=0)\n", 389 | "G.add_nodes_from(V, bipartite=1)\n", 390 | "\n", 391 | "# Step 4: Add edges between nodes from U to V\n", 392 | "edges = [('A', 1), ('A', 2), ('B', 2), ('B', 3), ('C', 3), ('C', 4)]\n", 393 | "G.add_edges_from(edges)\n", 394 | "\n", 395 | "# Step 5: Visualize the bipartite graph\n", 396 | "pos = nx.bipartite_layout(G, U)\n", 397 | "\n", 398 | "# Draw nodes for each set\n", 399 | "nx.draw_networkx_nodes(G, pos, nodelist=U, node_color='lightblue', node_size=500)\n", 400 | "nx.draw_networkx_nodes(G, pos, nodelist=V, node_color='lightgreen', node_size=500)\n", 401 | "nx.draw_networkx_edges(G, pos)\n", 402 | "nx.draw_networkx_labels(G, pos)\n", 403 | "\n", 404 | "# Create custom legend handles\n", 405 | "legend_elements = 
[\n", 406 | " Line2D([0], [0], marker='o', color='w', label='Set U',\n", 407 | " markerfacecolor='lightblue', markersize=15),\n", 408 | " Line2D([0], [0], marker='o', color='w', label='Set V',\n", 409 | " markerfacecolor='lightgreen', markersize=15)\n", 410 | "]\n", 411 | "\n", 412 | "# Add the legend to the plot\n", 413 | "plt.legend(handles=legend_elements)\n", 414 | "plt.title(\"Bipartite Graph\")\n", 415 | "plt.axis('off')\n", 416 | "# save the figure\n", 417 | "plt.savefig('grafo_bipartite.png', transparent=True)\n", 418 | "plt.show()" 419 | ], 420 | "metadata": { 421 | "id": "6pWS1LTike3n" 422 | }, 423 | "execution_count": null, 424 | "outputs": [] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "source": [ 429 | "is_bipartite = nx.is_bipartite(G)\n", 430 | "print(f\"Is the graph bipartite? {is_bipartite}\")" 431 | ], 432 | "metadata": { 433 | "id": "O-pRQW8MiWU2" 434 | }, 435 | "execution_count": null, 436 | "outputs": [] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "source": [ 441 | "from networkx.algorithms import bipartite\n", 442 | "\n", 443 | "projected_U = bipartite.projected_graph(G, U)\n", 444 | "nx.draw(projected_U, with_labels=True, node_color='lightblue',node_size=500)\n", 445 | "plt.title(\"Projection onto Set U\")\n", 446 | "\n", 447 | "# save the figure\n", 448 | "plt.savefig('projected.png', transparent=True)\n", 449 | "plt.show()" 450 | ], 451 | "metadata": { 452 | "id": "kHQMzYYGivEm" 453 | }, 454 | "execution_count": null, 455 | "outputs": [] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "source": [ 460 | "projected_V = bipartite.projected_graph(B, V)\n", 461 | "nx.draw(projected_V, with_labels=True, node_color='lightgreen', node_size=500)\n", 462 | "plt.title(\"Projection onto Set V\")\n", 463 | "\n", 464 | "plt.show()" 465 | ], 466 | "metadata": { 467 | "id": "XCwVD10yi47B" 468 | }, 469 | "execution_count": null, 470 | "outputs": [] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "source": [ 475 | "import networkx as nx\n", 
476 | "import matplotlib.pyplot as plt\n", 477 | "\n", 478 | "# Create a MultiGraph to handle multiple edges\n", 479 | "G = nx.MultiGraph()\n", 480 | "\n", 481 | "# List of nodes\n", 482 | "nodes = ['Alice', 'Bob', 'Charlie', 'Diana']\n", 483 | "\n", 484 | "# Add nodes to the graph\n", 485 | "G.add_nodes_from(nodes)\n", 486 | "\n", 487 | "# Edges in Layer 1 (Friendships)\n", 488 | "friendships = [\n", 489 | " ('Alice', 'Bob'),\n", 490 | " ('Alice', 'Charlie'),\n", 491 | " ('Bob', 'Charlie')\n", 492 | "]\n", 493 | "\n", 494 | "# Edges in Layer 2 (Professional Relationships)\n", 495 | "professional_relationships = [\n", 496 | " ('Alice', 'Diana'),\n", 497 | " ('Bob', 'Diana'),\n", 498 | " ('Charlie', 'Diana')\n", 499 | "]\n", 500 | "\n", 501 | "# Add edges with layer attribute\n", 502 | "#G.add_edges_from(friendships, layer='friendship')\n", 503 | "#G.add_edges_from(professional_relationships, layer='professional')\n", 504 | "\n", 505 | "# Adding edges with more attributes\n", 506 | "G.add_edge('Alice', 'Bob', layer='friendship', weight=1)\n", 507 | "G.add_edge('Alice', 'Charlie', layer='friendship', weight=1)\n", 508 | "G.add_edge('Bob', 'Charlie', layer='friendship', weight=1)\n", 509 | "\n", 510 | "G.add_edge('Alice', 'Diana', layer='professional', weight=2)\n", 511 | "G.add_edge('Bob', 'Diana', layer='professional', weight=2)\n", 512 | "G.add_edge('Charlie', 'Diana', layer='professional', weight=2)\n", 513 | "\n", 514 | "# Positions for nodes\n", 515 | "pos = nx.spring_layout(G, seed=42) # Fixed seed for reproducibility\n", 516 | "\n", 517 | "# Separate edges by layer\n", 518 | "edges_friendship = [(u, v) for u, v, d in G.edges(data=True) if d['layer'] == 'friendship']\n", 519 | "edges_professional = [(u, v) for u, v, d in G.edges(data=True) if d['layer'] == 'professional']\n", 520 | "\n", 521 | "# Draw nodes\n", 522 | "nx.draw_networkx_nodes(G, pos, node_size=500, node_color='lightblue')\n", 523 | "\n", 524 | "# Draw edges for friendship layer\n", 525 | 
"nx.draw_networkx_edges(G, pos, edgelist=edges_friendship, edge_color='blue', label='Friendship')\n", 526 | "\n", 527 | "# Draw edges for professional layer\n", 528 | "nx.draw_networkx_edges(G, pos, edgelist=edges_professional, edge_color='green', style='dashed', label='Professional')\n", 529 | "\n", 530 | "# Draw labels\n", 531 | "nx.draw_networkx_labels(G, pos)\n", 532 | "\n", 533 | "# Create custom legend\n", 534 | "from matplotlib.lines import Line2D\n", 535 | "\n", 536 | "legend_elements = [\n", 537 | " Line2D([0], [0], color='blue', lw=2, label='Friendship'),\n", 538 | " Line2D([0], [0], color='green', lw=2, linestyle='dashed', label='Professional')\n", 539 | "]\n", 540 | "\n", 541 | "plt.legend(handles=legend_elements)\n", 542 | "plt.title('Multilayer Graph')\n", 543 | "plt.axis('off')\n", 544 | "# save the figure\n", 545 | "plt.savefig('multigraph.png', transparent=True)\n", 546 | "plt.show()\n" 547 | ], 548 | "metadata": { 549 | "id": "OnDfM9RSi-KY" 550 | }, 551 | "execution_count": null, 552 | "outputs": [] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "source": [ 557 | "G.edges(data=True)" 558 | ], 559 | "metadata": { 560 | "id": "Z7a7JO7VrIKe" 561 | }, 562 | "execution_count": null, 563 | "outputs": [] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "source": [ 568 | "# Part 2" 569 | ], 570 | "metadata": { 571 | "id": "t9MALrKf2yiS" 572 | } 573 | }, 574 | { 575 | "cell_type": "markdown", 576 | "source": [ 577 | "## Density" 578 | ], 579 | "metadata": { 580 | "id": "ojigl1Kz9BLU" 581 | } 582 | }, 583 | { 584 | "cell_type": "code", 585 | "source": [ 586 | "import networkx as nx\n", 587 | "import gzip\n", 588 | "import urllib.request\n", 589 | "\n", 590 | "# URL of the dataset (compressed file of Enron email network)\n", 591 | "url = \"https://snap.stanford.edu/data/email-Enron.txt.gz\"\n", 592 | "filename = \"email-Enron.txt.gz\"\n", 593 | "\n", 594 | "# Download the file from the URL\n", 595 | "urllib.request.urlretrieve(url, filename)" 596 | 
], 597 | "metadata": { 598 | "id": "sOE-b1Bs25tO" 599 | }, 600 | "execution_count": null, 601 | "outputs": [] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "source": [ 606 | "# Create an undirected graph\n", 607 | "G = nx.Graph()\n", 608 | "\n", 609 | "# Open and read the compressed file line by line\n", 610 | "with gzip.open(filename, 'rt') as f:\n", 611 | " for line in f:\n", 612 | " if line.startswith(\"#\"):\n", 613 | " continue # Skip comment lines\n", 614 | " src, dst = map(int, line.strip().split()) # Extract source and destination node IDs\n", 615 | " G.add_edge(src, dst) # Add an undirected edge between the nodes" 616 | ], 617 | "metadata": { 618 | "id": "sOsxZGk23GII" 619 | }, 620 | "execution_count": null, 621 | "outputs": [] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "source": [ 626 | "# Calculate number of nodes (vertices)\n", 627 | "num_nodes = G.number_of_nodes()\n", 628 | "\n", 629 | "# Calculate number of edges (links)\n", 630 | "num_edges = G.number_of_edges()\n", 631 | "\n", 632 | "# Calculate network density\n", 633 | "density = nx.density(G)\n", 634 | "\n", 635 | "manual_density = 2 * num_edges / (num_nodes * (num_nodes - 1))\n", 636 | "\n", 637 | "# Print basic network statistics\n", 638 | "print(f\"Number of nodes: {num_nodes}\")\n", 639 | "print(f\"Number of edges: {num_edges}\")\n", 640 | "print(f\"Network density: {density:.6f}\")\n", 641 | "print(f\"Network density calc: {manual_density:.6f}\")" 642 | ], 643 | "metadata": { 644 | "id": "-wIsA0CC3aln" 645 | }, 646 | "execution_count": null, 647 | "outputs": [] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "source": [ 652 | "## Subgraph" 653 | ], 654 | "metadata": { 655 | "id": "L7ecbHr79DUi" 656 | } 657 | }, 658 | { 659 | "cell_type": "code", 660 | "source": [ 661 | "import networkx as nx\n", 662 | "import gzip\n", 663 | "import urllib.request\n", 664 | "import random\n", 665 | "\n", 666 | "# Step 1: Download the citation network dataset\n", 667 | "url = 
\"https://snap.stanford.edu/data/cit-Patents.txt.gz\"\n", 668 | "filename = \"cit-Patents.txt.gz\"\n", 669 | "urllib.request.urlretrieve(url, filename)" 670 | ], 671 | "metadata": { 672 | "id": "kYr5Humm3oYT" 673 | }, 674 | "execution_count": null, 675 | "outputs": [] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "source": [ 680 | "# Step 2: Create a directed graph and load data\n", 681 | "G = nx.DiGraph()\n", 682 | "with gzip.open(filename, 'rt') as f:\n", 683 | " for line in f:\n", 684 | " if line.startswith(\"#\"):\n", 685 | " continue # Skip comment lines\n", 686 | " src, dst = map(int, line.strip().split())\n", 687 | " G.add_edge(src, dst)" 688 | ], 689 | "metadata": { 690 | "id": "1DrgheoP9Ezy" 691 | }, 692 | "execution_count": null, 693 | "outputs": [] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "source": [ 698 | "print(f\"Full graph: {G.number_of_nodes()} nodes,{G.number_of_edges()} edges, {nx.density(G)}\")" 699 | ], 700 | "metadata": { 701 | "id": "9uatcgII-np0" 702 | }, 703 | "execution_count": null, 704 | "outputs": [] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "source": [ 709 | "# Step 3: Randomly sample 10% of the nodes\n", 710 | "num_sample = int(0.10 * G.number_of_nodes())\n", 711 | "sample_nodes = random.sample(list(G.nodes()), num_sample)\n", 712 | "\n", 713 | "# Step 4: Create the subgraph induced by the sampled nodes\n", 714 | "subG = G.subgraph(sample_nodes)" 715 | ], 716 | "metadata": { 717 | "id": "JgHWu3YB9Ivy" 718 | }, 719 | "execution_count": null, 720 | "outputs": [] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "source": [ 725 | "# Step 5: Compute and print basic metrics\n", 726 | "print(f\"\\nSampled subgraph (10% of nodes):\")\n", 727 | "print(f\"Nodes: {subG.number_of_nodes()}\")\n", 728 | "print(f\"Edges: {subG.number_of_edges()}\")\n", 729 | "print(f\"Density: {nx.density(subG):.6f}\")" 730 | ], 731 | "metadata": { 732 | "id": "s55Htcam-rJ2" 733 | }, 734 | "execution_count": null, 735 | "outputs": [] 736 | }, 
737 | { 738 | "cell_type": "markdown", 739 | "source": [ 740 | "## Degree" 741 | ], 742 | "metadata": { 743 | "id": "cci-WJHpn1fC" 744 | } 745 | }, 746 | { 747 | "cell_type": "code", 748 | "source": [ 749 | "import networkx as nx\n", 750 | "import urllib.request\n", 751 | "import zipfile\n", 752 | "import os\n", 753 | "import matplotlib.pyplot as plt\n", 754 | "import pandas as pd\n", 755 | "import seaborn as sns\n", 756 | "from collections import Counter\n", 757 | "\n", 758 | "\n", 759 | "# Step 1: Download the dataset (ZIP archive)\n", 760 | "url = \"https://snap.stanford.edu/data/twitch_gamers.zip\"\n", 761 | "zip_filename = \"twitch_gamers.zip\"\n", 762 | "urllib.request.urlretrieve(url, zip_filename)" 763 | ], 764 | "metadata": { 765 | "id": "jT6BSqLYn2Z-" 766 | }, 767 | "execution_count": null, 768 | "outputs": [] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "source": [ 773 | "# Step 2: Extract the ZIP file\n", 774 | "with zipfile.ZipFile(zip_filename, 'r') as zip_ref:\n", 775 | " zip_ref.extractall(\"twitch_data\")\n", 776 | "\n", 777 | "# Step 3: Load the edge list from CSV using pandas\n", 778 | "edge_file = \"twitch_data/large_twitch_edges.csv\"\n", 779 | "df = pd.read_csv(edge_file)\n", 780 | "\n", 781 | "# Step 4: Create an undirected graph from the edge list\n", 782 | "G = nx.from_pandas_edgelist(df, source='numeric_id_1',\n", 783 | " target='numeric_id_2')\n", 784 | "\n", 785 | "# Step 5: Print basic info\n", 786 | "print(f\"Number of nodes: {G.number_of_nodes()}\")\n", 787 | "print(f\"Number of edges: {G.number_of_edges()}\")" 788 | ], 789 | "metadata": { 790 | "id": "gr9vmSKTpM1s" 791 | }, 792 | "execution_count": null, 793 | "outputs": [] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "source": [ 798 | "# Step 6: Compute degrees\n", 799 | "degrees = [deg for node, deg in G.degree()]\n", 800 | "\n", 801 | "# Step 7: Show degree statistics\n", 802 | "print(f\"Max degree: {max(degrees)}\")\n", 803 | "print(f\"Average degree: 
{sum(degrees)/len(degrees):.2f}\")\n", 804 | "print(f\"Average degree alternative: {nx.density(G)*(G.number_of_nodes()-1):.2f}\")" 805 | ], 806 | "metadata": { 807 | "id": "GYUrbf-Cpdkp" 808 | }, 809 | "execution_count": null, 810 | "outputs": [] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "source": [ 815 | "# Set a clean, minimalist style\n", 816 | "plt.figure(figsize=(10, 6))\n", 817 | "sns.set_style(\"whitegrid\")\n", 818 | "\n", 819 | "# Plot histogram with soft color and clear edges\n", 820 | "plt.hist(degrees, bins=50, color=\"#4C72B0\", edgecolor='white', alpha=0.85)\n", 821 | "\n", 822 | "# Titles with hierarchy and clarity\n", 823 | "plt.title(\"Node Degree Distribution in Twitch Gamer Network\", fontsize=16, weight='bold')\n", 824 | "plt.xlabel(\"Degree\", fontsize=14)\n", 825 | "plt.ylabel(\"Number of Nodes\", fontsize=14)\n", 826 | "\n", 827 | "# Reduce ticks clutter and highlight major ones\n", 828 | "plt.xticks(fontsize=12)\n", 829 | "plt.yticks(fontsize=12)\n", 830 | "\n", 831 | "# Remove top/right borders to reduce distraction\n", 832 | "sns.despine()\n", 833 | "\n", 834 | "# Add grid only on y-axis to improve readability\n", 835 | "plt.grid(axis='y', linestyle='--', linewidth=0.5, alpha=0.7)\n", 836 | "\n", 837 | "plt.tight_layout()\n", 838 | "plt.show()" 839 | ], 840 | "metadata": { 841 | "id": "vblpYxqrsaOj" 842 | }, 843 | "execution_count": null, 844 | "outputs": [] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "source": [ 849 | "# Count the frequency of each degree\n", 850 | "degree_count = Counter(degrees)\n", 851 | "\n", 852 | "# Sort by degree (optional, for readability)\n", 853 | "sorted_degree_count = dict(sorted(degree_count.items()))\n", 854 | "\n", 855 | "# Print the histogram values\n", 856 | "print(\"Degree\\tNumber of Nodes\")\n", 857 | "for degree, count in sorted_degree_count.items():\n", 858 | " print(f\"{degree}\\t{count}\")" 859 | ], 860 | "metadata": { 861 | "id": "v0nMlWDnsg1l" 862 | }, 863 | "execution_count": null, 
864 | "outputs": [] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "source": [ 869 | "import seaborn as sns\n", 870 | "import matplotlib.pyplot as plt\n", 871 | "\n", 872 | "# Filter degrees up to 200\n", 873 | "filtered_degrees = [d for d in degrees if d <= 200]\n", 874 | "\n", 875 | "# Plot the histogram\n", 876 | "plt.figure(figsize=(10, 6))\n", 877 | "sns.set_style(\"whitegrid\")\n", 878 | "\n", 879 | "plt.hist(filtered_degrees, bins=50, color=\"#4C72B0\", edgecolor='white', alpha=0.85)\n", 880 | "\n", 881 | "plt.title(\"Degree Distribution (Nodes with Degree ≤ 200)\", fontsize=16, weight='bold')\n", 882 | "plt.xlabel(\"Degree\", fontsize=14)\n", 883 | "plt.ylabel(\"Number of Nodes\", fontsize=14)\n", 884 | "\n", 885 | "plt.xticks(fontsize=12)\n", 886 | "plt.yticks(fontsize=12)\n", 887 | "sns.despine()\n", 888 | "plt.grid(axis='y', linestyle='--', linewidth=0.5, alpha=0.7)\n", 889 | "plt.tight_layout()\n", 890 | "\n", 891 | "# Save figure with transparent background and high resolution\n", 892 | "plt.savefig(\"degree_histogram.png\", dpi=300, transparent=True, bbox_inches='tight')\n", 893 | "\n", 894 | "# Optional: Show plot after saving\n", 895 | "plt.show()" 896 | ], 897 | "metadata": { 898 | "id": "Xgo_-JgNtL-a" 899 | }, 900 | "execution_count": null, 901 | "outputs": [] 902 | }, 903 | { 904 | "cell_type": "code", 905 | "source": [], 906 | "metadata": { 907 | "id": "3wgL3Tnot36r" 908 | }, 909 | "execution_count": null, 910 | "outputs": [] 911 | } 912 | ] 913 | } -------------------------------------------------------------------------------- /lessons/week02/lesson03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week02/lesson03.pdf -------------------------------------------------------------------------------- /lessons/week04/Assortativity.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week04/Assortativity.pdf -------------------------------------------------------------------------------- /lessons/week06/Walks_Paths_and_Distances.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"id":"--SEHn9LcaUZ"},"source":["import numpy as np\n","import networkx as nx\n","import matplotlib.pyplot as plt"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"rtH5MLCi2bJ3"},"source":["# an adjacency matrix\n","A = np.array([[0,1,1,1,0,0,0],\n"," [1,0,1,0,1,1,0],\n"," [1,1,0,1,0,1,0],\n"," [1,0,1,0,0,1,0],\n"," [0,1,0,0,0,1,0],\n"," [0,1,1,1,1,0,1],\n"," [0,0,0,0,0,1,0]])"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"wAkBpy3Zgd2i"},"source":["# A2 = A @ A (matrix product): entry (i, j) counts the walks of length 2 from i to j\n","A2 = np.matmul(A,A)\n","A2"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ZgyQmQabdfEg"},"source":["# create an undirected graph\n","G = nx.Graph()\n","\n","# add a bunch of edges\n","G.add_edges_from([(1,2),(2,3),(2,4),(3,5),(4,5)])\n","\n","# draw the graph\n","fig, ax = plt.subplots(1, 1,figsize=(10,8))\n","nx.draw_networkx(G, ax=ax,node_size=1000)\n","plt.axis(\"off\")\n","plt.show()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"JreXietd2gb6"},"source":["# Is G a tree?\n","nx.is_tree(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"f_uuD3l56I4D"},"source":["# Return a cycle basis of G (an empty list if G has no 
cycles)\n","nx.cycle_basis(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"9EcH0inw6k2R"},"source":["# create a 
tree (graph without cycles)\n","G = nx.Graph()\n","\n","# add a bunch of edges\n","G.add_edges_from([(1,2),(2,3),(2,4),(2,5),(2,7),(5,6),(7,8)])\n","\n","# draw the graph\n","fig, ax = plt.subplots(1, 1,figsize=(10,8))\n","nx.draw_networkx(G, ax=ax,node_size=1000)\n","plt.axis(\"off\")\n","plt.show()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"e-7fpjwL-fZH"},"source":["# Is G a tree?\n","nx.is_tree(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"ylDdo2Ac-nGK"},"source":["# Return a cycle basis of G (an empty list if G has no cycles)\n","nx.cycle_basis(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"-QPeOEk9-o8o"},"source":["# create an undirected graph\n","G = nx.Graph()\n","\n","# add a bunch of edges\n","G.add_edges_from([(\"a\",\"e\"),\n"," (\"e\",\"d\"),(\"e\",\"f\"),\n"," (\"d\",\"f\"),\n"," (\"f\",\"h\"),(\"f\",\"g\"),\n"," (\"h\",\"i\"),(\"h\",\"b\")])\n","\n","# add a disconnected node\n","G.add_node(\"c\")\n","\n","# draw the graph\n","fig, ax = plt.subplots(1, 1,figsize=(10,8))\n","nx.draw_networkx(G, 
ax=ax,node_size=1000)\n","plt.axis(\"off\")\n","plt.show()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"PElb47CWGaUz"},"source":["nx.has_path(G,\"a\",\"c\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"AsBuIJPZKFNN"},"source":["nx.has_path(G,\"a\",\"b\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"_6JG1dW4KH5n"},"source":["nx.shortest_path(G,\"a\",\"b\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"SnTA-X1eKMjz"},"source":["nx.shortest_path(G,\"a\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"T6O2cHmRKS0D"},"source":["nx.shortest_path_length(G,\"a\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"DYeEfHy0KaKl"},"source":["nx.shortest_path(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"MzOxgEoaKelC"},"source":["nx.average_shortest_path_length(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"2hHoLsBjKodp"},"source":["G.remove_node(\"c\")\n","nx.average_shortest_path_length(G)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"AzuVKZBs-zpo"},"source":[],"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /lessons/week06/Week_06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week06/Week_06.pdf -------------------------------------------------------------------------------- /lessons/week06/dijsktra.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | 
"language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Dijkstra's Algorithm - without paths\n", 21 | "\n", 22 | "You're given an integer `start` and a list `edges` of pairs of integers.\n", 23 | "\n", 24 | "The list is what's called an adjacency list, and it represents a graph. The number of vertices in the graph is equal to the length of `edges`, where each index `i` in `edges` contains vertex `i`'s outbound edges, in no particular order. Each individual edge is represented by a pair of two numbers, `[destination, distance]`, where the destination is a positive integer denoting the destination vertex and the distance is a positive integer representing the length of the edge (the distance from vertex `i` to vertex `destination`). Note that these edges are directed, meaning that you can only travel from a particular vertex to its destination—not the other way around (unless the destination vertex itself has an outbound edge to the original vertex).\n", 25 | "\n", 26 | "Write a function that computes the lengths of the shortest paths between `start` and all of the other vertices in the graph using Dijkstra's algorithm and returns them in an array. Each index `i` in the output array should represent the length of the shortest path between `start` and vertex `i`. 
If no path is found from `start` to vertex `i`, then `output[i]` should be `-1`.\n", 27 | "\n", 28 | "Note that the graph represented by `edges` won't contain any self-loops (vertices that have an outbound edge to themselves) and will only have positively weighted edges (i.e., no negative distances).\n", 29 | "\n" 30 | ], 31 | "metadata": { 32 | "id": "mY4jasRMsau5" 33 | } 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "source": [ 38 | "**Sample Input**" 39 | ], 40 | "metadata": { 41 | "id": "npq7VCLuslOh" 42 | } 43 | }, 44 | { 45 | "cell_type": "code", 46 | "source": [ 47 | "start = 0\n", 48 | "edges = [\n", 49 | " [[1, 7]],\n", 50 | " [[2, 6], [3, 20], [4, 3]],\n", 51 | " [[3, 14]],\n", 52 | " [[4, 2]],\n", 53 | " [],\n", 54 | " [],\n", 55 | "]" 56 | ], 57 | "metadata": { 58 | "id": "kWHKqd4Jsd4E" 59 | }, 60 | "execution_count": null, 61 | "outputs": [] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "source": [ 66 | "**Sample Output**" 67 | ], 68 | "metadata": { 69 | "id": "VXm2gf_Nso6t" 70 | } 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "[0, 7, 13, 27, 10, -1]" 76 | ], 77 | "metadata": { 78 | "id": "MCawXcPustON" 79 | }, 80 | "execution_count": null, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "source": [ 86 | "## Hints\n", 87 | "\n", 88 | "Hint 1\n", 89 | "\n", 90 | "Dijkstra's algorithm works by visiting vertices in the graph, one by one, all the while keeping track of the current shortest distances from the start vertex to all other vertices and continuously updating these shortest distances. More specifically, the algorithm keeps track of unvisited vertices and visits the unvisited vertex with the shortest distance at any point in time, naturally starting with the start vertex. 
Whenever the algorithm visits an unvisited vertex, it looks at all of its outbound edges and tries to update the shortest distances from the start to the destinations in the edges, using the current shortest distance to the current vertex as a base. Once the algorithm has visited all of the vertices and considered all of their edges, it is guaranteed to have found the shortest path to each vertex. How can you implement this algorithm?\n", 91 | "\n", 92 | "Hint 2\n", 93 | "\n", 94 | "The most challenging part of Dijkstra's algorithm is determining how to efficiently find the vertex with the current shortest distance. Can you think of a data structure that could be used to keep track of the distances and to efficiently retrieve the vertex with the current shortest distance at each step?\n", 95 | "\n", 96 | "Hint 3\n", 97 | "\n", 98 | "Create an array that can store the final shortest distances between the start vertex and all other vertices, as well as a min-heap that will hold all of the unvisited vertices and their current shortest distances. For both the final distances array and the min-heap, initialize all vertices except for the start node as having a distance of infinity; the start node will have a distance 0. Next, write a while loop that will run until the min-heap is empty. At every iteration in the loop, remove the vertex from the top of the heap (the vertex with the shortest distance), loop through all of its edges, and for each edge, update the shortest distance of the destination vertex to be the minimum of the destination's current shortest distance and the currently visited vertex's distance plus the current edge's weight. Once the heap is empty, all of the vertices will have been visited, and you'll have the shortest distances to all vertices stored in your distances array." 
99 | ], 100 | "metadata": { 101 | "id": "LoYAnGevsz2r" 102 | } 103 | }, 104 | { 105 | "cell_type": "code", 106 | "source": [ 107 | "# O(V^2 + E) time | O(V) space - where V is the number of vertices and E is the number of edges in the input graph\n", 108 | "def dijkstrasAlgorithm(start, edges):\n", 109 | " \"\"\"\n", 110 | " Implements Dijkstra's algorithm to find the shortest path from a starting node to all other nodes in a graph.\n", 111 | "\n", 112 | " Args:\n", 113 | " start (int): The starting node index.\n", 114 | " edges (list of list): Adjacency list representing the graph. Each index corresponds to a vertex,\n", 115 | " and each entry is a list of pairs [destination, weight].\n", 116 | "\n", 117 | " Returns:\n", 118 | " list: A list of the shortest distances from the starting node to each node. If a node is not reachable,\n", 119 | " the distance is -1.\n", 120 | " \"\"\"\n", 121 | " numberOfVertices = len(edges)\n", 122 | "\n", 123 | " # Initialize the minimum distances for all vertices as infinity\n", 124 | " # except the starting vertex which is set to 0.\n", 125 | " minDistances = [float(\"inf\") for _ in range(numberOfVertices)]\n", 126 | " minDistances[start] = 0\n", 127 | "\n", 128 | " # Keep track of visited nodes to avoid reprocessing them.\n", 129 | " visited = set()\n", 130 | "\n", 131 | " # Continue processing nodes until all have been visited.\n", 132 | " while len(visited) != numberOfVertices:\n", 133 | " # Find the vertex with the smallest known distance that has not been visited.\n", 134 | " vertex, currentMinDistance = getVertexWithMinDistance(minDistances, visited)\n", 135 | "\n", 136 | " # If the smallest distance is infinity, all remaining vertices are unreachable.\n", 137 | " if currentMinDistance == float(\"inf\"):\n", 138 | " break\n", 139 | "\n", 140 | " # Mark the current vertex as visited.\n", 141 | " visited.add(vertex)\n", 142 | "\n", 143 | " # Iterate through all the neighbors of the current vertex.\n", 144 | " for edge in 
edges[vertex]:\n", 145 | " destination, distanceToDestination = edge\n", 146 | "\n", 147 | " # Skip the neighbor if it has already been visited.\n", 148 | " if destination in visited:\n", 149 | " continue\n", 150 | "\n", 151 | " # Calculate the new potential path distance to the neighbor.\n", 152 | " newPathDistance = currentMinDistance + distanceToDestination\n", 153 | " currentDestinationDistance = minDistances[destination]\n", 154 | "\n", 155 | " # Update the shortest distance to the neighbor if the new path is shorter.\n", 156 | " if newPathDistance < currentDestinationDistance:\n", 157 | " minDistances[destination] = newPathDistance\n", 158 | "\n", 159 | " # Replace any remaining infinity distances with -1 to indicate unreachable nodes.\n", 160 | " return list(map(lambda x: -1 if x == float(\"inf\") else x, minDistances))\n", 161 | "\n", 162 | "\n", 163 | "def getVertexWithMinDistance(distances, visited):\n", 164 | " \"\"\"\n", 165 | " Helper function to find the vertex with the smallest known distance that has not been visited.\n", 166 | "\n", 167 | " Args:\n", 168 | " distances (list): A list of the shortest known distances to each vertex.\n", 169 | " visited (set): A set of vertices that have already been visited.\n", 170 | "\n", 171 | " Returns:\n", 172 | " tuple: The index of the vertex with the smallest distance and its distance value.\n", 173 | " \"\"\"\n", 174 | " currentMinDistance = float(\"inf\")\n", 175 | " vertex = -1\n", 176 | "\n", 177 | " # Iterate over all vertices to find the one with the smallest distance.\n", 178 | " for vertexIdx, distance in enumerate(distances):\n", 179 | " # Skip the vertex if it has already been visited.\n", 180 | " if vertexIdx in visited:\n", 181 | " continue\n", 182 | "\n", 183 | " # Update the current minimum distance and vertex if a smaller distance is found.\n", 184 | " if distance <= currentMinDistance:\n", 185 | " vertex = vertexIdx\n", 186 | " currentMinDistance = distance\n", 187 | "\n", 188 | " return 
vertex, currentMinDistance" 189 | ], 190 | "metadata": { 191 | "id": "g-jMjCkDs0Kx" 192 | }, 193 | "execution_count": null, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "def test_dijkstras_algorithm():\n", 200 | " # Input graph\n", 201 | " edges = [\n", 202 | " [[1, 7]], # Node 0 -> Node 1 (weight 7)\n", 203 | " [[2, 6], [3, 20], [4, 3]], # Node 1 -> Nodes 2 (weight 6), 3 (weight 20), 4 (weight 3)\n", 204 | " [[3, 14]], # Node 2 -> Node 3 (weight 14)\n", 205 | " [[4, 2]], # Node 3 -> Node 4 (weight 2)\n", 206 | " [], # Node 4 has no outgoing edges\n", 207 | " [] # Node 5 has no outgoing edges\n", 208 | " ]\n", 209 | " start = 0\n", 210 | "\n", 211 | " # Expected output\n", 212 | " expected_output = [0, 7, 13, 27, 10, -1]\n", 213 | "\n", 214 | " # Run Dijkstra's algorithm\n", 215 | " result = dijkstrasAlgorithm(start, edges)\n", 216 | "\n", 217 | " # Test the result\n", 218 | " assert result == expected_output, f\"Test failed: expected {expected_output}, but got {result}\"\n", 219 | "\n", 220 | " print(\"Test passed: Output matches expected result.\")\n", 221 | "\n", 222 | "# Run the test\n", 223 | "test_dijkstras_algorithm()" 224 | ], 225 | "metadata": { 226 | "id": "MQUBh2xfxcQt" 227 | }, 228 | "execution_count": null, 229 | "outputs": [] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "source": [ 234 | "# Dijkstra's Algorithm - with paths" 235 | ], 236 | "metadata": { 237 | "id": "Bt4vSnf8yKpA" 238 | } 239 | }, 240 | { 241 | "cell_type": "code", 242 | "source": [ 243 | "def dijkstrasAlgorithmWithPaths(start, edges):\n", 244 | " \"\"\"\n", 245 | " Implements Dijkstra's algorithm to find the shortest paths from a starting node to all other nodes in a graph.\n", 246 | " Additionally, it tracks the path to each node using a predecessor list.\n", 247 | "\n", 248 | " Args:\n", 249 | " start (int): The starting node index.\n", 250 | " edges (list of list): Adjacency list representing the graph. 
Each index corresponds to a vertex,\n", 251 | " and each entry is a list of pairs [destination, weight].\n", 252 | "\n", 253 | " Returns:\n", 254 | " tuple: A tuple containing:\n", 255 | " - minDistances (list): A list of the shortest distances from the starting node to each node.\n", 256 | " If a node is unreachable, its distance is `inf`.\n", 257 | " - previousNodes (list): A list where each index points to the predecessor of the node\n", 258 | " in the shortest path. `None` if no path exists.\n", 259 | " \"\"\"\n", 260 | " numberOfVertices = len(edges)\n", 261 | "\n", 262 | " # Initialize the minimum distances with infinity, except for the starting node (distance 0).\n", 263 | " minDistances = [float(\"inf\") for _ in range(numberOfVertices)]\n", 264 | " minDistances[start] = 0\n", 265 | "\n", 266 | " # Set of visited nodes to avoid re-processing.\n", 267 | " visited = set()\n", 268 | "\n", 269 | " # Predecessor list to track the path to each node.\n", 270 | " previousNodes = [None] * numberOfVertices\n", 271 | "\n", 272 | " # Iterate until all nodes are processed or no more reachable nodes exist.\n", 273 | " while len(visited) != numberOfVertices:\n", 274 | " # Find the unvisited node with the smallest known distance.\n", 275 | " vertex, currentMinDistance = getVertexWithMinDistance(minDistances, visited)\n", 276 | "\n", 277 | " # If the smallest distance is infinity, remaining nodes are unreachable.\n", 278 | " if currentMinDistance == float(\"inf\"):\n", 279 | " break\n", 280 | "\n", 281 | " # Mark the current node as visited.\n", 282 | " visited.add(vertex)\n", 283 | "\n", 284 | " # Update distances for all neighbors of the current node.\n", 285 | " for edge in edges[vertex]:\n", 286 | " destination, distanceToDestination = edge\n", 287 | "\n", 288 | " # Skip if the neighbor is already visited.\n", 289 | " if destination in visited:\n", 290 | " continue\n", 291 | "\n", 292 | " # Calculate the new potential path distance.\n", 293 | " newPathDistance = 
currentMinDistance + distanceToDestination\n", 294 | " currentDestinationDistance = minDistances[destination]\n", 295 | "\n", 296 | " # Update the shortest distance and the predecessor if the new path is shorter.\n", 297 | " if newPathDistance <= currentDestinationDistance:\n", 298 | " minDistances[destination] = newPathDistance\n", 299 | " previousNodes[destination] = vertex # Update predecessor\n", 300 | "\n", 301 | " return minDistances, previousNodes\n", 302 | "\n", 303 | "\n", 304 | "def getVertexWithMinDistance(distances, visited):\n", 305 | " \"\"\"\n", 306 | " Helper function to find the unvisited node with the smallest known distance.\n", 307 | "\n", 308 | " Args:\n", 309 | " distances (list): A list of the shortest known distances to each node.\n", 310 | " visited (set): A set of already visited nodes.\n", 311 | "\n", 312 | " Returns:\n", 313 | " tuple: The index of the node with the smallest distance and its distance value.\n", 314 | " \"\"\"\n", 315 | " currentMinDistance = float(\"inf\")\n", 316 | " vertex = -1\n", 317 | "\n", 318 | " # Iterate over all nodes to find the one with the smallest distance.\n", 319 | " for vertexIdx, distance in enumerate(distances):\n", 320 | " if vertexIdx in visited:\n", 321 | " continue\n", 322 | " if distance <= currentMinDistance:\n", 323 | " vertex = vertexIdx\n", 324 | " currentMinDistance = distance\n", 325 | "\n", 326 | " return vertex, currentMinDistance" 327 | ], 328 | "metadata": { 329 | "id": "Adzk8NFmVUXu" 330 | }, 331 | "execution_count": null, 332 | "outputs": [] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "source": [ 337 | "def reconstructPath(previousNodes, start, end):\n", 338 | " path = []\n", 339 | " currentNode = end\n", 340 | "\n", 341 | " while currentNode is not None: # Trace back to the start node\n", 342 | " path.append(currentNode)\n", 343 | " currentNode = previousNodes[currentNode]\n", 344 | "\n", 345 | " path.reverse() # Reverse the path to get it in the correct order\n", 346 | "\n", 
347 | " # If the start node is not in the path, the destination is unreachable\n", 348 | " if path[0] != start:\n", 349 | " return []\n", 350 | "\n", 351 | " return path\n" 352 | ], 353 | "metadata": { 354 | "id": "f7zCuZd5QC9H" 355 | }, 356 | "execution_count": null, 357 | "outputs": [] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "source": [ 362 | "def test_dijkstrasAlgorithmWithPaths():\n", 363 | " # Input graph (adjacency list)\n", 364 | " edges = [\n", 365 | " [[1, 7]], # Node 0 -> Node 1 (weight 7)\n", 366 | " [[2, 6], [3, 20], [4, 3]], # Node 1 -> Nodes 2 (6), 3 (20), 4 (3)\n", 367 | " [[3, 14]], # Node 2 -> Node 3 (weight 14)\n", 368 | " [[4, 2]], # Node 3 -> Node 4 (weight 2)\n", 369 | " [], # Node 4 has no outgoing edges\n", 370 | " [] # Node 5 has no outgoing edges\n", 371 | " ]\n", 372 | " start = 0 # Starting node\n", 373 | "\n", 374 | " # Run Dijkstra's algorithm\n", 375 | " minDistances, previousNodes = dijkstrasAlgorithmWithPaths(start, edges)\n", 376 | "\n", 377 | " # Expected distances\n", 378 | " expectedDistances = [0, 7, 13, 27, 10, float(\"inf\")]\n", 379 | " assert minDistances == expectedDistances, f\"Distances test failed: {minDistances}\"\n", 380 | "\n", 381 | " # Reconstruct paths\n", 382 | " path_to_3 = reconstructPath(previousNodes, start, 3)\n", 383 | " expectedPathTo3 = [0, 1, 2, 3]\n", 384 | " assert path_to_3 == expectedPathTo3, f\"Path to 3 test failed: {path_to_3}\"\n", 385 | "\n", 386 | " path_to_4 = reconstructPath(previousNodes, start, 4)\n", 387 | " expectedPathTo4 = [0, 1, 4]\n", 388 | " assert path_to_4 == expectedPathTo4, f\"Path to 4 test failed: {path_to_4}\"\n", 389 | "\n", 390 | " path_to_5 = reconstructPath(previousNodes, start, 5)\n", 391 | " expectedPathTo5 = []\n", 392 | " assert path_to_5 == expectedPathTo5, f\"Path to 5 test failed: {path_to_5}\"\n", 393 | "\n", 394 | " print(\"All tests passed!\")\n", 395 | "\n", 396 | "# Run the test\n", 397 | "test_dijkstrasAlgorithmWithPaths()" 398 | ], 399 | "metadata": 
{ 400 | "id": "5mMLPzmnVWHM" 401 | }, 402 | "execution_count": null, 403 | "outputs": [] 404 | } 405 | ] 406 | } -------------------------------------------------------------------------------- /lessons/week06/dijsktra.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week06/dijsktra.pdf -------------------------------------------------------------------------------- /lessons/week06/dijsktra_min_heap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Dijkstra's Algorithm - without paths\n", 21 | "\n", 22 | "You're given an integer `start` and a list `edges` of pairs of integers.\n", 23 | "\n", 24 | "The list is what's called an adjacency list, and it represents a graph. The number of vertices in the graph is equal to the length of `edges`, where each index `i` in `edges` contains vertex `i`'s outbound edges, in no particular order. Each individual edge is represented by a pair of two numbers, `[destination, distance]`, where the destination is a positive integer denoting the destination vertex and the distance is a positive integer representing the length of the edge (the distance from vertex `i` to vertex `destination`). 
Note that these edges are directed, meaning that you can only travel from a particular vertex to its destination—not the other way around (unless the destination vertex itself has an outbound edge to the original vertex).\n", 25 | "\n", 26 | "Write a function that computes the lengths of the shortest paths between `start` and all of the other vertices in the graph using Dijkstra's algorithm and returns them in an array. Each index `i` in the output array should represent the length of the shortest path between `start` and vertex `i`. If no path is found from `start` to vertex `i`, then `output[i]` should be `-1`.\n", 27 | "\n", 28 | "Note that the graph represented by `edges` won't contain any self-loops (vertices that have an outbound edge to themselves) and will only have positively weighted edges (i.e., no negative distances)." 29 | ], 30 | "metadata": { 31 | "id": "mY4jasRMsau5" 32 | } 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "source": [ 37 | "**Sample Input**" 38 | ], 39 | "metadata": { 40 | "id": "npq7VCLuslOh" 41 | } 42 | }, 43 | { 44 | "cell_type": "code", 45 | "source": [ 46 | "start = 0\n", 47 | "edges = [\n", 48 | " [[1, 7]],\n", 49 | " [[2, 6], [3, 20], [4, 3]],\n", 50 | " [[3, 14]],\n", 51 | " [[4, 2]],\n", 52 | " [],\n", 53 | " [],\n", 54 | "]" 55 | ], 56 | "metadata": { 57 | "id": "kWHKqd4Jsd4E" 58 | }, 59 | "execution_count": 1, 60 | "outputs": [] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "source": [ 65 | "**Sample Output**" 66 | ], 67 | "metadata": { 68 | "id": "VXm2gf_Nso6t" 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "[0, 7, 13, 27, 10, -1]" 75 | ], 76 | "metadata": { 77 | "id": "MCawXcPustON", 78 | "colab": { 79 | "base_uri": "https://localhost:8080/" 80 | }, 81 | "outputId": "e384dc3e-e39c-44be-8d7b-8cff7a1a3b57" 82 | }, 83 | "execution_count": null, 84 | "outputs": [ 85 | { 86 | "output_type": "execute_result", 87 | "data": { 88 | "text/plain": [ 89 | "[0, 7, 13, 27, 10, -1]" 90 | ] 91 | }, 92 | 
"metadata": {}, 93 | "execution_count": 2 94 | } 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "source": [ 100 | "## Hints\n", 101 | "\n", 102 | "Hint 1\n", 103 | "\n", 104 | "Dijkstra's algorithm works by visiting vertices in the graph, one by one, all the while keeping track of the current shortest distances from the start vertex to all other vertices and continuously updating these shortest distances. More specifically, the algorithm keeps track of unvisited vertices and visits the unvisited vertex with the shortest distance at any point in time, naturally starting with the start vertex. Whenever the algorithm visits an unvisited vertex, it looks at all of its outbound edges and tries to update the shortest distances from the start to the destinations in the edges, using the current shortest distance to the current vertex as a base. Once the algorithm has visited all of the vertices and considered all of their edges, it is guaranteed to have found the shortest path to each vertex. How can you implement this algorithm?\n", 105 | "\n", 106 | "Hint 2\n", 107 | "\n", 108 | "The most challenging part of Dijkstra's algorithm is determining how to efficiently find the vertex with the current shortest distance. Can you think of a data structure that could be used to keep track of the distances and to efficiently retrieve the vertex with the current shortest distance at each step?\n", 109 | "\n", 110 | "Hint 3\n", 111 | "\n", 112 | "Create an array that can store the final shortest distances between the start vertex and all other vertices, as well as a min-heap that will hold all of the unvisited vertices and their current shortest distances. For both the final distances array and the min-heap, initialize all vertices except for the start node as having a distance of infinity; the start node will have a distance 0. Next, write a while loop that will run until the min-heap is empty. 
class MinHeap:
    """
    Min-heap of (vertex, distance) pairs ordered by distance, for algorithms like Dijkstra.

    A vertex map records the heap index of every vertex that is still in the heap, so the
    entry of a vertex can be found in constant time and re-prioritised in O(log(n)).
    """

    def __init__(self, array):
        """
        Initializes the MinHeap with an input array of (vertex, distance) pairs.

        Args:
            array (list): List of (vertex, distance) pairs. The list is heapified in place and
                          becomes the heap's storage. Vertices must be hashable and unique.

        Attributes:
            vertexMap (dict): Maps each vertex to its current index in the heap.
            heap (list): List representing the binary heap as an array.
        """
        # Map every vertex to the index it really occupies in the input. The previous
        # {idx: idx} map silently assumed that the pair at index i describes vertex i.
        self.vertexMap = {pair[0]: idx for idx, pair in enumerate(array)}

        # Build the heap from the input array; swaps keep vertexMap in sync.
        self.heap = self.buildHeap(array)

    def isEmpty(self):
        """
        Checks if the heap is empty.

        Returns:
            bool: True if the heap is empty, False otherwise.
        """
        return len(self.heap) == 0

    def buildHeap(self, array):
        """
        Rearranges `array` in place into a valid min-heap by sifting down every parent node.

        Args:
            array (list): List of (vertex, distance) pairs.

        Returns:
            list: The same list object, now ordered as a min-heap.
        """
        # Nodes after the last parent are leaves and already satisfy the heap property.
        firstParentIdx = (len(array) - 2) // 2
        for currentIdx in reversed(range(firstParentIdx + 1)):
            self.siftDown(currentIdx, len(array) - 1, array)
        return array

    def siftDown(self, currentIdx, endIdx, heap):
        """
        Restores the heap property by moving a node down until no child is smaller.

        Args:
            currentIdx (int): Index of the node to sift down.
            endIdx (int): Last valid index in the heap.
            heap (list): The heap array.

        Complexity:
            Time: O(log(n))
            Space: O(1)
        """
        childOneIdx = currentIdx * 2 + 1
        while childOneIdx <= endIdx:
            # -1 marks "no second child".
            childTwoIdx = currentIdx * 2 + 2 if currentIdx * 2 + 2 <= endIdx else -1

            # Pick the smaller child; ties go to the first child.
            if childTwoIdx != -1 and heap[childTwoIdx][1] < heap[childOneIdx][1]:
                idxToSwap = childTwoIdx
            else:
                idxToSwap = childOneIdx

            if heap[idxToSwap][1] < heap[currentIdx][1]:
                self.swap(currentIdx, idxToSwap, heap)
                currentIdx = idxToSwap
                childOneIdx = currentIdx * 2 + 1
            else:
                return

    def siftUp(self, currentIdx, heap):
        """
        Restores the heap property by moving a node up while it is smaller than its parent.

        Args:
            currentIdx (int): Index of the node to sift up.
            heap (list): The heap array.

        Complexity:
            Time: O(log(n))
            Space: O(1)
        """
        parentIdx = (currentIdx - 1) // 2
        while currentIdx > 0 and heap[currentIdx][1] < heap[parentIdx][1]:
            self.swap(currentIdx, parentIdx, heap)
            currentIdx = parentIdx
            parentIdx = (currentIdx - 1) // 2

    def remove(self):
        """
        Removes and returns the entry with the smallest distance (the root).

        Returns:
            tuple: The (vertex, distance) pair with the smallest distance,
                   or None if the heap is empty.

        Complexity:
            Time: O(log(n))
            Space: O(1)
        """
        if self.isEmpty():
            return None

        # Move the root to the end so it can be popped in O(1).
        self.swap(0, len(self.heap) - 1, self.heap)
        vertex, distance = self.heap.pop()
        self.vertexMap.pop(vertex)  # The vertex is no longer tracked by the heap

        # The element moved to the root may be too large; push it back down.
        self.siftDown(0, len(self.heap) - 1, self.heap)
        return vertex, distance

    def swap(self, i, j, heap):
        """
        Swaps two nodes in the heap and updates their positions in the vertexMap.

        Args:
            i (int): Index of the first node.
            j (int): Index of the second node.
            heap (list): The heap array.
        """
        self.vertexMap[heap[i][0]] = j
        self.vertexMap[heap[j][0]] = i
        heap[i], heap[j] = heap[j], heap[i]

    def update(self, vertex, value):
        """
        Replaces the distance of a vertex that is still in the heap and restores heap order.

        Args:
            vertex: The vertex whose distance is to be updated.
            value: The new distance value (may be smaller or larger than the old one).

        Raises:
            KeyError: If the vertex is not (or no longer) in the heap.

        Complexity:
            Time: O(log(n))
            Space: O(1)
        """
        idx = self.vertexMap[vertex]
        self.heap[idx] = (vertex, value)
        # A decreased key moves up; an increased key moves down. At most one of the two
        # calls moves the node, and the second starts from the node's position after the first.
        self.siftUp(idx, self.heap)
        self.siftDown(self.vertexMap[vertex], len(self.heap) - 1, self.heap)


# O((v + e) * log(v)) time | O(v) space — where v is the number
# of vertices and e is the number of edges in the input graph
def dijkstrasAlgorithm(start, edges):
    """
    Computes the shortest distance from `start` to every vertex of a weighted directed graph.

    Args:
        start (int): The starting vertex index.
        edges (list of list): An adjacency list where each index represents a vertex, and each entry
                              is a list of [destination, weight] pairs.

    Returns:
        list: A list of minimum distances from the starting vertex to each vertex in the graph.
              If a vertex is unreachable, its distance is represented as -1.
    """
    numberOfVertices = len(edges)

    # Every distance starts at infinity except the start vertex.
    minDistances = [float("inf") for _ in range(numberOfVertices)]
    minDistances[start] = 0

    # The heap mirrors minDistances for the vertices that have not been visited yet.
    minDistancesHeap = MinHeap([(idx, float("inf")) for idx in range(numberOfVertices)])
    minDistancesHeap.update(start, 0)

    while not minDistancesHeap.isEmpty():
        vertex, currentMinDistance = minDistancesHeap.remove()

        # The smallest remaining distance is infinity: nothing left is reachable.
        if currentMinDistance == float("inf"):
            break

        # Relaxation: try to improve each neighbour through the current vertex.
        for destination, distanceToDestination in edges[vertex]:
            newPathDistance = currentMinDistance + distanceToDestination
            if newPathDistance < minDistances[destination]:
                minDistances[destination] = newPathDistance
                minDistancesHeap.update(destination, newPathDistance)

    # Unreachable vertices are reported as -1 instead of infinity.
    return [-1 if distance == float("inf") else distance for distance in minDistances]


def test_dijkstras_algorithm():
    """
    Test function for Dijkstra's algorithm.

    Input Graph:
        Node 0 -> Node 1 (weight 7)
        Node 1 -> Node 2 (weight 6), Node 3 (weight 20), Node 4 (weight 3)
        Node 2 -> Node 3 (weight 14)
        Node 3 -> Node 4 (weight 2)
        Node 4 -> No outgoing edges
        Node 5 -> No outgoing edges

    Expected Output:
        [0, 7, 13, 27, 10, -1]
        - Shortest distances from node 0 to all other nodes.
        - `-1` represents unreachable nodes (node 5 in this case).

    Returns:
        None. Prints "Test passed" if the output is correct.
    """
    # Input graph represented as an adjacency list
    edges = [
        [[1, 7]],                   # Node 0 -> Node 1 (weight 7)
        [[2, 6], [3, 20], [4, 3]],  # Node 1 -> Node 2 (6), Node 3 (20), Node 4 (3)
        [[3, 14]],                  # Node 2 -> Node 3 (weight 14)
        [[4, 2]],                   # Node 3 -> Node 4 (weight 2)
        [],                         # Node 4 has no outgoing edges
        []                          # Node 5 has no outgoing edges
    ]
    start = 0  # Starting node for Dijkstra's algorithm

    # Expected output: Shortest distances from the starting node
    expected_output = [0, 7, 13, 27, 10, -1]

    result = dijkstrasAlgorithm(start, edges)

    assert result == expected_output, f"Test failed: expected {expected_output}, but got {result}"

    print("Test passed: Output matches expected result.")


# Run the test function
test_dijkstras_algorithm()


# Dijkstra's Algorithm with Path

def dijkstrasAlgorithmWithPaths(start, edges):
    """
    Computes the shortest distances from `start` to every vertex and records, for each vertex,
    its predecessor on a shortest path so that paths can be reconstructed afterwards.

    Args:
        start (int): The index of the starting vertex.
        edges (list of list): Adjacency list representation of the graph, where each index represents a vertex,
                              and each entry is a list of [destination, weight] pairs. Weights must not be
                              negative.

    Returns:
        tuple: A tuple containing:
            - minDistances (list): The shortest distance from the starting vertex to each vertex.
                                   Unreachable vertices keep a distance of `float("inf")`.
            - previousNodes (list): For each vertex, its predecessor on a shortest path. The start
                                    vertex and every unreachable vertex have predecessor `None`.
    """
    numberOfVertices = len(edges)

    minDistances = [float("inf")] * numberOfVertices
    minDistances[start] = 0

    previousNodes = [None] * numberOfVertices

    # finalized[v] is True once v has been removed from the heap; its distance is final then.
    finalized = [False] * numberOfVertices

    heap = MinHeap([(i, float("inf")) for i in range(numberOfVertices)])
    heap.update(start, 0)

    while not heap.isEmpty():
        vertex, currentMinDistance = heap.remove()

        # Everything still in the heap is unreachable. Relaxing from here would compare
        # inf <= inf, assign predecessors to unreachable vertices and call update() on
        # vertices that were already removed from the heap (KeyError).
        if currentMinDistance == float("inf"):
            break

        finalized[vertex] = True

        for destination, weight in edges[vertex]:
            # A finalized vertex is no longer in the heap and cannot be improved. With
            # positive weights it never passes the test below; the guard matters for
            # zero-weight edges, where an equal distance would otherwise be "updated".
            if finalized[destination]:
                continue

            newPathDistance = currentMinDistance + weight

            # `<=` is deliberate: among equally short paths the predecessor finalized
            # last wins, which the notebook's expected paths rely on.
            if newPathDistance <= minDistances[destination]:
                minDistances[destination] = newPathDistance
                previousNodes[destination] = vertex
                heap.update(destination, newPathDistance)

    return minDistances, previousNodes


def reconstructPath(previousNodes, start, end):
    """
    Reconstructs the shortest path from the start node to the end node using the predecessor list.

    Args:
        previousNodes (list): A list where each index corresponds to a node, and the value at that index
                              is the predecessor node in the shortest path. `None` indicates no predecessor.
        start (int): The starting node index.
        end (int): The destination node index.

    Returns:
        list: A list of nodes representing the shortest path from the start node to the end node.
              If no path exists, returns an empty list.

    Example:
        previousNodes = [None, 0, 1, 2]  # Predecessor list
        start = 0
        end = 3
        reconstructPath(previousNodes, start, end) -> [0, 1, 2, 3]
    """
    path = []
    currentNode = end

    # Walk backwards through the predecessors until the start or a dead end (None) is hit.
    while currentNode is not None and currentNode != start:
        path.append(currentNode)
        currentNode = previousNodes[currentNode]

    # The walk ended on the start node: the collected nodes form a path, in reverse order.
    if currentNode == start:
        path.append(start)
        path.reverse()
        return path

    # The walk ended on None: `end` is not connected to `start`.
    return []


def test_dijkstrasAlgorithmWithPaths():
    """
    Test function for the `dijkstrasAlgorithmWithPaths` function.

    This test verifies:
    1. The correctness of the minimum distances calculated by Dijkstra's algorithm.
    2. The correctness of the reconstructed paths using the `reconstructPath` function.

    Expected Results:
        - Minimum Distances:
            [0, 7, 13, 27, 10, float("inf")]
        - Reconstructed Paths:
            - Path to node 3: [0, 1, 2, 3]
            - Path to node 4: [0, 1, 4]
            - Path to node 5: []

    Returns:
        None. Prints "All tests passed!" if the results are correct.
    """
    edges = [
        [[1, 7]],                   # Node 0 -> Node 1 (weight 7)
        [[2, 6], [3, 20], [4, 3]],  # Node 1 -> Node 2 (6), Node 3 (20), Node 4 (3)
        [[3, 14]],                  # Node 2 -> Node 3 (weight 14)
        [[4, 2]],                   # Node 3 -> Node 4 (weight 2)
        [],                         # Node 4 has no outgoing edges
        []                          # Node 5 has no outgoing edges
    ]
    start = 0  # Starting node for Dijkstra's algorithm

    minDistances, previousNodes = dijkstrasAlgorithmWithPaths(start, edges)

    expectedDistances = [0, 7, 13, 27, 10, float("inf")]
    assert minDistances == expectedDistances, f"Distances test failed: {minDistances}"

    # Path to node 3
    path_to_3 = reconstructPath(previousNodes, start, 3)
    expectedPathTo3 = [0, 1, 2, 3]
    assert path_to_3 == expectedPathTo3, f"Path to 3 test failed: {path_to_3}"

    # Path to node 4
    path_to_4 = reconstructPath(previousNodes, start, 4)
    expectedPathTo4 = [0, 1, 4]
    assert path_to_4 == expectedPathTo4, f"Path to 4 test failed: {path_to_4}"

    # Path to node 5 (unreachable)
    path_to_5 = reconstructPath(previousNodes, start, 5)
    expectedPathTo5 = []
    assert path_to_5 == expectedPathTo5, f"Path to 5 test failed: {path_to_5}"

    print("All tests passed!")


# Run the test
test_dijkstrasAlgorithmWithPaths()
624 | "base_uri": "https://localhost:8080/" 625 | }, 626 | "id": "5mMLPzmnVWHM", 627 | "outputId": "ee052d2f-4628-412b-b913-729e5805d4eb" 628 | }, 629 | "execution_count": 7, 630 | "outputs": [ 631 | { 632 | "output_type": "stream", 633 | "name": "stdout", 634 | "text": [ 635 | "All tests passed!\n" 636 | ] 637 | } 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "source": [], 643 | "metadata": { 644 | "id": "lFdlmgKpRrfQ" 645 | }, 646 | "execution_count": null, 647 | "outputs": [] 648 | } 649 | ] 650 | } -------------------------------------------------------------------------------- /lessons/week08/MinHeap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "toc_visible": true 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "source": [ 21 | "# Min Heap Construction\n", 22 | "\n", 23 | "Implement a `MinHeap` class that supports:\n", 24 | "\n", 25 | "- Building a Min Heap from an input array of integers.\n", 26 | "- Inserting integers in the heap.\n", 27 | "- Removing the heap's minimum / root value.\n", 28 | "- Peeking at the heap's minimum / root value.\n", 29 | "- Sifting integers up and down the heap, which is to be used when inserting and removing values.\n", 30 | "\n", 31 | "> Note that the heap should be represented in the form of an array.\n" 32 | ], 33 | "metadata": { 34 | "id": "NUUZ1Yr2HuV9" 35 | } 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "source": [ 40 | "## Sample Usage\n", 41 | "\n", 42 | "```python\n", 43 | "array = [48, 12, 24, 7, 8, -5, 24, 391, 24, 56, 2, 6, 8, 41]\n", 44 | "\n", 45 | "# All operations below are performed sequentially.\n", 46 | "\n", 47 | "MinHeap(array)\n", 48 | "# Instantiates a MinHeap (calls the buildHeap 
method and populates the heap)\n", 49 | "\n", 50 | "buildHeap(array)\n", 51 | "# ➞ [-5, 2, 6, 7, 8, 8, 24, 391, 24, 56, 12, 24, 48, 41]\n", 52 | "\n", 53 | "insert(76)\n", 54 | "# ➞ [-5, 2, 6, 7, 8, 8, 24, 391, 24, 56, 12, 24, 48, 41, 76]\n", 55 | "\n", 56 | "peek()\n", 57 | "# ➞ -5\n", 58 | "\n", 59 | "remove()\n", 60 | "# ➞ -5\n", 61 | "# Heap after removal: [2, 7, 6, 24, 8, 8, 24, 391, 76, 56, 12, 24, 48, 41]\n", 62 | "\n", 63 | "peek()\n", 64 | "# ➞ 2\n", 65 | "\n", 66 | "remove()\n", 67 | "# ➞ 2\n", 68 | "# Heap after removal: [6, 7, 8, 24, 8, 24, 24, 391, 76, 56, 12, 41, 48]\n", 69 | "\n", 70 | "peek()\n", 71 | "# ➞ 6\n", 72 | "\n", 73 | "insert(87)\n", 74 | "# ➞ [6, 7, 8, 24, 8, 24, 24, 391, 76, 56, 12, 41, 48, 87]\n" 75 | ], 76 | "metadata": { 77 | "id": "GRVrNpsrH5le" 78 | } 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "source": [ 83 | "## Hints\n" 84 | ], 85 | "metadata": { 86 | "id": "Va6w6J7XIWP3" 87 | } 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "source": [ 92 | "\n", 93 | "### Hint 1\n" 94 | ], 95 | "metadata": { 96 | "id": "fy7dVyYMIZUn" 97 | } 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "source": [ 102 | "\n", 103 | "For the `buildHeap()`, `remove()`, and `insert()` methods of the Heap, you will need to use the `siftDown()` and `siftUp()` methods. These two methods should essentially allow you to take any node in the heap and move it either down or up in the heap until it's in its final, appropriate position. 
This can be done by comparing the node in question to its child nodes in the case of `siftDown()` or to its parent node in the case of `siftUp()`.\n" 104 | ], 105 | "metadata": { 106 | "id": "ZFW1ZYupIhaN" 107 | } 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "source": [ 112 | "\n", 113 | "### Hint 2\n" 114 | ], 115 | "metadata": { 116 | "id": "oQLYJkoJIafe" 117 | } 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "source": [ 122 | "\n", 123 | "In an array-based Heap, you can easily access a node's children and parent nodes by using the nodes' indices. If a node is located at index `i`, then its children nodes are located at indices `2 * i + 1` and `2 * i + 2`, and its parent node is located at index `floor((i - 1) / 2)`.\n" 124 | ], 125 | "metadata": { 126 | "id": "Y0aLk3FhIgMB" 127 | } 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "source": [ 132 | "\n", 133 | "### Hint 3\n" 134 | ], 135 | "metadata": { 136 | "id": "c8a45fCaIch9" 137 | } 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "source": [ 142 | "\n", 143 | "To implement the `buildHeap()` method, you can either sift every node in the input array down to its final, correct position, or you can sift every node in the input array up to its final, correct position. \n", 144 | "What are the runtime implications of both approaches? \n", 145 | "Which methods (`siftDown()` or `siftUp()`) will `insert()` and `remove()` utilize? \n", 146 | "What about `peek()`?" 
import unittest


class MinHeap:
    """
    A class that implements a Min-Heap data structure on top of a Python list.
    The smallest element is always at the root (index 0).
    """

    def __init__(self, array):
        """
        Initializes the MinHeap object and builds the heap in-place.

        :param array: The initial array of elements; it is reordered in place and
                      used as the heap's storage.
        """
        self.heap = self.build_heap(array)

    def build_heap(self, array):
        """
        Transforms an array into a valid Min-Heap by sifting down every parent node.

        :param array: The array to be heapified (modified in place).
        :return: The same array object, now ordered as a Min-Heap.
        """
        # Nodes after the last parent are leaves and need no sifting.
        first_parent_idx = (len(array) - 2) // 2
        for current_idx in reversed(range(first_parent_idx + 1)):
            self.sift_down(current_idx, len(array) - 1, array)
        return array

    def sift_down(self, current_idx, end_idx, heap):
        """
        Moves the element at current_idx down the heap until the heap property is restored.

        :param current_idx: The index of the element to sift down.
        :param end_idx: The last index in the heap.
        :param heap: The heap array.
        """
        child_one_idx = current_idx * 2 + 1
        while child_one_idx <= end_idx:
            # -1 marks "no second child".
            child_two_idx = current_idx * 2 + 2 if current_idx * 2 + 2 <= end_idx else -1
            # Swap with the smaller child; ties go to the first child.
            if child_two_idx != -1 and heap[child_two_idx] < heap[child_one_idx]:
                idx_to_swap = child_two_idx
            else:
                idx_to_swap = child_one_idx

            if heap[idx_to_swap] < heap[current_idx]:
                self.swap(current_idx, idx_to_swap, heap)
                current_idx = idx_to_swap
                child_one_idx = current_idx * 2 + 1
            else:
                return

    def sift_up(self, current_idx, heap):
        """
        Moves the element at current_idx up the heap until the heap property is restored.

        :param current_idx: The index of the element to sift up.
        :param heap: The heap array.
        """
        parent_idx = (current_idx - 1) // 2
        while current_idx > 0 and heap[current_idx] < heap[parent_idx]:
            self.swap(current_idx, parent_idx, heap)
            current_idx = parent_idx
            parent_idx = (current_idx - 1) // 2

    def peek(self):
        """
        Returns the smallest element in the heap without removing it.

        :return: The root element of the heap.
        :raises IndexError: If the heap is empty.
        """
        if not self.heap:
            raise IndexError("peek from an empty heap")
        return self.heap[0]

    def remove(self):
        """
        Removes and returns the smallest element from the heap.

        :return: The removed element.
        :raises IndexError: If the heap is empty.
        """
        # Without this check the failure surfaced from inside swap() as an
        # unexplained "list index out of range".
        if not self.heap:
            raise IndexError("remove from an empty heap")
        # Move the root to the end so it can be popped in O(1), then repair the root.
        self.swap(0, len(self.heap) - 1, self.heap)
        value_to_remove = self.heap.pop()
        self.sift_down(0, len(self.heap) - 1, self.heap)
        return value_to_remove

    def insert(self, value):
        """
        Inserts a new element into the heap and restores the heap property.

        :param value: The value to be inserted.
        """
        self.heap.append(value)
        self.sift_up(len(self.heap) - 1, self.heap)

    @staticmethod
    def swap(i, j, heap):
        """
        Swaps two elements in the heap.

        :param i: Index of the first element.
        :param j: Index of the second element.
        :param heap: The heap array.
        """
        heap[i], heap[j] = heap[j], heap[i]


# Helper function to verify the min-heap property
def is_min_heap_property_satisfied(heap):
    """
    Returns True when no element of `heap` is greater than either of its children.

    :param heap: A list interpreted as an array-based binary heap.
    :return: True if the min-heap property holds for every parent, False otherwise.
    """
    size = len(heap)
    # Only indices below size // 2 have at least one child.
    for parent in range(size // 2):
        for child in (2 * parent + 1, 2 * parent + 2):
            if child < size and heap[parent] > heap[child]:
                return False
    return True
| " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 329 | "\n", 330 | " heap.insert(87)\n", 331 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 332 | "\n", 333 | " def test_case_2(self):\n", 334 | " array = [2, 3, 1]\n", 335 | " heap = MinHeap(array)\n", 336 | " self.assertEqual(heap.peek(), 1)\n", 337 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 338 | "\n", 339 | " def test_case_3(self):\n", 340 | " array = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 341 | " heap = MinHeap(array)\n", 342 | " self.assertEqual(heap.peek(), 1)\n", 343 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 344 | "\n", 345 | " def test_case_4(self):\n", 346 | " array = [-4, 5, 10, 8, -10, -6, -4, -2, -5, 3, 5, -4, -5, -1, 1, 6, -7, -6, -7, 8]\n", 347 | " heap = MinHeap(array)\n", 348 | " self.assertEqual(heap.peek(), -10)\n", 349 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 350 | "\n", 351 | " def test_case_5(self):\n", 352 | " array = [-7, 2, 3, 8, -10, 4, -6, -10, -2, -7, 10, 5, 2, 9, -9, -5, 3, 8]\n", 353 | " heap = MinHeap(array)\n", 354 | "\n", 355 | " self.assertEqual(heap.remove(), -10)\n", 356 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 357 | "\n", 358 | " self.assertEqual(heap.peek(), -10)\n", 359 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 360 | "\n", 361 | " heap.insert(-8)\n", 362 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 363 | "\n", 364 | " self.assertEqual(heap.peek(), -10)\n", 365 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 366 | "\n", 367 | " self.assertEqual(heap.remove(), -10)\n", 368 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 369 | "\n", 370 | " self.assertEqual(heap.peek(), -9)\n", 371 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 372 | "\n", 373 | " heap.insert(8)\n", 374 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 375 | "\n", 
376 | " self.assertEqual(heap.peek(), -9)\n", 377 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 378 | "\n", 379 | " def test_case_6(self):\n", 380 | " array = [427, 787, 222, 996, -359, -614, 246, 230, 107, -706, 568, 9, -246, 12, -764,\n", 381 | " -212, -484, 603, 934, -848, -646, -991, 661, -32, -348, -474, -439, -56, 507,\n", 382 | " 736, 635, -171, -215, 564, -710, 710, 565, 892, 970, -755, 55, 821, -3, -153,\n", 383 | " 240, -160, -610, -583, -27, 131]\n", 384 | " heap = MinHeap(array)\n", 385 | " self.assertEqual(heap.peek(), -991)\n", 386 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 387 | "\n", 388 | " def test_case_7(self):\n", 389 | " array = [991, -731, -882, 100, 280, -43, 432, 771, -581, 180, -382, -998, 847, 80,\n", 390 | " -220, 680, 769, -75, -817, 366, 956, 749, 471, 228, -435, -269, 652, -331,\n", 391 | " -387, -657, -255, 382, -216, -6, -163, -681, 980, 913, -169, 972, -523,\n", 392 | " 354, 747, 805, 382, -827, -796, 372, 753, 519, 906]\n", 393 | " heap = MinHeap(array)\n", 394 | " self.assertEqual(heap.remove(), -998)\n", 395 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 396 | " self.assertEqual(heap.remove(), -882)\n", 397 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 398 | " self.assertEqual(heap.remove(), -827)\n", 399 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 400 | " heap.insert(992)\n", 401 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 402 | "\n", 403 | " def test_case_8(self):\n", 404 | " array = [544, -578, 556, 713, -655, -359, -810, -731, 194, -531, -685, 689, -279,\n", 405 | " -738, 886, -54, -320, -500, 738, 445, -401, 993, -753, 329, -396, -924,\n", 406 | " -975, 376, 748, -356, 972, 459, 399, 669, -488, 568, -702, 551, 763, -90,\n", 407 | " -249, -45, 452, -917, 394, 195, -877, 153, 153, 788, 844, 867, 266, -739,\n", 408 | " 904, -154, -947, 464, 343, -312, 150, -656, 528, 61, 94, -581]\n", 409 | " heap 
= MinHeap(array)\n", 410 | " self.assertEqual(heap.peek(), -975)\n", 411 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 412 | "\n", 413 | " def test_case_9(self):\n", 414 | " array = [-823, 164, 48, -987, 323, 399, -293, 183, -908, -376, 14, 980, 965, 842,\n", 415 | " 422, 829, 59, 724, -415, -733, 356, -855, -155, 52, 328, -544, -371, -160,\n", 416 | " -942, -51, 700, -363, -353, -359, 238, 892, -730, -575, 892, 490, 490,\n", 417 | " 995, 572, 888, -935, 919, -191, 646, -120, 125, -817, 341, -575, 372,\n", 418 | " -874, 243, 610, -36, -685, -337, -13, 295, 800, -950, -949, -257, 631,\n", 419 | " -542, 201, -796, 157, 950, 540, -846, -265, 746, 355, -578, -441, -254,\n", 420 | " -941, -738, -469, -167, -420, -126, -410, 59]\n", 421 | " heap = MinHeap(array)\n", 422 | " heap.insert(2)\n", 423 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 424 | " heap.insert(22)\n", 425 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 426 | " heap.insert(222)\n", 427 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 428 | " heap.insert(2222)\n", 429 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 430 | " self.assertEqual(heap.remove(), -987)\n", 431 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 432 | " self.assertEqual(heap.remove(), -950)\n", 433 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 434 | " self.assertEqual(heap.remove(), -949)\n", 435 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))\n", 436 | " self.assertEqual(heap.remove(), -942)\n", 437 | " self.assertTrue(is_min_heap_property_satisfied(heap.heap))" 438 | ], 439 | "metadata": { 440 | "id": "rTbi5F9aKHSb" 441 | }, 442 | "execution_count": 14, 443 | "outputs": [] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "source": [ 448 | "# Run the tests in Google Colab\n", 449 | "suite = unittest.TestLoader().loadTestsFromTestCase(TestMinHeap)\n", 450 | 
"unittest.TextTestRunner(verbosity=2).run(suite)" 451 | ], 452 | "metadata": { 453 | "colab": { 454 | "base_uri": "https://localhost:8080/" 455 | }, 456 | "id": "XS7oC41FLhC8", 457 | "outputId": "f8d90200-05f1-453a-9810-e64c4c9c217d" 458 | }, 459 | "execution_count": 15, 460 | "outputs": [ 461 | { 462 | "output_type": "stream", 463 | "name": "stderr", 464 | "text": [ 465 | "test_case_1 (__main__.TestMinHeap.test_case_1) ... ok\n", 466 | "test_case_2 (__main__.TestMinHeap.test_case_2) ... ok\n", 467 | "test_case_3 (__main__.TestMinHeap.test_case_3) ... ok\n", 468 | "test_case_4 (__main__.TestMinHeap.test_case_4) ... ok\n", 469 | "test_case_5 (__main__.TestMinHeap.test_case_5) ... ok\n", 470 | "test_case_6 (__main__.TestMinHeap.test_case_6) ... ok\n", 471 | "test_case_7 (__main__.TestMinHeap.test_case_7) ... ok\n", 472 | "test_case_8 (__main__.TestMinHeap.test_case_8) ... ok\n", 473 | "test_case_9 (__main__.TestMinHeap.test_case_9) ... ok\n", 474 | "\n", 475 | "----------------------------------------------------------------------\n", 476 | "Ran 9 tests in 0.018s\n", 477 | "\n", 478 | "OK\n" 479 | ] 480 | }, 481 | { 482 | "output_type": "execute_result", 483 | "data": { 484 | "text/plain": [ 485 | "" 486 | ] 487 | }, 488 | "metadata": {}, 489 | "execution_count": 15 490 | } 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "source": [ 496 | "# Time and Space Complexity Analysis of MinHeap Operations\n" 497 | ], 498 | "metadata": { 499 | "id": "-xkYOUc7Tp7V" 500 | } 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "source": [ 505 | "\n", 506 | "A MinHeap is a complete binary tree where each parent node is less than or equal to its child nodes. This data structure can be efficiently implemented using an array, and it supports a range of operations such as insertion, deletion, and retrieval of the minimum element. 
This section presents a detailed complexity analysis of each core method in the `MinHeap` class.\n" 507 | ], 508 | "metadata": { 509 | "id": "6nMPR6RfUALC" 510 | } 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "source": [ 515 | "\n", 516 | "## `__init__(self, array)`\n" 517 | ], 518 | "metadata": { 519 | "id": "u5-XWsL3UHjG" 520 | } 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "source": [ 525 | "\n", 526 | "**Time Complexity:** O(n) \n", 527 | "**Space Complexity:** O(1) \n", 528 | "The constructor initializes the heap by calling the `build_heap` method, which transforms an unordered array into a valid MinHeap. Since this is done in-place, the space complexity remains constant. Despite involving several operations, the build process has linear time complexity due to the nature of the heap construction algorithm (see `build_heap` below).\n" 529 | ], 530 | "metadata": { 531 | "id": "EOpLe4QrUKxy" 532 | } 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "source": [ 537 | "\n", 538 | "## `build_heap(self, array)`\n" 539 | ], 540 | "metadata": { 541 | "id": "hb3S-Xn_UNG5" 542 | } 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "source": [ 547 | "\n", 548 | "**Time Complexity:** O(n) \n", 549 | "**Space Complexity:** O(1) \n", 550 | "Building a heap from an unsorted array involves sifting down all non-leaf nodes. Although each sift-down can take up to O(log n) time, most nodes are located at lower levels and require fewer operations. 
This results in a total worst-case cost of O(n) (the sum of the node heights over the whole tree is linear in n), a non-obvious but well-established result in algorithm analysis.\n" 551 | ], 552 | "metadata": { 553 | "id": "jftyJz5IUO-q" 554 | } 555 | }, 556 | { 557 | "cell_type": "markdown", 558 | "source": [ 559 | "\n", 560 | "\n", 561 | "## `sift_down(self, current_idx, end_idx, heap)`\n" 562 | ], 563 | "metadata": { 564 | "id": "U3N_qt0wURoS" 565 | } 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "source": [ 570 | "\n", 571 | "**Time Complexity:** O(log n) \n", 572 | "**Space Complexity:** O(1) \n", 573 | "This method restores the heap property by comparing a node with its children and moving it downward if necessary. The maximum number of comparisons is bounded by the height of the heap, which is logarithmic with respect to the number of elements.\n" 574 | ], 575 | "metadata": { 576 | "id": "JE_-Ml5sUVpE" 577 | } 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "source": [ 582 | "\n", 583 | "## `sift_up(self, current_idx, heap)`\n" 584 | ], 585 | "metadata": { 586 | "id": "8gFURuwAUXKc" 587 | } 588 | }, 589 | { 590 | "cell_type": "markdown", 591 | "source": [ 592 | "\n", 593 | "**Time Complexity:** O(log n) \n", 594 | "**Space Complexity:** O(1) \n", 595 | "Used after an insertion, this method restores the heap property by moving a node upward until it reaches its correct position. In the worst case, the node travels from a leaf to the root, making the time complexity O(log n).\n" 596 | ], 597 | "metadata": { 598 | "id": "bXRXo1gFUZfK" 599 | } 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "source": [ 604 | "\n", 605 | "## `insert(self, value)`\n" 606 | ], 607 | "metadata": { 608 | "id": "-eYrS7xwUa5I" 609 | } 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "source": [ 614 | "\n", 615 | "**Time Complexity:** O(log n) \n", 616 | "**Space Complexity:** O(1) \n", 617 | "The value is appended to the end of the array, which takes amortized constant time. 
The `sift_up` method is then used to ensure that the heap property is maintained. Therefore, the dominant cost arises from the logarithmic time of `sift_up`.\n" 618 | ], 619 | "metadata": { 620 | "id": "Qv0yR12kUcOy" 621 | } 622 | }, 623 | { 624 | "cell_type": "markdown", 625 | "source": [ 626 | "\n", 627 | "## `remove(self)`\n" 628 | ], 629 | "metadata": { 630 | "id": "ln4GLCJ4UdnM" 631 | } 632 | }, 633 | { 634 | "cell_type": "markdown", 635 | "source": [ 636 | "\n", 637 | "**Time Complexity:** O(log n) \n", 638 | "**Space Complexity:** O(1) \n", 639 | "To remove the minimum element (the root), the root is swapped with the last element in the heap and then popped off the end of the array. The `sift_down` method is then called on the new root to restore the heap property, resulting in a logarithmic time cost.\n" 640 | ], 641 | "metadata": { 642 | "id": "skkkHaZFUe7o" 643 | } 644 | }, 645 | { 646 | "cell_type": "markdown", 647 | "source": [ 648 | "\n", 649 | "## `peek(self)`\n" 650 | ], 651 | "metadata": { 652 | "id": "TbBYLzRTUgn9" 653 | } 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "source": [ 658 | "\n", 659 | "**Time Complexity:** O(1) \n", 660 | "**Space Complexity:** O(1) \n", 661 | "This method returns the minimum element, which is located at the root (index 0). 
Since no modifications are made to the heap and only a single element is accessed, both time and space complexities are constant.\n" 662 | ], 663 | "metadata": { 664 | "id": "eWWy29FBUh_r" 665 | } 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "source": [ 670 | "\n", 671 | "## Summary Table\n" 672 | ], 673 | "metadata": { 674 | "id": "SWlOKXaZUkA3" 675 | } 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "source": [ 680 | "\n", 681 | "\n", 682 | "| Method | Time Complexity | Space Complexity | Notes |\n", 683 | "|--------------|-----------------|------------------|---------------------------------------------|\n", 684 | "| `__init__` | O(n) | O(1) | Delegates to `build_heap` |\n", 685 | "| `build_heap` | O(n) | O(1) | Uses bottom-up heapify |\n", 686 | "| `sift_down` | O(log n) | O(1) | Moves node downward |\n", 687 | "| `sift_up` | O(log n) | O(1) | Moves node upward |\n", 688 | "| `insert` | O(log n) | O(1) | Append and sift up |\n", 689 | "| `remove` | O(log n) | O(1) | Swap root with last, pop, and sift down |\n", 690 | "| `peek` | O(1) | O(1) | Direct access to index 0 |\n" 691 | ], 692 | "metadata": { 693 | "id": "aknis2EWUmmq" 694 | } 695 | }, 696 | { 697 | "cell_type": "code", 698 | "source": [], 699 | "metadata": { 700 | "id": "GwKxShQFbyBz" 701 | }, 702 | "execution_count": 4, 703 | "outputs": [] 704 | } 705 | ] 706 | } -------------------------------------------------------------------------------- /lessons/week08/MinHeap.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week08/MinHeap.pdf -------------------------------------------------------------------------------- /lessons/week09/Week09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ivanovitchm/datastructure/75b5a938c431ef78adcc5a7bb6373c7c3edbe7fa/lessons/week09/Week09.pdf 
--------------------------------------------------------------------------------