├── environment.yml ├── README.md └── conda-forge-dependencies.ipynb /environment.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - pandas 3 | - dask 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Today I learned that conda-forge maintains a JSON file of all package 2 | dependencies on GitHub. 3 | 4 | [github.com/regro/libcfgraph/raw/master/conda-forge.json](https://github.com/regro/libcfgraph/raw/master/conda-forge.json) 5 | 6 | This is a small notebook that downloads that JSON file and turns it into a 7 | pandas DataFrame. You can run it on Binder here: 8 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/mrocklin/conda-forge-dependencies/master?filepath=conda-forge-dependencies.ipynb) 9 | -------------------------------------------------------------------------------- /conda-forge-dependencies.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Conda Forge Dependency Graph\n", 8 | "\n", 9 | "conda-forge keeps metadata about package dependencies in a JSON file on GitHub. It's easy to download and manipulate this data." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Download Conda Forge Graph" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "--2018-10-04 10:24:17-- https://github.com/regro/libcfgraph/raw/master/conda-forge.json\n", 29 | "Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113\n", 30 | "Connecting to github.com (github.com)|192.30.253.112|:443... 
connected.\n", 31 | "HTTP request sent, awaiting response... 302 Found\n", 32 | "Location: https://raw.githubusercontent.com/regro/libcfgraph/master/conda-forge.json [following]\n", 33 | "--2018-10-04 10:24:17-- https://raw.githubusercontent.com/regro/libcfgraph/master/conda-forge.json\n", 34 | "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.208.133\n", 35 | "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.208.133|:443... connected.\n", 36 | "HTTP request sent, awaiting response... 200 OK\n", 37 | "Length: 1581105 (1.5M) [text/plain]\n", 38 | "Saving to: ‘conda-forge.json.1’\n", 39 | "\n", 40 | "conda-forge.json.1 100%[===================>] 1.51M --.-KB/s in 0.1s \n", 41 | "\n", 42 | "2018-10-04 10:24:17 (10.8 MB/s) - ‘conda-forge.json.1’ saved [1581105/1581105]\n", 43 | "\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "!wget -O conda-forge.json https://github.com/regro/libcfgraph/raw/master/conda-forge.json" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Load data into Python" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 2, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "import json\n", 65 | "\n", 66 | "with open('conda-forge.json', encoding='utf-8') as f:  # JSON text is UTF-8; do not rely on the locale default\n", 67 | " data = json.load(f)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Construct dependency graph" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "('cython', 'numpy', 'pip', 'python', 'python-dateutil', 'pytz')" 86 | ] 87 | }, 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "dependencies = {}\n", 95 | "for x in data['nodes']:\n", 96 | " key = x['id']\n", 97 | " try:\n", 98 | " value = tuple(x['req']['elements'])\n", 99 | " except KeyError:\n", 100 | " value 
= ()\n", 101 | " dependencies[key] = value\n", 102 | " \n", 103 | "dependencies['pandas']" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "## Reverse dependency graph" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 4, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "['fbprophet',\n", 122 | " 'altair',\n", 123 | " 'jsontableschema-pandas',\n", 124 | " 'pyseidon',\n", 125 | " 'ogh',\n", 126 | " 'trackpy',\n", 127 | " 'reports',\n", 128 | " 'erddapy',\n", 129 | " 'ps2ff',\n", 130 | " 'qgrid']" 131 | ] 132 | }, 133 | "execution_count": 4, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "from dask.core import reverse_dict\n", 140 | "dependents = reverse_dict(dependencies)\n", 141 | "sorted(dependents['pandas'])[:10]  # sorted: the dependents are an unordered collection, so a bare slice is not stable across runs" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Switch to Pandas" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 5, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "
\n", 160 | "\n", 173 | "\n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
downstreamupstream
0ad3-cppcmake
1ad3-cppeigen
2ad3-cpptoolchain
3addictpython
4addictsetuptools
\n", 209 | "
" 210 | ], 211 | "text/plain": [ 212 | " downstream upstream\n", 213 | "0 ad3-cpp cmake\n", 214 | "1 ad3-cpp eigen\n", 215 | "2 ad3-cpp toolchain\n", 216 | "3 addict python\n", 217 | "4 addict setuptools" 218 | ] 219 | }, 220 | "execution_count": 5, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "import pandas as pd\n", 227 | "edges = [(pkg, req) for pkg, reqs in dependencies.items() for req in reqs]  # one (package, requirement) pair per edge\n", 228 | "df = pd.DataFrame(edges, columns=['downstream', 'upstream'])\n", 229 | "\n", 230 | "df.head()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 6, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/html": [ 241 | "
\n", 242 | "\n", 255 | "\n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | "
upstream
downstream
pandascython
pandasnumpy
pandaspip
pandaspython
pandaspython-dateutil
pandaspytz
\n", 293 | "
" 294 | ], 295 | "text/plain": [ 296 | " upstream\n", 297 | "downstream \n", 298 | "pandas cython\n", 299 | "pandas numpy\n", 300 | "pandas pip\n", 301 | "pandas python\n", 302 | "pandas python-dateutil\n", 303 | "pandas pytz" 304 | ] 305 | }, 306 | "execution_count": 6, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "dependencies_df = df.set_index('downstream')  # distinct name: leaves the `dependencies` dict intact so earlier cells can be re-run\n", 313 | "dependencies_df.loc['pandas']" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 7, 319 | "metadata": {}, 320 | "outputs": [ 321 | { 322 | "data": { 323 | "text/html": [ 324 | "
\n", 325 | "\n", 338 | "\n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " 
\n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | "
downstream
upstream
pandasalpenglow
pandasaltair
pandasaospy
pandasaxelrod
pandasbatman
pandasbcolz
pandasbeakerx
pandasbiopandas
pandasbkcharts
pandasbootstrap_contrast
pandasbqplot
pandascaffe
pandascalliope
pandascategory_encoders
pandascbsyst
pandascgsn_processing
pandascis
pandascluster-lensing
pandascodar2netcdf
pandascontact_map
pandascontextily
pandascourtana
pandasctd
pandascyclus
pandascymetric
pandasdask-ml
pandasdatabroker
pandasdatadotworld-py
pandasdatashader
pandasdatreant.data
......
pandasbgen-reader
pandasimagingreso
pandasdatarobot
pandasjaws
pandasheliopy
pandasvega3
pandaspytrackmate
pandaspyiron
pandaspyiron_base
pandasrfpimp
pandaspy_stringsimjoin
pandasgpflow
pandaspyunfold
pandasixmp
pandascyberpandas
pandasdatacache
pandaspython-geotiepoints
pandaspygridtools
pandasdask
pandaspyomo.extras
pandascyavro
pandascyclus-build-deps
pandasbillingegroup
pandasrnatools
pandaspy_entitymatching
pandastabula-py
pandasmetsim
pandasmarbles-mixins
pandaspyldavis
pandasaxopy
\n", 596 | "

214 rows × 1 columns

\n", 597 | "
" 598 | ], 599 | "text/plain": [ 600 | " downstream\n", 601 | "upstream \n", 602 | "pandas alpenglow\n", 603 | "pandas altair\n", 604 | "pandas aospy\n", 605 | "pandas axelrod\n", 606 | "pandas batman\n", 607 | "pandas bcolz\n", 608 | "pandas beakerx\n", 609 | "pandas biopandas\n", 610 | "pandas bkcharts\n", 611 | "pandas bootstrap_contrast\n", 612 | "pandas bqplot\n", 613 | "pandas caffe\n", 614 | "pandas calliope\n", 615 | "pandas category_encoders\n", 616 | "pandas cbsyst\n", 617 | "pandas cgsn_processing\n", 618 | "pandas cis\n", 619 | "pandas cluster-lensing\n", 620 | "pandas codar2netcdf\n", 621 | "pandas contact_map\n", 622 | "pandas contextily\n", 623 | "pandas courtana\n", 624 | "pandas ctd\n", 625 | "pandas cyclus\n", 626 | "pandas cymetric\n", 627 | "pandas dask-ml\n", 628 | "pandas databroker\n", 629 | "pandas datadotworld-py\n", 630 | "pandas datashader\n", 631 | "pandas datreant.data\n", 632 | "... ...\n", 633 | "pandas bgen-reader\n", 634 | "pandas imagingreso\n", 635 | "pandas datarobot\n", 636 | "pandas jaws\n", 637 | "pandas heliopy\n", 638 | "pandas vega3\n", 639 | "pandas pytrackmate\n", 640 | "pandas pyiron\n", 641 | "pandas pyiron_base\n", 642 | "pandas rfpimp\n", 643 | "pandas py_stringsimjoin\n", 644 | "pandas gpflow\n", 645 | "pandas pyunfold\n", 646 | "pandas ixmp\n", 647 | "pandas cyberpandas\n", 648 | "pandas datacache\n", 649 | "pandas python-geotiepoints\n", 650 | "pandas pygridtools\n", 651 | "pandas dask\n", 652 | "pandas pyomo.extras\n", 653 | "pandas cyavro\n", 654 | "pandas cyclus-build-deps\n", 655 | "pandas billingegroup\n", 656 | "pandas rnatools\n", 657 | "pandas py_entitymatching\n", 658 | "pandas tabula-py\n", 659 | "pandas metsim\n", 660 | "pandas marbles-mixins\n", 661 | "pandas pyldavis\n", 662 | "pandas axopy\n", 663 | "\n", 664 | "[214 rows x 1 columns]" 665 | ] 666 | }, 667 | "execution_count": 7, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "dependents_df = 
df.set_index('upstream')  # distinct name: leaves the `dependents` mapping from reverse_dict intact\n", 674 | "dependents_df.loc['pandas']" 675 | ] 676 | }, 677 | { 678 | "cell_type": "code", 679 | "execution_count": 8, 680 | "metadata": {}, 681 | "outputs": [ 682 | { 683 | "data": { 684 | "text/plain": [ 685 | "python 2892\n", 686 | "setuptools 1893\n", 687 | "r-base 1092\n", 688 | "pip 773\n", 689 | "toolchain 728\n", 690 | "numpy 705\n", 691 | "gcc 518\n", 692 | "six 447\n", 693 | "libgcc 420\n", 694 | "scipy 330\n", 695 | "cmake 239\n", 696 | "matplotlib 239\n", 697 | "requests 229\n", 698 | "pandas 214\n", 699 | "pkg-config 188\n", 700 | "cython 158\n", 701 | "zlib 128\n", 702 | "pyyaml 125\n", 703 | "make 108\n", 704 | "r-rcpp 104\n", 705 | "Name: upstream, dtype: int64" 706 | ] 707 | }, 708 | "execution_count": 8, 709 | "metadata": {}, 710 | "output_type": "execute_result" 711 | } 712 | ], 713 | "source": [ 714 | "df.upstream.value_counts().nlargest(20)" 715 | ] 716 | }, 717 | { 718 | "cell_type": "code", 719 | "execution_count": 9, 720 | "metadata": {}, 721 | "outputs": [ 722 | { 723 | "data": { 724 | "text/plain": [ 725 | "sage 126\n", 726 | "sagelib 55\n", 727 | "qgis 45\n", 728 | "r-essentials 37\n", 729 | "bob 33\n", 730 | "r-userfriendlyscience 33\n", 731 | "datalad 31\n", 732 | "doconce 31\n", 733 | "ncl 31\n", 734 | "steem 29\n", 735 | "octave 29\n", 736 | "caffe 29\n", 737 | "libgdal 28\n", 738 | "paraview 25\n", 739 | "r-sjstats 25\n", 740 | "cyclus-build-deps 25\n", 741 | "mss 25\n", 742 | "hyperspy 25\n", 743 | "fenics 24\n", 744 | "datacube 23\n", 745 | "Name: downstream, dtype: int64" 746 | ] 747 | }, 748 | "execution_count": 9, 749 | "metadata": {}, 750 | "output_type": "execute_result" 751 | } 752 | ], 753 | "source": [ 754 | "df.downstream.value_counts().nlargest(20)" 755 | ] 756 | } 757 | ], 758 | "metadata": { 759 | "kernelspec": { 760 | "display_name": "Python 3", 761 | "language": "python", 762 | "name": "python3" 763 | }, 764 | "language_info": { 765 | "codemirror_mode": { 766 | "name": "ipython", 
767 | "version": 3 768 | }, 769 | "file_extension": ".py", 770 | "mimetype": "text/x-python", 771 | "name": "python", 772 | "nbconvert_exporter": "python", 773 | "pygments_lexer": "ipython3", 774 | "version": "3.6.4" 775 | } 776 | }, 777 | "nbformat": 4, 778 | "nbformat_minor": 2 779 | } 780 | --------------------------------------------------------------------------------