├── environment.yml
├── README.md
└── conda-forge-dependencies.ipynb
/environment.yml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - pandas
3 | - dask
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Today I learned that conda-forge maintains a JSON file of all package
2 | dependencies on GitHub.
3 |
4 | [github.com/regro/libcfgraph/raw/master/conda-forge.json](https://github.com/regro/libcfgraph/raw/master/conda-forge.json)
5 |
6 | This is a small notebook that downloads that JSON file and turns it into a
7 | pandas DataFrame. You can run it on Binder here:
8 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/mrocklin/conda-forge-dependencies/master?filepath=conda-forge-dependencies.ipynb)
9 |
--------------------------------------------------------------------------------
/conda-forge-dependencies.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Conda Forge Dependency Graph\n",
8 | "\n",
9 | "The conda-forge project keeps metadata about package dependencies in a JSON file on GitHub. It's easy to download and manipulate this data."
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## Download Conda Forge Graph"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "name": "stdout",
26 | "output_type": "stream",
27 | "text": [
28 | "--2018-10-04 10:24:17-- https://github.com/regro/libcfgraph/raw/master/conda-forge.json\n",
29 | "Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113\n",
30 | "Connecting to github.com (github.com)|192.30.253.112|:443... connected.\n",
31 | "HTTP request sent, awaiting response... 302 Found\n",
32 | "Location: https://raw.githubusercontent.com/regro/libcfgraph/master/conda-forge.json [following]\n",
33 | "--2018-10-04 10:24:17-- https://raw.githubusercontent.com/regro/libcfgraph/master/conda-forge.json\n",
34 | "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.208.133\n",
35 | "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.208.133|:443... connected.\n",
36 | "HTTP request sent, awaiting response... 200 OK\n",
37 | "Length: 1581105 (1.5M) [text/plain]\n",
38 | "Saving to: ‘conda-forge.json.1’\n",
39 | "\n",
40 | "conda-forge.json.1 100%[===================>] 1.51M --.-KB/s in 0.1s \n",
41 | "\n",
42 | "2018-10-04 10:24:17 (10.8 MB/s) - ‘conda-forge.json.1’ saved [1581105/1581105]\n",
43 | "\n"
44 | ]
45 | }
46 | ],
47 | "source": [
48 | "!wget -O conda-forge.json https://github.com/regro/libcfgraph/raw/master/conda-forge.json"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "## Load data into Python"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 2,
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "import json\n",
65 | "\n",
66 | "with open('conda-forge.json', encoding='utf-8') as f:  # JSON is UTF-8; don't rely on the locale default\n",
67 | "    data = json.load(f)"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "## Construct dependency graph"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
84 | "text/plain": [
85 | "('cython', 'numpy', 'pip', 'python', 'python-dateutil', 'pytz')"
86 | ]
87 | },
88 | "execution_count": 3,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "# Map each package name to a tuple of the packages it requires.\n",
95 | "dependencies = {}\n",
96 | "for node in data['nodes']:\n",
97 | "    name = node['id']\n",
98 | "    try:\n",
99 | "        dependencies[name] = tuple(node['req']['elements'])\n",
100 | "    except KeyError:  # node has no 'req' / 'elements' entry\n",
101 | "        dependencies[name] = ()\n",
102 | "\n",
103 | "dependencies['pandas']"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "## Reverse dependency graph"
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 4,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "data": {
120 | "text/plain": [
121 | "['fbprophet',\n",
122 | " 'altair',\n",
123 | " 'jsontableschema-pandas',\n",
124 | " 'pyseidon',\n",
125 | " 'ogh',\n",
126 | " 'trackpy',\n",
127 | " 'reports',\n",
128 | " 'erddapy',\n",
129 | " 'ps2ff',\n",
130 | " 'qgrid']"
131 | ]
132 | },
133 | "execution_count": 4,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "from dask.core import reverse_dict\n",
140 | "dependents = reverse_dict(dependencies)  # package -> set of packages that require it\n",
141 | "sorted(dependents['pandas'])[:10]  # sets are unordered; sort for a reproducible preview"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {},
147 | "source": [
148 | "## Switch to Pandas"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 5,
154 | "metadata": {},
155 | "outputs": [
156 | {
157 | "data": {
158 | "text/html": [
159 | "
\n",
160 | "\n",
173 | "
\n",
174 | " \n",
175 | " \n",
176 | " | \n",
177 | " downstream | \n",
178 | " upstream | \n",
179 | "
\n",
180 | " \n",
181 | " \n",
182 | " \n",
183 | " | 0 | \n",
184 | " ad3-cpp | \n",
185 | " cmake | \n",
186 | "
\n",
187 | " \n",
188 | " | 1 | \n",
189 | " ad3-cpp | \n",
190 | " eigen | \n",
191 | "
\n",
192 | " \n",
193 | " | 2 | \n",
194 | " ad3-cpp | \n",
195 | " toolchain | \n",
196 | "
\n",
197 | " \n",
198 | " | 3 | \n",
199 | " addict | \n",
200 | " python | \n",
201 | "
\n",
202 | " \n",
203 | " | 4 | \n",
204 | " addict | \n",
205 | " setuptools | \n",
206 | "
\n",
207 | " \n",
208 | "
\n",
209 | "
"
210 | ],
211 | "text/plain": [
212 | " downstream upstream\n",
213 | "0 ad3-cpp cmake\n",
214 | "1 ad3-cpp eigen\n",
215 | "2 ad3-cpp toolchain\n",
216 | "3 addict python\n",
217 | "4 addict setuptools"
218 | ]
219 | },
220 | "execution_count": 5,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "import pandas as pd\n",
227 | "edges = [(pkg, req) for pkg, reqs in dependencies.items() for req in reqs]  # one row per dependency edge\n",
228 | "df = pd.DataFrame(edges, columns=['downstream', 'upstream'])\n",
229 | "\n",
230 | "df.head()"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 6,
236 | "metadata": {},
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/html": [
241 | "\n",
242 | "\n",
255 | "
\n",
256 | " \n",
257 | " \n",
258 | " | \n",
259 | " upstream | \n",
260 | "
\n",
261 | " \n",
262 | " | downstream | \n",
263 | " | \n",
264 | "
\n",
265 | " \n",
266 | " \n",
267 | " \n",
268 | " | pandas | \n",
269 | " cython | \n",
270 | "
\n",
271 | " \n",
272 | " | pandas | \n",
273 | " numpy | \n",
274 | "
\n",
275 | " \n",
276 | " | pandas | \n",
277 | " pip | \n",
278 | "
\n",
279 | " \n",
280 | " | pandas | \n",
281 | " python | \n",
282 | "
\n",
283 | " \n",
284 | " | pandas | \n",
285 | " python-dateutil | \n",
286 | "
\n",
287 | " \n",
288 | " | pandas | \n",
289 | " pytz | \n",
290 | "
\n",
291 | " \n",
292 | "
\n",
293 | "
"
294 | ],
295 | "text/plain": [
296 | " upstream\n",
297 | "downstream \n",
298 | "pandas cython\n",
299 | "pandas numpy\n",
300 | "pandas pip\n",
301 | "pandas python\n",
302 | "pandas python-dateutil\n",
303 | "pandas pytz"
304 | ]
305 | },
306 | "execution_count": 6,
307 | "metadata": {},
308 | "output_type": "execute_result"
309 | }
310 | ],
311 | "source": [
312 | "dependencies_df = df.set_index('downstream')  # separate name: keeps the `dependencies` dict intact so earlier cells can be re-run\n",
313 | "dependencies_df.loc['pandas']"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 7,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "data": {
323 | "text/html": [
324 | "\n",
325 | "\n",
338 | "
\n",
339 | " \n",
340 | " \n",
341 | " | \n",
342 | " downstream | \n",
343 | "
\n",
344 | " \n",
345 | " | upstream | \n",
346 | " | \n",
347 | "
\n",
348 | " \n",
349 | " \n",
350 | " \n",
351 | " | pandas | \n",
352 | " alpenglow | \n",
353 | "
\n",
354 | " \n",
355 | " | pandas | \n",
356 | " altair | \n",
357 | "
\n",
358 | " \n",
359 | " | pandas | \n",
360 | " aospy | \n",
361 | "
\n",
362 | " \n",
363 | " | pandas | \n",
364 | " axelrod | \n",
365 | "
\n",
366 | " \n",
367 | " | pandas | \n",
368 | " batman | \n",
369 | "
\n",
370 | " \n",
371 | " | pandas | \n",
372 | " bcolz | \n",
373 | "
\n",
374 | " \n",
375 | " | pandas | \n",
376 | " beakerx | \n",
377 | "
\n",
378 | " \n",
379 | " | pandas | \n",
380 | " biopandas | \n",
381 | "
\n",
382 | " \n",
383 | " | pandas | \n",
384 | " bkcharts | \n",
385 | "
\n",
386 | " \n",
387 | " | pandas | \n",
388 | " bootstrap_contrast | \n",
389 | "
\n",
390 | " \n",
391 | " | pandas | \n",
392 | " bqplot | \n",
393 | "
\n",
394 | " \n",
395 | " | pandas | \n",
396 | " caffe | \n",
397 | "
\n",
398 | " \n",
399 | " | pandas | \n",
400 | " calliope | \n",
401 | "
\n",
402 | " \n",
403 | " | pandas | \n",
404 | " category_encoders | \n",
405 | "
\n",
406 | " \n",
407 | " | pandas | \n",
408 | " cbsyst | \n",
409 | "
\n",
410 | " \n",
411 | " | pandas | \n",
412 | " cgsn_processing | \n",
413 | "
\n",
414 | " \n",
415 | " | pandas | \n",
416 | " cis | \n",
417 | "
\n",
418 | " \n",
419 | " | pandas | \n",
420 | " cluster-lensing | \n",
421 | "
\n",
422 | " \n",
423 | " | pandas | \n",
424 | " codar2netcdf | \n",
425 | "
\n",
426 | " \n",
427 | " | pandas | \n",
428 | " contact_map | \n",
429 | "
\n",
430 | " \n",
431 | " | pandas | \n",
432 | " contextily | \n",
433 | "
\n",
434 | " \n",
435 | " | pandas | \n",
436 | " courtana | \n",
437 | "
\n",
438 | " \n",
439 | " | pandas | \n",
440 | " ctd | \n",
441 | "
\n",
442 | " \n",
443 | " | pandas | \n",
444 | " cyclus | \n",
445 | "
\n",
446 | " \n",
447 | " | pandas | \n",
448 | " cymetric | \n",
449 | "
\n",
450 | " \n",
451 | " | pandas | \n",
452 | " dask-ml | \n",
453 | "
\n",
454 | " \n",
455 | " | pandas | \n",
456 | " databroker | \n",
457 | "
\n",
458 | " \n",
459 | " | pandas | \n",
460 | " datadotworld-py | \n",
461 | "
\n",
462 | " \n",
463 | " | pandas | \n",
464 | " datashader | \n",
465 | "
\n",
466 | " \n",
467 | " | pandas | \n",
468 | " datreant.data | \n",
469 | "
\n",
470 | " \n",
471 | " | ... | \n",
472 | " ... | \n",
473 | "
\n",
474 | " \n",
475 | " | pandas | \n",
476 | " bgen-reader | \n",
477 | "
\n",
478 | " \n",
479 | " | pandas | \n",
480 | " imagingreso | \n",
481 | "
\n",
482 | " \n",
483 | " | pandas | \n",
484 | " datarobot | \n",
485 | "
\n",
486 | " \n",
487 | " | pandas | \n",
488 | " jaws | \n",
489 | "
\n",
490 | " \n",
491 | " | pandas | \n",
492 | " heliopy | \n",
493 | "
\n",
494 | " \n",
495 | " | pandas | \n",
496 | " vega3 | \n",
497 | "
\n",
498 | " \n",
499 | " | pandas | \n",
500 | " pytrackmate | \n",
501 | "
\n",
502 | " \n",
503 | " | pandas | \n",
504 | " pyiron | \n",
505 | "
\n",
506 | " \n",
507 | " | pandas | \n",
508 | " pyiron_base | \n",
509 | "
\n",
510 | " \n",
511 | " | pandas | \n",
512 | " rfpimp | \n",
513 | "
\n",
514 | " \n",
515 | " | pandas | \n",
516 | " py_stringsimjoin | \n",
517 | "
\n",
518 | " \n",
519 | " | pandas | \n",
520 | " gpflow | \n",
521 | "
\n",
522 | " \n",
523 | " | pandas | \n",
524 | " pyunfold | \n",
525 | "
\n",
526 | " \n",
527 | " | pandas | \n",
528 | " ixmp | \n",
529 | "
\n",
530 | " \n",
531 | " | pandas | \n",
532 | " cyberpandas | \n",
533 | "
\n",
534 | " \n",
535 | " | pandas | \n",
536 | " datacache | \n",
537 | "
\n",
538 | " \n",
539 | " | pandas | \n",
540 | " python-geotiepoints | \n",
541 | "
\n",
542 | " \n",
543 | " | pandas | \n",
544 | " pygridtools | \n",
545 | "
\n",
546 | " \n",
547 | " | pandas | \n",
548 | " dask | \n",
549 | "
\n",
550 | " \n",
551 | " | pandas | \n",
552 | " pyomo.extras | \n",
553 | "
\n",
554 | " \n",
555 | " | pandas | \n",
556 | " cyavro | \n",
557 | "
\n",
558 | " \n",
559 | " | pandas | \n",
560 | " cyclus-build-deps | \n",
561 | "
\n",
562 | " \n",
563 | " | pandas | \n",
564 | " billingegroup | \n",
565 | "
\n",
566 | " \n",
567 | " | pandas | \n",
568 | " rnatools | \n",
569 | "
\n",
570 | " \n",
571 | " | pandas | \n",
572 | " py_entitymatching | \n",
573 | "
\n",
574 | " \n",
575 | " | pandas | \n",
576 | " tabula-py | \n",
577 | "
\n",
578 | " \n",
579 | " | pandas | \n",
580 | " metsim | \n",
581 | "
\n",
582 | " \n",
583 | " | pandas | \n",
584 | " marbles-mixins | \n",
585 | "
\n",
586 | " \n",
587 | " | pandas | \n",
588 | " pyldavis | \n",
589 | "
\n",
590 | " \n",
591 | " | pandas | \n",
592 | " axopy | \n",
593 | "
\n",
594 | " \n",
595 | "
\n",
596 | "
214 rows × 1 columns
\n",
597 | "
"
598 | ],
599 | "text/plain": [
600 | " downstream\n",
601 | "upstream \n",
602 | "pandas alpenglow\n",
603 | "pandas altair\n",
604 | "pandas aospy\n",
605 | "pandas axelrod\n",
606 | "pandas batman\n",
607 | "pandas bcolz\n",
608 | "pandas beakerx\n",
609 | "pandas biopandas\n",
610 | "pandas bkcharts\n",
611 | "pandas bootstrap_contrast\n",
612 | "pandas bqplot\n",
613 | "pandas caffe\n",
614 | "pandas calliope\n",
615 | "pandas category_encoders\n",
616 | "pandas cbsyst\n",
617 | "pandas cgsn_processing\n",
618 | "pandas cis\n",
619 | "pandas cluster-lensing\n",
620 | "pandas codar2netcdf\n",
621 | "pandas contact_map\n",
622 | "pandas contextily\n",
623 | "pandas courtana\n",
624 | "pandas ctd\n",
625 | "pandas cyclus\n",
626 | "pandas cymetric\n",
627 | "pandas dask-ml\n",
628 | "pandas databroker\n",
629 | "pandas datadotworld-py\n",
630 | "pandas datashader\n",
631 | "pandas datreant.data\n",
632 | "... ...\n",
633 | "pandas bgen-reader\n",
634 | "pandas imagingreso\n",
635 | "pandas datarobot\n",
636 | "pandas jaws\n",
637 | "pandas heliopy\n",
638 | "pandas vega3\n",
639 | "pandas pytrackmate\n",
640 | "pandas pyiron\n",
641 | "pandas pyiron_base\n",
642 | "pandas rfpimp\n",
643 | "pandas py_stringsimjoin\n",
644 | "pandas gpflow\n",
645 | "pandas pyunfold\n",
646 | "pandas ixmp\n",
647 | "pandas cyberpandas\n",
648 | "pandas datacache\n",
649 | "pandas python-geotiepoints\n",
650 | "pandas pygridtools\n",
651 | "pandas dask\n",
652 | "pandas pyomo.extras\n",
653 | "pandas cyavro\n",
654 | "pandas cyclus-build-deps\n",
655 | "pandas billingegroup\n",
656 | "pandas rnatools\n",
657 | "pandas py_entitymatching\n",
658 | "pandas tabula-py\n",
659 | "pandas metsim\n",
660 | "pandas marbles-mixins\n",
661 | "pandas pyldavis\n",
662 | "pandas axopy\n",
663 | "\n",
664 | "[214 rows x 1 columns]"
665 | ]
666 | },
667 | "execution_count": 7,
668 | "metadata": {},
669 | "output_type": "execute_result"
670 | }
671 | ],
672 | "source": [
673 | "dependents_df = df.set_index('upstream')  # separate name: don't clobber the `dependents` dict built by reverse_dict\n",
674 | "dependents_df.loc['pandas']"
675 | ]
676 | },
677 | {
678 | "cell_type": "code",
679 | "execution_count": 8,
680 | "metadata": {},
681 | "outputs": [
682 | {
683 | "data": {
684 | "text/plain": [
685 | "python 2892\n",
686 | "setuptools 1893\n",
687 | "r-base 1092\n",
688 | "pip 773\n",
689 | "toolchain 728\n",
690 | "numpy 705\n",
691 | "gcc 518\n",
692 | "six 447\n",
693 | "libgcc 420\n",
694 | "scipy 330\n",
695 | "cmake 239\n",
696 | "matplotlib 239\n",
697 | "requests 229\n",
698 | "pandas 214\n",
699 | "pkg-config 188\n",
700 | "cython 158\n",
701 | "zlib 128\n",
702 | "pyyaml 125\n",
703 | "make 108\n",
704 | "r-rcpp 104\n",
705 | "Name: upstream, dtype: int64"
706 | ]
707 | },
708 | "execution_count": 8,
709 | "metadata": {},
710 | "output_type": "execute_result"
711 | }
712 | ],
713 | "source": [
714 | "df['upstream'].value_counts().nlargest(20)"
715 | ]
716 | },
717 | {
718 | "cell_type": "code",
719 | "execution_count": 9,
720 | "metadata": {},
721 | "outputs": [
722 | {
723 | "data": {
724 | "text/plain": [
725 | "sage 126\n",
726 | "sagelib 55\n",
727 | "qgis 45\n",
728 | "r-essentials 37\n",
729 | "bob 33\n",
730 | "r-userfriendlyscience 33\n",
731 | "datalad 31\n",
732 | "doconce 31\n",
733 | "ncl 31\n",
734 | "steem 29\n",
735 | "octave 29\n",
736 | "caffe 29\n",
737 | "libgdal 28\n",
738 | "paraview 25\n",
739 | "r-sjstats 25\n",
740 | "cyclus-build-deps 25\n",
741 | "mss 25\n",
742 | "hyperspy 25\n",
743 | "fenics 24\n",
744 | "datacube 23\n",
745 | "Name: downstream, dtype: int64"
746 | ]
747 | },
748 | "execution_count": 9,
749 | "metadata": {},
750 | "output_type": "execute_result"
751 | }
752 | ],
753 | "source": [
754 | "df['downstream'].value_counts().nlargest(20)"
755 | ]
756 | }
757 | ],
758 | "metadata": {
759 | "kernelspec": {
760 | "display_name": "Python 3",
761 | "language": "python",
762 | "name": "python3"
763 | },
764 | "language_info": {
765 | "codemirror_mode": {
766 | "name": "ipython",
767 | "version": 3
768 | },
769 | "file_extension": ".py",
770 | "mimetype": "text/x-python",
771 | "name": "python",
772 | "nbconvert_exporter": "python",
773 | "pygments_lexer": "ipython3",
774 | "version": "3.6.4"
775 | }
776 | },
777 | "nbformat": 4,
778 | "nbformat_minor": 2
779 | }
780 |
--------------------------------------------------------------------------------