├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── README.md └── _config.yml ├── environment.yml ├── python_for_data_science.pptx ├── python_for_data_science_linux64_conda_specs.txt └── source-code ├── README.md ├── altair ├── README.md └── altair_interaction.ipynb ├── db-access ├── ExperimentDB │ ├── .gitignore │ ├── README.md │ ├── create.py │ ├── experiments.py │ ├── fill.py │ ├── orm_utils.py │ ├── query.py │ ├── shell.py │ └── view.py ├── Orm │ ├── .gitignore │ ├── README.md │ ├── create_aggr_db.py │ ├── create_sqlalchemy_db.py │ ├── fill_aggr_db.py │ ├── fill_sqlalchemy_db.py │ ├── query_aggr_db.py │ ├── query_sqlalchemy_db.py │ └── update_sqlalchemy_db.py ├── README.md └── StraightSql │ ├── README.md │ ├── fill_db.py │ ├── query_db.py │ └── weather-db ├── gis ├── Data │ ├── .gitignore │ ├── Alabama.gdb │ │ ├── a00000001.TablesByName.atx │ │ ├── a00000001.gdbindexes │ │ ├── a00000001.gdbtable │ │ ├── a00000001.gdbtablx │ │ ├── a00000002.gdbtable │ │ ├── a00000002.gdbtablx │ │ ├── a00000003.gdbindexes │ │ ├── a00000003.gdbtable │ │ ├── a00000003.gdbtablx │ │ ├── a00000004.CatItemsByPhysicalName.atx │ │ ├── a00000004.CatItemsByType.atx │ │ ├── a00000004.FDO_UUID.atx │ │ ├── a00000004.gdbindexes │ │ ├── a00000004.gdbtable │ │ ├── a00000004.gdbtablx │ │ ├── a00000004.spx │ │ ├── a00000005.CatItemTypesByName.atx │ │ ├── a00000005.CatItemTypesByParentTypeID.atx │ │ ├── a00000005.CatItemTypesByUUID.atx │ │ ├── a00000005.gdbindexes │ │ ├── a00000005.gdbtable │ │ ├── a00000005.gdbtablx │ │ ├── a00000006.CatRelsByDestinationID.atx │ │ ├── a00000006.CatRelsByOriginID.atx │ │ ├── a00000006.CatRelsByType.atx │ │ ├── a00000006.FDO_UUID.atx │ │ ├── a00000006.gdbindexes │ │ ├── a00000006.gdbtable │ │ ├── a00000006.gdbtablx │ │ ├── a00000007.CatRelTypesByBackwardLabel.atx │ │ ├── a00000007.CatRelTypesByDestItemTypeID.atx │ │ ├── a00000007.CatRelTypesByForwardLabel.atx │ │ ├── a00000007.CatRelTypesByName.atx │ │ ├── 
a00000007.CatRelTypesByOriginItemTypeID.atx │ │ ├── a00000007.CatRelTypesByUUID.atx │ │ ├── a00000007.gdbindexes │ │ ├── a00000007.gdbtable │ │ ├── a00000007.gdbtablx │ │ ├── a00000009.gdbindexes │ │ ├── a00000009.gdbtable │ │ ├── a00000009.gdbtablx │ │ ├── a00000009.spx │ │ ├── gdb │ │ └── timestamps │ ├── USA.gdb │ │ ├── a00000001.TablesByName.atx │ │ ├── a00000001.freelist │ │ ├── a00000001.gdbindexes │ │ ├── a00000001.gdbtable │ │ ├── a00000001.gdbtablx │ │ ├── a00000002.gdbtable │ │ ├── a00000002.gdbtablx │ │ ├── a00000003.gdbindexes │ │ ├── a00000003.gdbtable │ │ ├── a00000003.gdbtablx │ │ ├── a00000004.CatItemsByPhysicalName.atx │ │ ├── a00000004.CatItemsByType.atx │ │ ├── a00000004.FDO_UUID.atx │ │ ├── a00000004.freelist │ │ ├── a00000004.gdbindexes │ │ ├── a00000004.gdbtable │ │ ├── a00000004.gdbtablx │ │ ├── a00000004.spx │ │ ├── a00000005.CatRelsByDestinationID.atx │ │ ├── a00000005.CatRelsByOriginID.atx │ │ ├── a00000005.CatRelsByType.atx │ │ ├── a00000005.FDO_UUID.atx │ │ ├── a00000005.freelist │ │ ├── a00000005.gdbindexes │ │ ├── a00000005.gdbtable │ │ ├── a00000005.gdbtablx │ │ ├── a00000006.CatRelTypesByBackwardLabel.atx │ │ ├── a00000006.CatRelTypesByDestItemTypeID.atx │ │ ├── a00000006.CatRelTypesByForwardLabel.atx │ │ ├── a00000006.CatRelTypesByName.atx │ │ ├── a00000006.CatRelTypesByOriginItemTypeID.atx │ │ ├── a00000006.CatRelTypesByUUID.atx │ │ ├── a00000006.gdbindexes │ │ ├── a00000006.gdbtable │ │ ├── a00000006.gdbtablx │ │ ├── a00000007.CatItemTypesByName.atx │ │ ├── a00000007.CatItemTypesByParentTypeID.atx │ │ ├── a00000007.CatItemTypesByUUID.atx │ │ ├── a00000007.gdbindexes │ │ ├── a00000007.gdbtable │ │ ├── a00000007.gdbtablx │ │ ├── a00000009.freelist │ │ ├── a00000009.gdbindexes │ │ ├── a00000009.gdbtable │ │ ├── a00000009.gdbtablx │ │ ├── a00000009.spx │ │ ├── a0000000a.gdbindexes │ │ ├── a0000000a.gdbtable │ │ ├── a0000000a.gdbtablx │ │ ├── a0000000a.spx │ │ ├── a0000000b.gdbindexes │ │ ├── a0000000b.gdbtable │ │ ├── 
a0000000b.gdbtablx │ │ ├── a0000000b.spx │ │ ├── a0000000c.gdbindexes │ │ ├── a0000000c.gdbtable │ │ ├── a0000000c.gdbtablx │ │ ├── a0000000c.spx │ │ ├── a0000000d.freelist │ │ ├── a0000000d.gdbindexes │ │ ├── a0000000d.gdbtable │ │ ├── a0000000d.gdbtablx │ │ ├── a0000000d.spx │ │ ├── gdb │ │ └── timestamps │ ├── belgium_municipalities_topojson.json │ ├── cea.tif │ ├── countries.geo.json │ └── world_happiness_2016.csv ├── README.md ├── city_center.ipynb ├── city_center.md ├── geopandas.ipynb ├── gis.ipynb ├── make_your_own_island.ipynb ├── maps.ipynb ├── shapely.ipynb └── world_happiness.ipynb ├── holoviews ├── README.md ├── data.csv ├── data_visualization.ipynb └── holoviews_demo.ipynb ├── networkx ├── README.md ├── add_random_edge_weights.py ├── compute_leaf_path_lengths.py ├── generate_random_tree.py ├── graph.txt ├── graph.xml ├── max_flow.py ├── par_compute_leaf_lengths.py ├── shortest_path.ipynb ├── topo_sort.ipynb ├── topo_sort.py └── weighted_tree.xml ├── pandas ├── .gitignore ├── README.md ├── agt_analysis.ipynb ├── agt_data │ ├── vei_t8303_160519102111_T8303.CSV │ ├── vei_t8320_160519125721_T8320.CSV │ └── vei_u9117_p1.1_160519125431_U9117.CSV ├── bokeh │ ├── README.md │ ├── bokeh_plots.ipynb │ ├── environment.yml │ └── pandas_bokeh_linux64_conda_specs.txt ├── copy_on_write.ipynb ├── data │ ├── README_DATA.rst │ ├── csv_files │ │ ├── mac.csv │ │ ├── semi_colon_sep_mac.csv │ │ ├── single_column_mac.csv │ │ ├── single_column_unix.csv │ │ ├── single_column_windows.csv │ │ ├── unix.csv │ │ └── windows.csv │ ├── genes.html │ ├── greenhouse_gaz │ │ ├── README_co2_mm_global.txt │ │ └── co2_mm_global.txt │ ├── missing_values.csv │ ├── patient_experiment.xlsx │ ├── patient_metadata.xlsx │ ├── patients.xlsx │ ├── sales-funnel.xlsx │ ├── sea_levels │ │ ├── Obtaining Tide Gauge Data.html │ │ ├── Obtaining Tide Gauge Data_files │ │ │ ├── NOC_logo_2010.png │ │ │ ├── all_190.png │ │ │ ├── banner-styles.css │ │ │ ├── bodc_logo40.png │ │ │ ├── data_explorer_2_190.png │ │ 
│ ├── graph.jsp │ │ │ ├── icsu_logo40.png │ │ │ ├── ioc_en_small_color.gif │ │ │ ├── nerc-long-logo-200.png │ │ │ ├── print.css │ │ │ ├── psmsl_header.gif │ │ │ ├── style(1).css │ │ │ ├── style.css │ │ │ ├── wayback-toolbar-logo.png │ │ │ ├── wm_tb_nxt_off.png │ │ │ └── wm_tb_prv_on.png │ │ ├── sl_Atlantic_Ocean.txt │ │ ├── sl_nh.txt │ │ ├── sl_ns_global.txt │ │ └── sl_sh.txt │ └── temperatures │ │ ├── GLB.Ts+dSST.txt │ │ └── annual.land_ocean.90S.90N.df_1901-2000mean.dat ├── data_generation.ipynb ├── generate_csv_files.py ├── indexing_and_querying.ipynb ├── missing_values.ipynb ├── pandas_datatypes.ipynb ├── pandas_intro.ipynb ├── patient_data.ipynb ├── patients.ipynb ├── pipes.ipynb ├── pivot_versus_pivot_table.ipynb └── screenshots │ ├── add_column.png │ ├── add_multilevel.png │ ├── cum_sum.png │ ├── cumsum.png │ ├── interpolate.png │ ├── interpolated_plot.png │ ├── pearsonr.png │ ├── pivot_aggfunc.png │ ├── pivot_table.png │ ├── plot_all_temps.png │ ├── plot_missing.png │ ├── query.png │ ├── read_excel.png │ ├── read_html.png │ └── scatter_matrix.png ├── polars ├── .gitignore ├── README.md ├── create_csv_data.py ├── create_csv_data.slurm ├── data │ ├── patient_experiment.xlsx │ └── patient_metadata.xlsx ├── patient_data.ipynb ├── polars_large_data_benchmark.ipynb └── polars_performance.ipynb ├── regexes ├── README.md └── regexes.ipynb ├── seaborn ├── Data │ └── data.txt ├── READNE.md ├── generate_data.py └── seaborn.ipynb ├── web-scraping ├── .gitignore ├── README.md ├── link_web.py └── preprocessing │ ├── README.md │ ├── openmp_faq.html │ ├── openmp_faq.jsonl │ └── preprocess_openmp_faq.py └── xarray ├── .gitignore ├── README.md └── xarray_intro.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | 
downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # editor back files 107 | *~ 108 | *.swp 109 | *.bak 110 | ~$python_for_data_science.pptx 111 | 112 | # direnv file 113 | .envrc 114 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Code of conduct 4 | permalink: /code_of_conduct.html 5 | --- 6 | 7 | # Code of conduct 8 | 9 | This workshop is dedicated to providing a welcoming and 
supportive environment for all people, regardless of background or identity. By participating in this workshop, participants accept to abide by this Code of Conduct and accept the procedures by which any Code of Conduct incidents are resolved. We do not tolerate behavior that is disrespectful or that excludes, intimidates, or causes discomfort to others. We do not tolerate discrimination or harassment based on characteristics that include, but are not limited to, gender identity and expression, sexual orientation, disability, physical appearance, body size, citizenship, nationality, ethnic or social origin, pregnancy, familial status, veteran status, genetic information, religion or belief (or lack thereof), membership of a national minority, property, age, education, socio-economic status, technical choices, and experience level. 10 | 11 | Everyone who participates in workshop activities is required to conform to this Code of Conduct. It applies to all spaces managed by or affiliated with the workshop, including, but not limited to, workshops, email lists, and online forums such as GitHub, Slack and Twitter. Workshop hosts are expected to assist with the enforcement of the Code of Conduct. By participating, participants indicate their acceptance of the procedures by which the workshop resolves any Code of Conduct incidents, which may include storage and processing of their personal information. 12 | 13 | 14 | ## Expected behavior 15 | 16 | All participants in our events and communications are expected to show respect and courtesy to others. All interactions should be professional regardless of platform: either online or in-person. 
In order to foster a positive and professional learning environment we encourage the following kinds of behaviors in all workshop events and platforms: 17 | 18 | * Use welcoming and inclusive language 19 | * Be respectful of different viewpoints and experiences 20 | * Gracefully accept constructive criticism 21 | * Focus on what is best for the community 22 | * Show courtesy and respect towards other community members 23 | 24 | 25 | ## Unacceptable behavior 26 | 27 | Examples of unacceptable behavior by participants at any workshop event/platform include: 28 | 29 | * written or verbal comments which have the effect of excluding people on the basis of membership of any specific group 30 | * causing someone to fear for their safety, such as through stalking, following, or intimidation 31 | * violent threats or language directed against another person 32 | * the display of sexual or violent images 33 | * unwelcome sexual attention 34 | * nonconsensual or unwelcome physical contact 35 | * sustained disruption of talks, events or communications 36 | * insults or put downs 37 | * sexist, racist, homophobic, transphobic, ableist, or exclusionary jokes 38 | * excessive swearing 39 | * incitement to violence, suicide, or self-harm 40 | * continuing to initiate interaction (including photography or recording) with someone after being asked to stop 41 | * publication of private communication without consent 42 | * sharing recordings of training sessions with non-participants 43 | 44 | 45 | ## Consequences of Unacceptable behavior 46 | 47 | If you believe someone is violating the Code of Conduct, we ask that you report it to any of the workshop organizers. This is a community-led workshop, and we value the involvement of everyone in the community. We are committed to creating a friendly and respectful place for learning, teaching and contributing. All participants in our events and communications are expected to show respect and courtesy to others. 
48 | 49 | To make clear what is expected, everyone participating in this conference and its activities is required to conform to the Code of Conduct. This Code of Conduct applies to all spaces affiliated with the conference, but not limited to, workshops, email lists, and online forums such as GitHub, Slack and Twitter. Workshop organizers are expected to assist with the enforcement of the Code of Conduct. 50 | 51 | Participants who are asked to stop any inappropriate behavior are expected to comply immediately. This applies to any workshop events and platforms, either online or in-person. If a participant engages in behavior that violates this code of conduct, the organizers may warn the offender, ask them to leave the event or platform, or investigate the Code of Conduct violation and impose appropriate sanctions. 52 | 53 | 54 | ## Attribution 55 | 56 | This code of conduct is largely mirrored from the one used by [ML4Science](https://www.ml4science.org/code-of-conduct) that in turn modelled it on the [Software Carpentry's code of conduct](https://www.google.com/url?q=https%3A%2F%2Fdocs.carpentries.org%2Ftopic_folders%2Fpolicies%2Fcode-of-conduct.html&sa=D&sntz=1&usg=AOvVaw3u6XY-Uib9k9m6Y7uxovay). 57 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | You are very welcome to contribute to this repository. 4 | 5 | 6 | ## How to contribute? 
7 | 8 | This can be done in several ways, depending on how familiar you are with 9 | the GitHub workflow: 10 | 11 | * simply send an [email](geertjan.bex@uhasselt.be), 12 | * [create an issue](https://help.github.com/articles/creating-an-issue/) 13 | on GitHub, or 14 | * [fork the repository](https://help.github.com/articles/fork-a-repo/) 15 | on GitHub, create a branch based on the development 16 | branch, make your contribution, and submit a 17 | [pull request](https://help.github.com/articles/about-pull-requests/). 18 | 19 | For pull requests, a few things should be noted: 20 | 21 | * you may be asked to modify your contribution to fit better into 22 | the context, 23 | * please base them on the development branch, 24 | * please ensure your branch is in sync with the upstream development 25 | branch before submitting the pull request, 26 | * please try to use [informative commit messages](https://chris.beams.io/posts/git-commit/) 27 | (I'm not claiming that mine are shining examples, but well...), and 28 | * if you want to suggest modifications to PowerPoint presentations, 29 | please make a copy of the original presentation, modify it, and add it. 30 | 31 | Since from `git`'s perspective, PowerPoint files are just binary files, 32 | a merge can only be done using Microsoft's PowerPoint application by 33 | comparing two PowerPoint files. 34 | 35 | Of course, although all contributions are very much appreciated, some of 36 | them might not be integrated into the material. If that should happen, 37 | I'll try to justify that decision, and it can be discussed. 38 | 39 | 40 | ## Citation and licensing 41 | 42 | For contributions in general, 43 | 44 | * if you make a contribution, this will be mentioned in the 45 | [README](README.md), unless requested otherwise; 46 | * contributions will be under the same [license as this repository](LICENSE). 47 | 48 | 49 | ## What can you contribute? 
50 | 51 | Basically, anything you like, e.g., 52 | 53 | * corrections of typos, 54 | * bug fixes in sample code, 55 | * additional sample code, 56 | * extra or alternative slides in presentations, 57 | * a section in a presentation, 58 | * a new subject, or aspect of an existing subject. 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python for data science 2 | 3 | GitHub repository for participants of the "Python for data science" training. 4 | For information on the training, see the website 5 | [https://gjbex.github.io/Python-for-data-science/](https://gjbex.github.io/Python-for-data-science/) 6 | 7 | 8 | ## What is it? 9 | 10 | 1. [`python_for_data_science.pptx`](python_for_data_science.pptx): PowerPoint 11 | presentation used for the training. 12 | 1. [`source-code`](source-code): sample code written to develop the slides and 13 | illustrate concepts. 14 | 1. [`environment.yml`](environment.yml): conda environment file intended to be 15 | cross-platform. 16 | 1. [`python_for_data_science_linux64_conda_specs.txt`](python_for_data_science_linux64_conda_specs.txt): 17 | conda environment specification file specific for 64-bit Linux to precisely 18 | reproduce the environment on which the code was developed. 19 | 1. [License](LICENSE): license information for the material in this repository. 20 | 1. [Contributing](CONTRIBUTING.md): information on how to contribute to this 21 | repository. 22 | 1. docs: directory containing the website for this repository. 23 | 1. [Code of conduct](CODE_OF_CONDUCT.md): when participating in this training 24 | you accept to abide by the code of conduct. 25 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | The Python programming language is increasingly popular. 
It is a 2 | versatile language for general purpose programming and accessible 3 | for novice programmers. However, it is also increasingly used for 4 | data science applications. This training introduces modules that 5 | are useful in that context. 6 | 7 | 8 | ## Learning outcomes 9 | 10 | When you complete this training you will 11 | 12 | * be able to use pandas to represent, compute with and query data; 13 | * be able to visualize data with seaborn and holoviews; 14 | * be able to create data visualizations with matplotlib and bokeh; 15 | * be able to parse textual information using regular expressions; 16 | * be able to interact with relational databases using SQLAlchemy; 17 | * be able to extract information from web pages using beautiful soup; 18 | * be able to represent and query geographical information using geopandas. 19 | 20 | 21 | ## Schedule 22 | 23 | Total duration: 4 hours. 24 | 25 | | Subject | Duration | 26 | |---------------------------------------------|----------| 27 | | introduction and motivation | 5 min. | 28 | | pandas & seaborn or polars & seaborn |105 min. | 29 | | coffee break | 10 min. | 30 | | text parsing with regular expressions | 40 min. | 31 | | querying relational databases | 30 min. | 32 | | web scraping | 10 min. | 33 | | geographical information with geopandas | 30 min. | 34 | | wrap up | 10 min. | 35 | 36 | 37 | ## Training materials 38 | 39 | Slides are available in the 40 | [GitHub repository](https://github.com/gjbex/Python-for-data-science), 41 | as well as example code and hands-on material. 42 | 43 | 44 | ## Target audience 45 | 46 | This training is for you if you need to use Python for data analysis. 47 | 48 | 49 | ## Prerequisites 50 | 51 | You will need experience programming in Python. This is not a training that starts 52 | from scratch. Familiarity with numpy is not required, but would be beneficial. 
53 | 54 | If you plan to do Python programming in a Linux or HPC environment you should 55 | be familiar with these as well. 56 | 57 | 58 | ## Trainer(s) 59 | 60 | * Geert Jan Bex ([geertjan.bex@uhasselt.be](mailto:geertjan.bex@uhasselt.be)) 61 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /python_for_data_science.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/python_for_data_science.pptx -------------------------------------------------------------------------------- /source-code/README.md: -------------------------------------------------------------------------------- 1 | # Source code 2 | 3 | This is source code that is either used in the presentation, or was developed 4 | to create it. There is some material not covered in the presentation as well. 5 | 6 | ## What is it? 7 | * [`altair`](altair): illustration of an interactive plot using Altair. 8 | * [`db-access`](db-access): illustration of accessing SQLite databases and 9 | using SQLAlchemy, including object-relational mapping. 10 | * [`gis`](gis): illustrations of working with geospatial data, including 11 | geopandas. 12 | * [`holoviews`](holoviews): illustrations of using HoloViews for convenient 13 | visualizations. 14 | * [`networkx`](networkx): illustration of using the networkx library for graph 15 | representation and algorithms. 16 | * [`pandas`](pandas): illustrations of using pandas and seaborn. 17 | * [`polars`](polars): illustrations of using polars. 18 | * [`regexes`](regexes): illustrations of using regular expressions for 19 | validation and information extraction from textual data.
20 | * [`seaborn`](seaborn): illustrations of using Seaborn to create plots. 21 | * [`web-scraping`](web-scraping): illustration of web scraping using beautiful 22 | soup and graph representation using networkx. 23 | * [`xarray`](xarray): illustrates the xarray library for pandas-like operations 24 | on multi-dimensional arrays. 25 | 26 | **Note:** material on dashboards has been moved to a [dedicated 27 | repository](https://github.com/gjbex/Python-dashboards). 28 | -------------------------------------------------------------------------------- /source-code/altair/README.md: -------------------------------------------------------------------------------- 1 | # Altair 2 | 3 | Altair is a sophisticated plotting library that lets you make highly customizable 4 | and interactive plots. 5 | 6 | 7 | ## What is it? 8 | 9 | 1. [`altair_interaction.ipynb`](altair_interaction.ipynb): create a plot with 10 | hover values and an interactive legend. 11 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/.gitignore: -------------------------------------------------------------------------------- 1 | test.db 2 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/README.md: -------------------------------------------------------------------------------- 1 | # Orm 2 | Sample object-relational mapping (ORM) code implemented in Python's 3 | `sqlalchemy`. Note that the database schema is not the same as that of 4 | the SQL examples in the parent directory. 5 | 6 | TODO: make schemas on slides, in SQL examples and ORM code consistent 7 | 8 | ## What is it? 9 | 1. `experiments.py`: class definitions for experiments, researchers and 10 | samples, taking into account their associations. 11 | 1. `create.py`: create database schema. 12 | 1. `fill.py`: add some data to the database, illustrating back references. 13 | 1. 
`view.py`: illustrate using back references when using objects. 14 | 1. `orm_utils.py`: some helper functions for more convenient ORM usage. 15 | 1. `shell.py`: a shell to work with the database using a domain specific 16 | language. 17 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/create.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from sqlalchemy import create_engine 5 | import experiments 6 | 7 | arg_parser = ArgumentParser(description='create tables in database') 8 | arg_parser.add_argument('db_name', help='name of DB to create') 9 | options = arg_parser.parse_args() 10 | engine = create_engine('sqlite:///{0}'.format(options.db_name)) 11 | experiments.Base.metadata.create_all(engine) 12 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/experiments.py: -------------------------------------------------------------------------------- 1 | '''Module that defines classes to describe experiments, reseachers and 2 | samples. Experiments can have multple researchers associated to them, 3 | and vice versa. Samples are assigned to experiments, but a sample can 4 | be used in a single experiment only. 
Object Relational Mapping is 5 | provided via SQLAlchemy.''' 6 | 7 | from sqlalchemy import (Column, ForeignKey, UniqueConstraint, 8 | Integer, String, DateTime, Table) 9 | from sqlalchemy.ext.declarative import declarative_base 10 | from sqlalchemy.orm import relationship, backref 11 | 12 | # Base class for all classes associated with tables in the database 13 | Base = declarative_base() 14 | 15 | # Table to hold the experiments-researchers many-to-many association 16 | staff_assignments = Table( 17 | 'staff_assignments', Base.metadata, Column( 18 | 'experiment_id', 19 | Integer, 20 | ForeignKey('experiments.experiment_id') 21 | ), 22 | Column( 23 | 'researcher_id', 24 | Integer, 25 | ForeignKey('researchers.researcher_id') 26 | ), 27 | UniqueConstraint('experiment_id', 'researcher_id') 28 | ) 29 | 30 | 31 | class Experiment(Base): 32 | '''An experiment have a description, a start date, and when finished, 33 | an end date''' 34 | __tablename__ = 'experiments' 35 | experiment_id = Column(Integer, primary_key=True) 36 | start_date = Column(DateTime, nullable=False) 37 | end_date = Column(DateTime) 38 | description = Column(String(2048), nullable=False) 39 | researchers = relationship('Researcher', secondary=staff_assignments, 40 | backref='experiments') 41 | 42 | def __str__(self): 43 | '''string representation of an experiment''' 44 | fmt_str = 'id {id:d}: {desc:s},\n\tstarted on {start}' 45 | str_repr = fmt_str.format(id=self.experiment_id, 46 | desc=self.description, 47 | start=self.start_date) 48 | if self.end_date: 49 | str_repr = '{base:s}, ended on {end}'.format(base=str_repr, 50 | end=self.end_date) 51 | return str_repr 52 | 53 | 54 | class Researcher(Base): 55 | '''A researcher has a first name, a last name, and optionally, a 56 | u-number, and description''' 57 | __tablename__ = 'researchers' 58 | researcher_id = Column(Integer, primary_key=True) 59 | u_number = Column(String(20)) 60 | first_name = Column(String(20), nullable=False) 61 | last_name = 
Column(String(20), nullable=False) 62 | description = Column(String(20)) 63 | 64 | def __str__(self): 65 | '''string representation of a researcher''' 66 | fmt_str = 'id {id:d}: {last:s}, {first:s}' 67 | str_repr = fmt_str.format(id=self.researcher_id, 68 | last=self.last_name, 69 | first=self.first_name) 70 | if self.u_number: 71 | str_repr = '{base} ({u_nr})'.format(base=str_repr, 72 | u_nr=self.u_number) 73 | if self.description: 74 | str_repr = '{base}: {descr}'.format(base=str_repr, 75 | descr=self.description) 76 | 77 | return str_repr 78 | 79 | 80 | class Sample(Base): 81 | '''A sample is associated to an experiment through the latter's ID, 82 | and it has a description''' 83 | __tablename__ = 'samples' 84 | sample_id = Column(Integer, primary_key=True) 85 | experiment_id = Column(Integer, ForeignKey('experiments.experiment_id')) 86 | description = Column(String, nullable=False) 87 | experiment = relationship('Experiment', backref=backref('samples')) 88 | 89 | def __str__(self): 90 | '''string representation of a sample''' 91 | fmt_str = 'id {id:d}: {descr}' 92 | str_repr = fmt_str.format(id=self.sample_id, descr=self.description) 93 | if self.experiment_id: 94 | str_repr = '{base} used in {e_id:d}'.format(base=str_repr, 95 | e_id=self.experiment_id) 96 | return str_repr 97 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/fill.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from datetime import datetime 5 | from experiments import Experiment, Researcher, Sample 6 | from orm_utils import create_session 7 | 8 | arg_parser = ArgumentParser(description='insert values into the database') 9 | arg_parser.add_argument('db_name', help='name of DB to create') 10 | options = arg_parser.parse_args() 11 | db_session = create_session(options.db_name) 12 | 13 | # create and add researchers 14 | 
nele = Researcher(first_name='Nele', last_name='Famaey') 15 | db_session.add(nele) 16 | heleen = Researcher(first_name='Heleen', last_name='Fehervary') 17 | db_session.add(heleen) 18 | db_session.commit() 19 | 20 | # create experiments 21 | exp1 = Experiment( 22 | start_date=datetime(2015, 10, 23, 9, 11), 23 | end_date=datetime(2015, 10, 25, 13, 43), 24 | description='first experiment' 25 | ) 26 | exp1.researchers.append(nele) 27 | exp1.samples.append(Sample(description='sample 1')) 28 | exp1.samples.append(Sample(description='sample 2')) 29 | db_session.add(exp1) 30 | exp2 = Experiment( 31 | start_date=datetime(2015, 10, 27, 9, 5), 32 | end_date=datetime(2015, 10, 28, 14, 53), 33 | description='second experiment' 34 | ) 35 | exp2.researchers.append(nele) 36 | exp2.researchers.append(heleen) 37 | exp2.samples.append(Sample(description='sample 3')) 38 | db_session.add(exp2) 39 | db_session.commit() 40 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/orm_utils.py: -------------------------------------------------------------------------------- 1 | '''Module with some convenient function for ORM usage''' 2 | 3 | from sqlalchemy import create_engine 4 | from sqlalchemy.orm import sessionmaker 5 | from experiments import Base 6 | 7 | 8 | def create_session(db_name): 9 | '''Create a SQLAlchemy database session based on the provided name''' 10 | engine = create_engine('sqlite:///{0}'.format(db_name)) 11 | Base.metadata.bind = engine 12 | DBSession = sessionmaker(bind=engine) 13 | return DBSession() 14 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/query.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from datetime import datetime 4 | 5 | 6 | def convert2date(date_str): 7 | '''convert a string representation 'yyyy-mm-dd' to a Python 8 | datetime object''' 9 | 
year, month, day = list(map(int, date_str.split('-'))) 10 | return datetime(year, month, day) 11 | 12 | if __name__ == '__main__': 13 | from argparse import ArgumentParser 14 | from experiments import Experiment, Researcher 15 | from orm_utils import create_session 16 | arg_parser = ArgumentParser(description='query the database') 17 | arg_parser.add_argument('db_name', help='name of DB to create') 18 | arg_parser.add_argument('--first_name', 19 | help='search for researcher by first name') 20 | arg_parser.add_argument('--started_after', 21 | help='search experiments with a start date ' 22 | 'later than the given one') 23 | options = arg_parser.parse_args() 24 | db_session = create_session(options.db_name) 25 | 26 | if options.first_name: 27 | researchers = db_session.query(Researcher).\ 28 | filter_by(first_name=options.first_name).\ 29 | all() 30 | for researcher in researchers: 31 | print(researcher) 32 | elif options.started_after: 33 | date_after = convert2date(options.started_after) 34 | experiments = db_session.query(Experiment).\ 35 | filter(Experiment.start_date > date_after).\ 36 | all() 37 | for experiment in experiments: 38 | print(experiment) 39 | -------------------------------------------------------------------------------- /source-code/db-access/ExperimentDB/view.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | if __name__ == '__main__': 5 | from argparse import ArgumentParser 6 | from experiments import Experiment, Researcher, Sample 7 | from orm_utils import create_session 8 | arg_parser = ArgumentParser(description='show experiments, researchrs, ' 9 | 'samples in the database') 10 | arg_parser.add_argument('db_name', help='name of DB to create') 11 | options = arg_parser.parse_args() 12 | db_session = create_session(options.db_name) 13 | experiments = db_session.query(Experiment).all() 14 | for experiment in experiments: 15 | print(experiment.description) 16 | for 
researcher in experiment.researchers: 17 | print('\t{0}, {1}'.format(researcher.last_name, 18 | researcher.first_name)) 19 | for sample in experiment.samples: 20 | print('\t{}'.format(sample.description)) 21 | samples = db_session.query(Sample).all() 22 | for sample in samples: 23 | print(sample.description) 24 | print('\t{0}'.format(sample.experiment.description)) 25 | researchers = db_session.query(Researcher).all() 26 | for researcher in researchers: 27 | print(researcher) 28 | for experiment in researcher.experiments: 29 | print('\t{}'.format(experiment.description)) 30 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/.gitignore: -------------------------------------------------------------------------------- 1 | *.sqlite 2 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/README.md: -------------------------------------------------------------------------------- 1 | # Database access 2 | SQLAlchemy also support an object-relational mapping approach, 3 | which is also illustrated by sample code. 4 | 5 | 6 | ## What is it? 7 | 1. `create_sqlalchemy_db.py`: implements two classes representing SQL 8 | tables, and creates them in an RDBMS using SQLAlchemy's 9 | object-relational mapping mechanisms 10 | 1. `fill_sqlalchemy_db.py`: adds data using SQLAlchemy's ORM mechanisms 11 | 1. `query_sqlalchemy_db.py`: queries the RDBMS using SQLAlchemy's ORM 12 | mechanisms 13 | 1. `update_sqlalchemy_db.py`: illustrates updating values using SQLAlchemy's 14 | ORM mechanisms 15 | 1. `create_aggr_db.py`: implements to classes, one representing a cities 16 | (actually the same as in `create_sqlalchemy_db.py`, but with a copy 17 | factory added), and the other 18 | represents the average temperature for each city. 19 | 1. `fill_aggr_db.py`: adds data to the tables of the new database that 20 | result from querying the first database. 
Note that both tables are 21 | populated, although only measurements are added explicitely. 22 | 1. `query_aggr_db.py`: perform a simple query to verify the contents of 23 | the second database. 24 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/create_aggr_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from sqlalchemy import (Column, ForeignKey, UniqueConstraint, 4 | Integer, String, DateTime, Float) 5 | from sqlalchemy.ext.declarative import declarative_base 6 | from sqlalchemy.orm import relationship 7 | 8 | Base = declarative_base() 9 | 10 | 11 | class City(Base): 12 | __tablename__ = 'cities' 13 | city_id = Column(Integer, primary_key=True) 14 | name = Column(String(100), nullable=False, unique=True) 15 | 16 | @staticmethod 17 | def copy(city): 18 | return City(name=city.name) 19 | 20 | def __str__(self): 21 | return self.name 22 | 23 | 24 | class AggrMeasurement(Base): 25 | __tablename__ = 'measurements' 26 | measurement_id = Column(Integer, primary_key=True) 27 | avg_temp = Column(Float, nullable=False) 28 | city_id = Column(Integer, ForeignKey('cities.city_id')) 29 | city = relationship(City) 30 | 31 | def __str__(self): 32 | fmt_str = '{city:s}: {time:s}\n\tT = {temp:.1f} Celsius' 33 | return fmt_str.format(city=str(self.city), 34 | temp=self.avg_temp) 35 | 36 | 37 | if __name__ == '__main__': 38 | from argparse import ArgumentParser 39 | arg_parser = ArgumentParser(description='create tables in database') 40 | arg_parser.add_argument('db_name', help='name of DB to create') 41 | options = arg_parser.parse_args() 42 | from sqlalchemy import create_engine 43 | engine = create_engine('sqlite:///{0}'.format(options.db_name)) 44 | Base.metadata.create_all(engine) 45 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/create_sqlalchemy_db.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from sqlalchemy import (Column, ForeignKey, UniqueConstraint, 4 | Integer, String, DateTime, Float) 5 | from sqlalchemy.ext.declarative import declarative_base 6 | from sqlalchemy.orm import relationship 7 | 8 | Base = declarative_base() 9 | 10 | 11 | class City(Base): 12 | __tablename__ = 'cities' 13 | city_id = Column(Integer, primary_key=True) 14 | name = Column(String(100), nullable=False, unique=True) 15 | 16 | def __str__(self): 17 | return self.name 18 | 19 | 20 | class Measurement(Base): 21 | __tablename__ = 'measurements' 22 | __table_args__ = ( 23 | UniqueConstraint('time', 'city_id'), 24 | ) 25 | measurement_id = Column(Integer, primary_key=True) 26 | time = Column(DateTime, nullable=False) 27 | temperature = Column(Float, nullable=False) 28 | city_id = Column(Integer, ForeignKey('cities.city_id')) 29 | city = relationship(City) 30 | 31 | def __str__(self): 32 | fmt_str = '{city:s}: {time:s}\n\tT = {temp:.1f} Celsius' 33 | return fmt_str.format(city=str(self.city), 34 | time=str(self.time), 35 | temp=self.temperature) 36 | 37 | 38 | if __name__ == '__main__': 39 | from argparse import ArgumentParser 40 | arg_parser = ArgumentParser(description='create tables in database') 41 | arg_parser.add_argument('db_name', help='name of DB to create') 42 | options = arg_parser.parse_args() 43 | from sqlalchemy import create_engine 44 | engine = create_engine('sqlite:///{0}'.format(options.db_name)) 45 | Base.metadata.create_all(engine) 46 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/fill_aggr_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == '__main__': 4 | from argparse import ArgumentParser 5 | from sqlalchemy import create_engine 6 | from sqlalchemy.orm import sessionmaker 7 | from sqlalchemy.sql 
import func 8 | from create_sqlalchemy_db import (Base as OrigBase, City as OrigCity, 9 | Measurement as OrigMeasurement) 10 | from create_aggr_db import (Base as AggrBase, City as AggrCity, 11 | AggrMeasurement) 12 | arg_parser = ArgumentParser(description='create tables in database') 13 | arg_parser.add_argument('-orig_db', help='database with original data') 14 | arg_parser.add_argument('-aggr_db', help='database with aggregate data') 15 | options = arg_parser.parse_args() 16 | orig_engine = create_engine('sqlite:///{0}'.format(options.orig_db)) 17 | aggr_engine = create_engine('sqlite:///{0}'.format(options.aggr_db)) 18 | OrigDBSession = sessionmaker(bind=orig_engine) 19 | AggrDBSession = sessionmaker(bind=aggr_engine) 20 | orig_db_session = OrigDBSession() 21 | aggr_db_session = AggrDBSession() 22 | results = orig_db_session \ 23 | .query(func.avg(OrigMeasurement.temperature), OrigCity) \ 24 | .group_by(OrigCity.name) \ 25 | .all() 26 | for result in results: 27 | measurement = AggrMeasurement(avg_temp=result[0], 28 | city=AggrCity.copy(result[1])) 29 | aggr_db_session.add(measurement) 30 | aggr_db_session.commit() 31 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/fill_sqlalchemy_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == '__main__': 4 | from argparse import ArgumentParser 5 | from datetime import datetime 6 | import random 7 | from sqlalchemy import create_engine 8 | from sqlalchemy.orm import sessionmaker 9 | from create_sqlalchemy_db import City, Measurement, Base 10 | arg_parser = ArgumentParser(description='create tables in database') 11 | arg_parser.add_argument('db_name', help='name of DB to create') 12 | options = arg_parser.parse_args() 13 | engine = create_engine('sqlite:///{0}'.format(options.db_name)) 14 | Base.metadata.bind = engine 15 | DBSession = sessionmaker(bind=engine) 16 | db_session = 
DBSession() 17 | city_names = ['New York', 'London', 'Paris'] 18 | city_list = [] 19 | for city_name in city_names: 20 | city = City(name=city_name) 21 | db_session.add(city) 22 | city_list.append(city) 23 | db_session.commit() 24 | year = 2015 25 | month = 1 26 | hour = 10 27 | minutes = 0 28 | for day in range(1, 16): 29 | date = datetime(year, month, day, hour, minutes) 30 | for city in city_list: 31 | temperature = random.uniform(0.0, 30.0) 32 | measurement = Measurement(time=date, temperature=temperature, 33 | city=city) 34 | db_session.add(measurement) 35 | db_session.commit() 36 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/query_aggr_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == '__main__': 4 | from argparse import ArgumentParser 5 | from sqlalchemy import create_engine 6 | from sqlalchemy.orm import sessionmaker 7 | from sqlalchemy.sql import func 8 | from create_aggr_db import (Base as AggrBase, City as AggrCity, 9 | AggrMeasurement) 10 | arg_parser = ArgumentParser(description='create tables in database') 11 | arg_parser.add_argument('aggr_db', help='database with aggregate data') 12 | options = arg_parser.parse_args() 13 | aggr_engine = create_engine('sqlite:///{0}'.format(options.aggr_db)) 14 | AggrDBSession = sessionmaker(bind=aggr_engine) 15 | aggr_db_session = AggrDBSession() 16 | results = aggr_db_session.query(AggrMeasurement).all() 17 | for result in results: 18 | print("{0}: {1:.2f}".format(result.city.name, result.avg_temp)) 19 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/query_sqlalchemy_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from datetime import datetime 4 | 5 | 6 | def convert2date(date_str): 7 | year, month, day = list(map(int, 
date_str.split('-'))) 8 | return datetime(year, month, day) 9 | 10 | if __name__ == '__main__': 11 | from argparse import ArgumentParser 12 | import sys 13 | from sqlalchemy import create_engine 14 | from sqlalchemy.orm import sessionmaker 15 | from create_sqlalchemy_db import City, Measurement, Base 16 | arg_parser = ArgumentParser(description='create tables in database') 17 | arg_parser.add_argument('db_name', help='name of DB to create') 18 | arg_parser.add_argument('-list', action='store_true', 19 | help='show a list of all cities') 20 | arg_parser.add_argument('-city', help='city to query for') 21 | arg_parser.add_argument('-start', default='1980-01-01', 22 | help='start date') 23 | arg_parser.add_argument('-end', default='2030-12-31', 24 | help='end date') 25 | options = arg_parser.parse_args() 26 | 27 | engine = create_engine('sqlite:///{0}'.format(options.db_name)) 28 | Base.metadata.bind = engine 29 | DBSession = sessionmaker(bind=engine) 30 | db_session = DBSession() 31 | cities = db_session.query(City).all() 32 | if options.list: 33 | for city in cities: 34 | print(city.name) 35 | sys.exit(0) 36 | if options.city: 37 | if options.city not in [city.name for city in cities]: 38 | msg = '### error: {0} is not in DB\n' 39 | sys.stderr.write(msg.format(options.city)) 40 | start_date = convert2date(options.start) 41 | end_date = convert2date(options.end) 42 | measurements = db_session.query(Measurement) \ 43 | .join('city') \ 44 | .filter(City.name == options.city, 45 | start_date <= Measurement.time, 46 | Measurement.time <= end_date) \ 47 | .all() 48 | for measurement in measurements: 49 | print(measurement) 50 | -------------------------------------------------------------------------------- /source-code/db-access/Orm/update_sqlalchemy_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == '__main__': 4 | from argparse import ArgumentParser 5 | from sqlalchemy import 
create_engine 6 | from sqlalchemy.orm import sessionmaker 7 | from create_sqlalchemy_db import City, Base 8 | arg_parser = ArgumentParser(description='create tables in database') 9 | arg_parser.add_argument('db_name', help='name of DB to create') 10 | options = arg_parser.parse_args() 11 | 12 | engine = create_engine('sqlite:///{0}'.format(options.db_name)) 13 | Base.metadata.bind = engine 14 | DBSession = sessionmaker(bind=engine) 15 | db_session = DBSession() 16 | cities = db_session.query(City).all() 17 | for city in cities: 18 | city.name = city.name.lower() 19 | db_session.commit() 20 | -------------------------------------------------------------------------------- /source-code/db-access/README.md: -------------------------------------------------------------------------------- 1 | Database access 2 | =============== 3 | 4 | Accessing relational database systems from Python is straighforward. The 5 | Python standard library comes with a simple in-memory relational database 6 | system SQLite via de `sqlite3` module. 7 | Other RDBMS can be accessed similarly using SQLAlchemy. 8 | 9 | However, SQLAlchemy also support an object-relational mapping approach, 10 | which is also illustrated by sample code. 11 | 12 | 13 | What is it? 14 | ----------- 15 | 1. `ExperimentDB`: Example of SQLAlchemy's object-relational mapping, 16 | illustrating declarative schema definition, bidirectional associations 17 | and back references. Also adds a shell to manipulate the database 18 | using a domain specific language. 19 | 1. `Orm`: simple illustration of SQLAlchemy's ORM approach. 20 | 1. `StraightSql`: Example of how to interact with a relation database 21 | (SQLite3) using SQL from Python. 
22 | -------------------------------------------------------------------------------- /source-code/db-access/StraightSql/README.md: -------------------------------------------------------------------------------- 1 | # StraightSql 2 | Accessing relational database systems from Python is straighforward. The 3 | Python standard library comes with a simple in-memory relational database 4 | system SQLite via de `sqlite3` module. 5 | Other RDBMS can be accessed similarly using SQLAlchemy. 6 | 7 | ## What is it? 8 | 1. `fill_db.py`: this script will initialize an SQLite3 database by 9 | creating a table `weather` with three fields `city_code`, `date`, 10 | and `temperature`. The inserted values are randomly determined. 11 | 1. `query_db.py`: this script uses a database such as the one generated 12 | by the script above and cmoputes the average temperature for each 13 | city code between a given date range. 14 | 1. `weather-db`: an example SQLite database. 15 | -------------------------------------------------------------------------------- /source-code/db-access/StraightSql/fill_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | from datetime import date, timedelta 5 | import random 6 | import sqlite3 7 | import string 8 | 9 | 10 | def generate_city_codes(nr_cities, code_length=4): 11 | cities = [] 12 | for city_nr in range(nr_cities): 13 | cities.append(''.join([random.choice(string.letters) 14 | for i in range(code_length)])) 15 | return cities 16 | 17 | 18 | def convert_date(date_str): 19 | (year, month, day) = date_str.split('-') 20 | return date(int(year), int(month), int(day)) 21 | 22 | 23 | def date_xrange(start_str, end_str): 24 | start_date = convert_date(start_str) 25 | end_date = convert_date(end_str) 26 | delta = timedelta(days=1) 27 | curr_date = start_date 28 | while curr_date <= end_date: 29 | yield curr_date 30 | curr_date += delta 31 | 32 | 33 | 
def generate_data(nr_cities, start_str, end_str): 34 | city_codes = generate_city_codes(nr_cities=nr_cities) 35 | for curr_date in date_xrange(start_str, end_str): 36 | for city_code in city_codes: 37 | yield (city_code, curr_date, random.gauss(10.0, 15.0)) 38 | 39 | 40 | def main(): 41 | arg_parser = ArgumentParser(description='Randomly generate Sqlite ' 42 | 'database with weather data') 43 | arg_parser.add_argument('--nr_cities', action='store', type=int, 44 | default=3, help='number of cities') 45 | arg_parser.add_argument('--start', action='store', default='2012-01-01', 46 | help='date to start weather data') 47 | arg_parser.add_argument('--end', action='store', default='2012-01-31', 48 | help='date to end weather data') 49 | arg_parser.add_argument('--db', action='store', required=True, 50 | help='name of the database to use') 51 | options = arg_parser.parse_args() 52 | conn = sqlite3.connect(options.db) 53 | cursor = conn.cursor() 54 | cursor.execute('''CREATE TABLE IF NOT EXISTS weather ( 55 | city_code TEXT NOT NULL, 56 | date TEXT NOT NULL, 57 | temperature REAL NOT NULL)''') 58 | for data in generate_data(options.nr_cities, options.start, 59 | options.end): 60 | cursor.execute('''INSERT INTO weather 61 | (city_code, date, temperature) 62 | VALUES (?, ?, ?)''', 63 | data) 64 | print('inserted {0}, {1}, {2}'.format(data[0], data[1], data[2])) 65 | conn.commit() 66 | cursor.close() 67 | conn.close() 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /source-code/db-access/StraightSql/query_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from argparse import ArgumentParser 4 | import sqlite3 5 | 6 | 7 | def main(): 8 | arg_parser = ArgumentParser(description='Compute average temperatures') 9 | arg_parser.add_argument('--start', action='store', default='2012-01-01', 10 | help='date to start weather 
data') 11 | arg_parser.add_argument('--end', action='store', default='2012-01-31', 12 | help='date to end weather data') 13 | arg_parser.add_argument('--db', action='store', required=True, 14 | help='name of the database to use') 15 | options = arg_parser.parse_args() 16 | conn = sqlite3.connect(options.db) 17 | conn.row_factory = sqlite3.Row 18 | cursor = conn.cursor() 19 | cursor.execute('''SELECT city_code, 20 | AVG(temperature) AS 'temperature' 21 | FROM weather 22 | WHERE date BETWEEN ? AND ? 23 | GROUP BY city_code''', 24 | (options.start, options.end)) 25 | for row in cursor: 26 | print('{city}\t{tmp}'.format(city=row['city_code'], 27 | tmp=row['temperature'])) 28 | cursor.close() 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /source-code/db-access/StraightSql/weather-db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/db-access/StraightSql/weather-db -------------------------------------------------------------------------------- /source-code/gis/Data/.gitignore: -------------------------------------------------------------------------------- 1 | city_centers.json 2 | city_centers.csv 3 | -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000001.TablesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000001.TablesByName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000001.gdbindexes: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000001.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000001.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000001.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000001.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000001.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000002.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000002.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000002.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000002.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000003.gdbindexes: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000003.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000003.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000003.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000003.gdbtablx: -------------------------------------------------------------------------------- 1 | $  -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.CatItemsByPhysicalName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000004.CatItemsByPhysicalName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.CatItemsByType.atx: -------------------------------------------------------------------------------- 1 | {70737809-852C-4A03-9E22-2CECEA5B9BFA}{C673FE0F-7280-404F-8532-20755DD8FC06}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.FDO_UUID.atx: -------------------------------------------------------------------------------- 1 | {070D8E3E-9BE4-4F09-95D2-2831D354AAB4}{0BAEF130-2BD2-4FA6-AD04-1C87A674BC17}{3961E754-5BB7-4EAB-BB18-68553F911868}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.gdbindexes: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000004.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000004.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000004.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000004.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000004.spx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000005.CatItemTypesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000005.CatItemTypesByName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000005.CatItemTypesByParentTypeID.atx: -------------------------------------------------------------------------------- 1 | "  !" 
2 | {00000000-0000-0000-0000-000000000000}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{FFD09C28-FE70-4E25-907C-AF8E8A5EC5F3}&" -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000005.CatItemTypesByUUID.atx: -------------------------------------------------------------------------------- 1 | " "  2 | !  
{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{35B601F7-45CE-4AFF-ADB7-7702D3839B12}{4ED4A58E-621F-4043-95ED-850FBA45FCBC}{5B966567-FB87-4DDE-938B-B4B37423539D}{5ED667A3-9CA9-44A2-8029-D95BF23704B9}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{73718A66-AFB9-4B88-A551-CFFA0AE12620}{74737149-DCB5-4257-8904-B9724E32A530}{76357537-3364-48AF-A4BE-783C7C28B5CB}{767152D3-ED66-4325-8774-420D46674E07}{77292603-930F-475D-AE4F-B8970F42F394}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{787BEA35-4A86-494F-BB48-500B96145B58}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{8C368B12-A12E-4C7E-9638-C9C64E69E98F}{A300008D-0CEA-4F6A-9DFA-46AF829A3DF2}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{B606A7E1-FA5B-439C-849C-6E9C2481537B}{C29DA988-8C3E-45F7-8B5C-18E51EE7BEB4}{C673FE0F-7280-404F-8532-20755DD8FC06}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D5A40288-029E-4766-8C81-DE3F61129371}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{DB1B697A-3BB6-426A-98A2-6EE7A4C6AED3}{DC64B6E4-DC0F-43BD-B4F5-F22385DCF055}{DC9EF677-1AA3-45A7-8ACD-303A5202D0DC}{E6302665-416B-44FA-BE33-4E15916BA101}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F8413DCB-2248-4935-BFE9-315F397E5110}{FBDD7DD6-4A25-40B7-9A1A-ECC3D1172447}{FFD09C28-FE70-4E25-907C-AF8E8A5EC5F3}&" -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000005.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000005.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000005.gdbtable: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000005.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000005.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000005.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.CatRelsByDestinationID.atx: -------------------------------------------------------------------------------- 1 | {070D8E3E-9BE4-4F09-95D2-2831D354AAB4}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.CatRelsByOriginID.atx: -------------------------------------------------------------------------------- 1 | {0BAEF130-2BD2-4FA6-AD04-1C87A674BC17}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.CatRelsByType.atx: -------------------------------------------------------------------------------- 1 | {DC78F1AB-34E4-43AC-BA47-1C4EABD0E7C7}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.FDO_UUID.atx: -------------------------------------------------------------------------------- 1 | {335F1024-AD90-478A-BFC7-02B52ADCC936}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000006.gdbindexes 
-------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000006.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000006.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000006.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByBackwardLabel.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByBackwardLabel.atx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByDestItemTypeID.atx: -------------------------------------------------------------------------------- 1 |     2 |  
{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{A300008D-0CEA-4F6A-9DFA-46AF829A3DF2}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByForwardLabel.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByForwardLabel.atx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx: -------------------------------------------------------------------------------- 1 |    2 | 
{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{4ED4A58E-621F-4043-95ED-850FBA45FCBC}{5B966567-FB87-4DDE-938B-B4B37423539D}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{73718A66-AFB9-4B88-A551-CFFA0AE12620}{74737149-DCB5-4257-8904-B9724E32A530}{76357537-3364-48AF-A4BE-783C7C28B5CB}{767152D3-ED66-4325-8774-420D46674E07}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.CatRelTypesByUUID.atx: -------------------------------------------------------------------------------- 1 |   2 | {0D10B3A7-2F64-45E6-B7AC-2FC27BF2133C}{17E08ADB-2B31-4DCD-8FDD-DF529E88F843}{55D2F4DC-CB17-4E32-A8C7-47591E8C71DE}{583A5BAA-3551-41AE-8AA8-1185719F3889}{5DD0C1AF-CB3D-4FEA-8C51-CB3BA8D77CDB}{5F9085E0-788F-4354-AE3C-34C83A7EA784}{725BADAB-3452-491B-A795-55F32D67229C}{79CC71C8-B7D9-4141-9014-B6373E236ABB}{8DB31AF1-DF7C-4632-AA10-3CC44B0C6914}{908A4670-1111-48C6-8269-134FDD3FE617}{A1633A59-46BA-4448-8706-D8ABE2B2B02E}{B32B8563-0B96-4D32-92C4-086423AE9962}{CC28387C-441F-4D7C-A802-41A160317FE0}{D022DE33-45BD-424C-88BF-5B1B6B957BD3}{D088B110-190B-4229-BDF7-89FDDD14D1EA}{DC739A70-9B71-41E8-868C-008CF46F16D7}{DC78F1AB-34E4-43AC-BA47-1C4EABD0E7C7}{E79B44E3-F833-4B12-90A1-364EC4DDC43E}& -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000007.gdbindexes 
-------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000007.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000007.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000007.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000009.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000009.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000009.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000009.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/a00000009.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/a00000009.spx -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/gdb: 
-------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /source-code/gis/Data/Alabama.gdb/timestamps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/Alabama.gdb/timestamps -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000001.TablesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000001.TablesByName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000001.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000001.freelist -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000001.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000001.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000001.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000001.gdbtable 
-------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000001.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000001.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000002.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000002.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000002.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000002.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000003.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000003.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000003.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000003.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000003.gdbtablx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000003.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.CatItemsByPhysicalName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.CatItemsByPhysicalName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.CatItemsByType.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.CatItemsByType.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.FDO_UUID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.FDO_UUID.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.freelist -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.gdbindexes: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000004.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000004.spx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.CatRelsByDestinationID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.CatRelsByDestinationID.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.CatRelsByOriginID.atx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.CatRelsByOriginID.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.CatRelsByType.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.CatRelsByType.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.FDO_UUID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.FDO_UUID.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.freelist -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.gdbtable: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000005.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000005.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByBackwardLabel.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByBackwardLabel.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByDestItemTypeID.atx: -------------------------------------------------------------------------------- 1 |   2 | {28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{A300008D-0CEA-4F6A-9DFA-46AF829A3DF2}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}& -------------------------------------------------------------------------------- 
/source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByForwardLabel.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByForwardLabel.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByOriginItemTypeID.atx: -------------------------------------------------------------------------------- 1 |  2 |     {28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{4ED4A58E-621F-4043-95ED-850FBA45FCBC}{5B966567-FB87-4DDE-938B-B4B37423539D}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{73718A66-AFB9-4B88-A551-CFFA0AE12620}{74737149-DCB5-4257-8904-B9724E32A530}{76357537-3364-48AF-A4BE-783C7C28B5CB}{767152D3-ED66-4325-8774-420D46674E07}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}& -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.CatRelTypesByUUID.atx: -------------------------------------------------------------------------------- 1 |  2 |    
{0D10B3A7-2F64-45E6-B7AC-2FC27BF2133C}{17E08ADB-2B31-4DCD-8FDD-DF529E88F843}{55D2F4DC-CB17-4E32-A8C7-47591E8C71DE}{583A5BAA-3551-41AE-8AA8-1185719F3889}{5DD0C1AF-CB3D-4FEA-8C51-CB3BA8D77CDB}{5F9085E0-788F-4354-AE3C-34C83A7EA784}{725BADAB-3452-491B-A795-55F32D67229C}{79CC71C8-B7D9-4141-9014-B6373E236ABB}{8DB31AF1-DF7C-4632-AA10-3CC44B0C6914}{908A4670-1111-48C6-8269-134FDD3FE617}{A1633A59-46BA-4448-8706-D8ABE2B2B02E}{B32B8563-0B96-4D32-92C4-086423AE9962}{CC28387C-441F-4D7C-A802-41A160317FE0}{D022DE33-45BD-424C-88BF-5B1B6B957BD3}{D088B110-190B-4229-BDF7-89FDDD14D1EA}{DC739A70-9B71-41E8-868C-008CF46F16D7}{DC78F1AB-34E4-43AC-BA47-1C4EABD0E7C7}{E79B44E3-F833-4B12-90A1-364EC4DDC43E}& -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000006.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000006.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000006.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000006.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000007.CatItemTypesByName.atx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000007.CatItemTypesByName.atx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000007.CatItemTypesByParentTypeID.atx: -------------------------------------------------------------------------------- 1 | " !" 2 |   {00000000-0000-0000-0000-000000000000}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{FFD09C28-FE70-4E25-907C-AF8E8A5EC5F3}&  -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000007.CatItemTypesByUUID.atx: 
-------------------------------------------------------------------------------- 1 | "  !  2 |  "{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{35B601F7-45CE-4AFF-ADB7-7702D3839B12}{4ED4A58E-621F-4043-95ED-850FBA45FCBC}{5B966567-FB87-4DDE-938B-B4B37423539D}{5ED667A3-9CA9-44A2-8029-D95BF23704B9}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{73718A66-AFB9-4B88-A551-CFFA0AE12620}{74737149-DCB5-4257-8904-B9724E32A530}{76357537-3364-48AF-A4BE-783C7C28B5CB}{767152D3-ED66-4325-8774-420D46674E07}{77292603-930F-475D-AE4F-B8970F42F394}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{787BEA35-4A86-494F-BB48-500B96145B58}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{8C368B12-A12E-4C7E-9638-C9C64E69E98F}{A300008D-0CEA-4F6A-9DFA-46AF829A3DF2}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{B606A7E1-FA5B-439C-849C-6E9C2481537B}{C29DA988-8C3E-45F7-8B5C-18E51EE7BEB4}{C673FE0F-7280-404F-8532-20755DD8FC06}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D5A40288-029E-4766-8C81-DE3F61129371}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{DB1B697A-3BB6-426A-98A2-6EE7A4C6AED3}{DC64B6E4-DC0F-43BD-B4F5-F22385DCF055}{DC9EF677-1AA3-45A7-8ACD-303A5202D0DC}{E6302665-416B-44FA-BE33-4E15916BA101}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F8413DCB-2248-4935-BFE9-315F397E5110}{FBDD7DD6-4A25-40B7-9A1A-ECC3D1172447}{FFD09C28-FE70-4E25-907C-AF8E8A5EC5F3}&  -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000007.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000007.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000007.gdbtable: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000007.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000007.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000007.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000009.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000009.freelist -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000009.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000009.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000009.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000009.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000009.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000009.gdbtablx 
-------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a00000009.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a00000009.spx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000a.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000a.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000a.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000a.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000a.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000a.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000a.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000a.spx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000b.gdbindexes: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000b.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000b.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000b.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000b.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000b.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000b.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000b.spx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000c.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000c.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000c.gdbtable: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000c.gdbtable -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000c.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000c.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000c.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000c.spx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000d.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000d.freelist -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000d.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000d.gdbindexes -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000d.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000d.gdbtable 
-------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000d.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000d.gdbtablx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/a0000000d.spx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/a0000000d.spx -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/gdb: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /source-code/gis/Data/USA.gdb/timestamps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/USA.gdb/timestamps -------------------------------------------------------------------------------- /source-code/gis/Data/cea.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/gis/Data/cea.tif -------------------------------------------------------------------------------- /source-code/gis/README.md: -------------------------------------------------------------------------------- 1 | # Gis 2 | Sample code for processing GIS data using fiona for I/O. 3 | 4 | ## What is it? 5 | 1. 
1. `city_center.ipynb`: GeoPandas application to approximate the
Set the municipality code `CODE_INS` as the index for the dataframe.
Note that it is not part of Python's standard library.
#!/usr/bin/env python
"""Read a GraphML tree, assign a random weight to each edge, write it out.

The tree is expected to have its root labeled '1', as produced by
generate_random_tree.py.  Output is GraphML by default, or an edge list
when --edge-list is given.
"""

from argparse import ArgumentParser
import random
import sys


def add_edge_weights(tree):
    """Assign a random weight in [0.0, 1.0) to every edge of tree.

    The tree's root must be the node labeled '1'.
    """
    _add_edge_weights(tree, '1')


def _add_edge_weights(tree, node):
    """Recursively weight the edges of the subtree rooted at node."""
    # NOTE(review): neighbors_iter() is the networkx 1.x API; networkx 2.x
    # renamed it to neighbors() -- confirm the networkx version in use.
    for child in tree.neighbors_iter(node):
        tree[node][child]['weight'] = random.random()
        _add_edge_weights(tree, child)


def main():
    """Parse options, weight the tree read from --input, write --output.

    Returns 0 on success (used as the process exit status).
    """
    # networkx is only needed for I/O, so import it lazily; this also
    # lets the weighting helpers above be reused without networkx.
    import networkx as nx
    arg_parser = ArgumentParser(description='add edge weights to tree')
    # Bug fix: help strings read 'inpput'/'outpput' in the original.
    arg_parser.add_argument('--input', required=True,
                            help='input file')
    arg_parser.add_argument('--output', required=True,
                            help='output file')
    arg_parser.add_argument('--seed', type=int, default=None,
                            help='seed for random number generator')
    arg_parser.add_argument('--delim', dest='delimiter', default=' ',
                            help='delimiter for edge list')
    arg_parser.add_argument('--no-data', action='store_true',
                            dest='no_data', help='show edge data')
    arg_parser.add_argument('--edge-list', action='store_true',
                            help='generate edge list output')
    options = arg_parser.parse_args()
    random.seed(options.seed)
    tree = nx.read_graphml(options.input)
    add_edge_weights(tree)
    if options.edge_list:
        nx.write_edgelist(tree, options.output,
                          delimiter=options.delimiter,
                          data=not options.no_data)
    else:
        nx.write_graphml(tree, options.output)
    return 0

if __name__ == '__main__':
    status = main()
    sys.exit(status)
def random_subtree(G, root, max_branch, max_height):
    """Recursively grow a random subtree of G below root.

    When root is None, the root node '1' is created first.  Each node
    gets a uniformly random number of children in [0, max_branch];
    children of node 'p' are labeled 'p.1', 'p.2', ...  Recursion stops
    once max_height levels have been consumed.
    """
    if not max_height:
        return
    if not root:
        # No root yet: start the tree at node '1' and grow below it.
        random_subtree(G, '1', max_branch, max_height - 1)
        return
    branch_count = random.randrange(0, max_branch + 1)
    for child_nr in range(1, branch_count + 1):
        child = '{0}.{1}'.format(root, child_nr)
        G.add_edge(root, child)
        random_subtree(G, child, max_branch, max_height - 1)


def main():
    """Generate a random tree and write it as GraphML or an edge list."""
    parser = ArgumentParser(description='generate random tree')
    parser.add_argument('--output', required=True,
                        help='output file name')
    parser.add_argument('--branching', dest='max_branch', type=int,
                        default=3, help='maximum node branching')
    parser.add_argument('--height', dest='max_height', type=int,
                        default=4, help='maximum tree height')
    parser.add_argument('--seed', type=int, default=None,
                        help='seed for random number generator')
    parser.add_argument('--delim', dest='delimiter', default=' ',
                        help='delimiter for edge list')
    parser.add_argument('--no-data', action='store_true',
                        dest='no_data', help='show edge data')
    parser.add_argument('--edge-list', action='store_true',
                        dest='edge_list',
                        help='generate edge list output')
    options = parser.parse_args()
    random.seed(options.seed)
    tree = random_tree(options.max_branch, options.max_height)
    if options.edge_list:
        nx.write_edgelist(tree, options.output,
                          delimiter=options.delimiter,
                          data=not options.no_data)
    else:
        nx.write_graphml(tree, options.output)
    return 0

if __name__ == '__main__':
    status = main()
    sys.exit(status)
#!/usr/bin/env python
"""Experiment with networkx's maximum flow algorithm.

Builds a small flow network of parallel paths: a source (node 0), two
layers of intermediate nodes, and a sink.  Edge capacities are drawn
from a gamma distribution; the maximum flow and per-edge flows are
printed.
"""

from argparse import ArgumentParser
import networkx as nx
import random


def create_graph(nr_nodes_per_layer=3):
    """Create a random three-layer flow network.

    Every source->layer-1, layer-1->layer-2 and layer-2->sink edge gets
    a gamma(3, 0.5) distributed capacity, printed as it is assigned.

    Returns a (graph, source, sink) tuple.
    """
    alpha = 3
    beta = 0.5
    G = nx.DiGraph()
    source = 0
    sink = 2*nr_nodes_per_layer + 1
    fmt_str = 'capacity {0} -> {1}: {2:.3f}'
    # from source to first layer
    for i in range(1, nr_nodes_per_layer + 1):
        capacity = random.gammavariate(alpha, beta)
        G.add_edge(source, i, capacity=capacity)
        print(fmt_str.format(source, i, capacity))
    # from layer 1 to layer 2
    for i in range(1, nr_nodes_per_layer + 1):
        j = i + nr_nodes_per_layer
        capacity = random.gammavariate(alpha, beta)
        G.add_edge(i, j, capacity=capacity)
        print(fmt_str.format(i, j, capacity))
    # from layer 2 to sink
    for i in range(nr_nodes_per_layer + 1, 2*nr_nodes_per_layer + 1):
        capacity = random.gammavariate(alpha, beta)
        G.add_edge(i, sink, capacity=capacity)
        print(fmt_str.format(i, sink, capacity))
    return G, source, sink


def print_flow_dict(G, flow_dict):
    """Print the computed flow over each edge of G."""
    # NOTE(review): edges_iter() is the networkx 1.x API; 2.x renamed it
    # to edges() -- confirm the networkx version in use.
    for edge in G.edges_iter():
        i, j = edge
        print('flow {0} -> {1}: {2:.3f}'.format(i, j, flow_dict[i][j]))

if __name__ == '__main__':
    arg_parser = ArgumentParser(description='experiment with maximum flow '
                                'algorithm')
    # Bug fix: --n had no default, so omitting it passed None to
    # create_graph() and crashed in range(); default to the function's
    # own default of 3.
    arg_parser.add_argument('--n', type=int, default=3,
                            help='number of nodes/layer')
    options = arg_parser.parse_args()
    G, source, sink = create_graph(options.n)
    flow_value, flow_dict = nx.maximum_flow(G, source, sink)
    print('value = {0:.3f}'.format(flow_value))
    print_flow_dict(G, flow_dict)
class Reference(object):
    """A bibliographic reference (author, title, journal, year).

    The citation key is unknown at construction time; it is attached
    afterwards through the ``key`` property setter.
    """

    # Fields stored under a leading underscore, exposed via properties.
    _FIELDS = ('author', 'title', 'journal', 'year')

    def __init__(self, author, title, journal, year):
        for name, value in zip(self._FIELDS, (author, title, journal, year)):
            setattr(self, '_' + name, value)
        # No key yet; assigned later via the key setter.
        self._key = None

    @property
    def author(self):
        """Author of the referenced work."""
        return self._author

    @property
    def title(self):
        """Title of the referenced work."""
        return self._title

    @property
    def journal(self):
        """Journal the work appeared in."""
        return self._journal

    @property
    def year(self):
        """Publication year."""
        return self._year

    @property
    def key(self):
        """Citation key, or None if not yet assigned."""
        return self._key

    @key.setter
    def key(self, ref_key):
        self._key = ref_key

    def format(self):
        """Return a one-line human-readable rendering of the reference."""
        fields = dict(key=self.key, author=self.author, title=self.title,
                      journal=self.journal, year=self.year)
        fmt_str = '[{key}] {author} "{title}", {journal}, {year}'
        return fmt_str.format(**fields)

    def __str__(self):
        return self.key
def create_graph():
    """Build the citation DiGraph: an edge A -> B means B cites A."""
    refs = create_refs()
    citations = nx.DiGraph()
    # (cited, citing) key pairs; insertion order kept as before.
    cited_by = [
        ('A01', 'B02a'), ('A01', 'B02b'), ('A01', 'E02'),
        ('A01', 'A03'), ('A01', 'T04'), ('B02a', 'B02b'),
        ('B02a', 'T04'), ('E02', 'T04'),
    ]
    for cited, citing in cited_by:
        citations.add_edge(refs[cited], refs[citing])
    # C03 neither cites nor is cited, but must still appear in the graph.
    citations.add_node(refs['C03'])
    return citations


def show_sorted(G):
    """Print the references of G in topologically sorted order."""
    for node in nx.algorithms.topological_sort(G):
        print(node.format())

if __name__ == '__main__':
    parser = ArgumentParser(description='illustration of topological '
                            'sort')
    parser.parse_args()
    show_sorted(create_graph())
visualization of water levels as measured by various sensors.
using Bokeh as a plotting backend for pandas.
10 | 11 | *Note:* currently (2023-03-22) the bokeh backend for pandas requires 12 | bokeh 2.x, and that has to be specified as an explicit dependency. 13 | -------------------------------------------------------------------------------- /source-code/pandas/bokeh/environment.yml: -------------------------------------------------------------------------------- 1 | name: pandas_bokeh 2 | channels: 3 | - conda-forge 4 | - patrikhlobil 5 | dependencies: 6 | - pandas-bokeh 7 | - bokeh==2.4.3 8 | - pandas 9 | - jupyterlab 10 | - ca-certificates 11 | - openssl 12 | prefix: /home/gjb/mambaforge/envs/pandas_bokeh 13 | -------------------------------------------------------------------------------- /source-code/pandas/data/README_DATA.rst: -------------------------------------------------------------------------------- 1 | This data directory contains datasets for the climate_timeseries demoes of the 2 | Pandas tutorial for SciPy2015 conference. 3 | 4 | Sources: 5 | - `temperatures/GLB.Ts+dSST.txt` : Global land surface and 6 | sea surface temperature anomalies (http://data.giss.nasa.gov/gistemp/) 7 | - `temperatures/temperature_90N-60S.dat`: Other datasets downloaded from 8 | http://www.ipcc-data.org/observ/clim/ar4_global.html: 9 | 10 | - `extreme_events/StormEvents_details-*.csv.gz` : NOAA's severe weather data 11 | inventory (http://www1.ncdc.noaa.gov/pub/data/swdi/stormevents/csvfiles/). 12 | 13 | - Global sea_levels since 1992 by the university of Colorado can be downloaded 14 | at http://sealevel.colorado.edu/files/2015_rel2/sl_ns_global.txt 15 | Regional sea levels can be found at http://sealevel.colorado.edu/content/regional-sea-level-time-series 16 | 17 | - `greenhouse_gaz/co2_mm_global.txt` Globally averaged marine surface monthly 18 | mean CO2 records (http://www.esrl.noaa.gov/gmd/ccgg/trends/global.html). More 19 | datasets at http://www.esrl.noaa.gov/gmd/dv/data/. 
20 | - `greenhouse_gaz/gv_ch4/` Methane concentration 21 | http://www.esrl.noaa.gov/gmd/dv/data/index.php?parameter_name=Carbon%2BDioxide 22 | 23 | - `csv_files/`: a collection of CSV files in various formats to check the robustness 24 | of parsers. 25 | -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/mac.csv: -------------------------------------------------------------------------------- 1 | 978,-8.602e+02,Ke1zkzp -694,2.401e+02,ge -905,-5.424e+02,G 208,-9.164e+02,xZei0U 748,-8.008e+02,guEJJECK 499,-6.565e+01,nV 743,6.678e+02,Q -504,4.512e+02,07 -117,-2.099e+02,wjH8 -203,-5.841e+02,9 955,7.566e+02,wxTFU -508,-5.845e+02,gi5cxVb 657,-5.503e+02,c6TNnvPbQ 597,1.831e+02,gGgFcf7a7 -433,3.931e+02,SeaM 272,9.580e+01,YyNi 655,4.457e+02,LmP -909,6.144e+02,Wt71FdH1q4 -614,-3.866e+02,ynf -986,-4.678e+02,Rkw 801,-5.830e+02,cvUa 285,-4.007e+01,96L -882,6.181e+02,68eVsh 408,4.749e+02,kDK -761,3.691e+02,aj9482KL 528,4.291e+02,1jjCxDNOe 145,-1.477e+02,aAuCiqI -694,-7.701e+02,zIFY 495,4.393e+02,THm -423,-5.679e+02,44fIr 27,5.895e+02,kkvh 609,-4.880e+02,fPhX -755,8.026e+02,TsAyCQ2 859,8.710e+02,bsjWo 93,-9.786e+02,pE 147,-7.357e+02,c 123,-4.987e+02,oiFK 532,-2.097e+02,njx -154,4.674e+02,3 -956,-9.122e+02,brXezWZiqJ -821,6.238e+02,7OdV -531,4.811e+02,d 238,-7.625e+02,O8DL6Q -262,7.049e+02,iQ0p 166,-6.030e+02,93fZ2O 365,-9.061e+02,q -48,-8.025e+02,pZxGG -835,3.467e+02,neqMqd 309,9.231e+02,H7cKGZM 512,5.812e+02,R62 23,1.338e+02,XnLCoB 369,-3.173e+02,iSZ9EW 799,1.523e+02,obBJFzP5J 30,4.770e+01,cQm3pzq 289,-7.915e+02,0wIDmrBk 406,7.186e+02,NqAVgapeCq 419,-9.563e+01,WFhJAyqP -354,-1.950e+02,Rw -413,-2.096e+02,Mtb -531,2.368e+02,zSULB8 114,7.441e+02,8Ei -724,-8.430e+02,o0z -306,-6.263e+02,jDf 4,6.165e+02,WZgt 319,2.885e+02,Pmr -925,2.693e+02,gGbSHzXnzu 893,-3.361e+02,1PdJj -530,-5.306e+02,zrLo1o -346,2.161e+02,ud -920,-7.844e+02,ZsfRgsJf -119,-7.831e+02,FmsPdi 783,8.940e+01,Aeacjoj8 -411,9.841e+02,AEUCvBuF 
-792,-8.380e+02,K 693,5.984e+01,9MzBst 827,2.592e+02,pXxta6h 332,7.972e+01,7asCvvJ 954,-9.351e+02,tma 721,-8.859e+02,l9uy4gXCs 400,-3.412e+02,JIwHCXZobO 217,-4.659e+02,vV0Mjojjiz 981,-7.075e+02,Awh5JKJIJ -302,-7.628e+02,KcqYjGj1W -140,1.748e+02,ffDKYpQVr5 783,-3.433e+02,osL -422,2.159e+02,phePDbo7D 420,-9.348e+02,ngHOe3Tw3R 452,1.053e+02,A 318,8.338e+01,rFShg 482,2.043e+02,F -199,-7.887e+02,yEM 190,5.187e+02,pIipCLOK -74,8.999e+02,9ZAk 153,-7.424e+02,qFGqdi 693,7.617e+02,7QnusT8ac 572,-4.410e+02,x0bKy 304,-1.209e+02,F -947,2.824e+02,hGxfjJmLd -834,1.627e+02,hdNFlMEZ 24,3.736e+02,xV8 -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/semi_colon_sep_mac.csv: -------------------------------------------------------------------------------- 1 | 492;572.774619;"HG7pPylS" 267;209.345881;"eNLhc7uT" 260;412.237490;"Htk1" -565;-845.585043;"WMpzvDKW" 863;959.985552;"XTqSi7zfz" 622;-50.990346;"EBuvF" 533;-653.184706;"vtjz9QBf" -672;323.620303;"B" -210;410.846882;"HF4kaRiw9G" -602;-956.862879;"62QmKOUcG" -495;-609.414057;"nQh" 744;-487.792947;"EfeaRD2kb" 31;-91.279616;"mibL" 955;-794.356853;"OLC5l10" 31;-283.885054;"r9q59hY8" -791;256.592637;"B6re5s0" -800;300.283395;"U" -19;-265.379815;"Q56om6HFI" 904;176.583069;"cZfBgbsPG" -742;584.061814;"AeC" -139;9.832187;"usn" -614;356.755901;"cEHy" 474;293.356899;"akA" 723;-78.322503;"EiG1929bn" -730;-401.691028;"eF7" 740;-405.132195;"3fsZfVZGyd" 886;-89.835917;"MCc0QGS920" 130;226.510108;"zqZuA7Md" 977;-835.693221;"xKYP" 834;-85.722676;"Zn2wyxOwK" -747;-220.877022;"EmJJZgDu9" -37;925.263576;"fvT" 398;905.312241;"rOoJzIW7U" 316;-58.432344;"GgQ1xWW8" 752;589.413823;"30Nlbctb" 600;-652.182275;"S4SKf" 772;356.262740;"ldD" 153;481.118307;"cZcVqh" 750;737.147059;"mi" 579;-274.758133;"A" -775;961.843778;"qw4BfjNC" 631;-730.293098;"y6aPQ" 85;672.970644;"lGnMeQ" 317;740.043263;"1jp99I" 927;-290.856432;"NVB6fq2" -434;239.867438;"g" -614;786.726041;"CoEEBSp5XN" 
178;173.247801;"AsM1ZMA" -780;-257.326477;"y" -524;-718.150272;"XviI4" 799;-324.006435;"Brui2t" -949;604.063451;"u" 988;947.703916;"ue2Vg" -788;-382.955822;"dnfxgDCWF" -728;846.697569;"5EgtDE1tG" 790;922.988744;"iWPwye" 744;719.042960;"nOnJ2c" -441;667.950534;"ZfiTA" 715;-852.029115;"ABRDyVniI" 195;-666.782009;"v9QJuM" 935;171.050133;"wO" -249;-244.149593;"HSp" 957;447.875697;"yj" -831;-881.878052;"ZD3fj5" 858;614.423292;"1gqhI3" 275;-807.261303;"lj5TUbdXS" 533;-501.035350;"6" -164;382.515768;"BiKGC" -526;699.347505;"xzmZd" -225;677.863233;"RfI7gamTsJ" -574;-200.086048;"8ffanJNlzB" -87;649.364756;"jHJe7X5" -422;696.657854;"HaYL" -208;47.205893;"yk3o1m" 801;965.770152;"2pIe" 228;434.035927;"DraSM0" -997;218.689032;"Zu" -809;-796.450552;"5nJ" -928;808.042471;"JqR3" -325;-779.255834;"1rrFqJe" -927;277.931804;"IGqaR" 203;-712.986631;"DkYkPGslX" 552;801.989389;"jCoKuDd2" 33;229.599220;"scZVAlKKKL" -84;-193.520542;"8" 703;481.641365;"85yCvad" 51;937.951427;"gV2NzywoD3" 290;398.689532;"aIwZ70T" 237;-12.072185;"6fIVc7LrTh" -523;763.538009;"GBWVLJ47" -449;-234.914465;"TP" 623;832.372645;"pxj4" -649;502.136974;"F6CwMBTR" 936;-497.744871;"vN" -800;893.842426;"O" -620;534.478588;"L" -774;569.247999;"Pbo35" 817;441.510084;"9X0H3XYP" -577;929.126989;"r" -1;341.000624;"P8gLqvJ" -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/single_column_mac.csv: -------------------------------------------------------------------------------- 1 | 970.842376 -101.983593 -610.710994 890.780520 -374.796609 363.578911 509.176899 588.709595 -1.910531 156.579025 977.968570 44.131056 -370.340582 -198.488829 0.267611 841.399338 550.606816 -262.789960 -752.625239 849.867006 -878.832208 -0.124232 -695.162788 866.846966 -100.879772 329.046051 28.757907 -967.386149 334.723770 269.807491 -436.236099 193.400581 -137.671597 56.556920 101.185650 127.334063 0.745534 802.795226 380.938982 710.435145 -433.582569 994.516601 692.342719 435.823558 
-605.541108 -726.035870 111.228588 -200.261408 -509.959596 159.081217 -112.017329 940.490419 -541.483643 913.740026 -853.477788 850.596492 478.082800 897.336383 291.980214 729.098278 39.376509 -265.003905 785.203763 -704.437804 119.655080 -792.230214 -431.711048 274.726705 -544.606535 -914.507046 555.278259 -908.533073 -286.056471 931.206525 341.497714 885.377027 165.425222 890.049096 -650.564607 -737.856613 98.458676 117.732485 -208.346073 -605.677275 20.452098 -159.057158 309.334523 -124.425325 -686.049265 -357.543632 -231.676967 -172.804433 881.540345 -410.259736 51.345119 -292.491868 199.163960 -901.954864 477.078881 780.365131 -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/single_column_unix.csv: -------------------------------------------------------------------------------- 1 | 255.784415 2 | 496.840212 3 | -333.265569 4 | 300.508296 5 | -508.803324 6 | -804.431992 7 | 888.609750 8 | -320.067686 9 | 169.277733 10 | 888.007619 11 | -514.606644 12 | -16.888510 13 | -566.876288 14 | 82.536657 15 | -139.311808 16 | 804.914019 17 | 510.884950 18 | -44.775407 19 | -753.969826 20 | -891.906357 21 | 487.742013 22 | -663.539886 23 | -788.538217 24 | -328.273438 25 | -791.290225 26 | -800.211072 27 | 355.822164 28 | -345.595950 29 | 258.826702 30 | 267.892482 31 | 765.884615 32 | 559.423184 33 | 999.670168 34 | -36.640997 35 | 37.622374 36 | 553.531485 37 | -121.495563 38 | -639.885689 39 | -279.902200 40 | -616.068502 41 | 152.866109 42 | -139.561608 43 | 684.551166 44 | 863.926384 45 | 926.645077 46 | -426.385452 47 | -158.413961 48 | -598.905701 49 | 87.212340 50 | 522.022500 51 | 634.360472 52 | -572.186879 53 | 628.598339 54 | 693.515976 55 | 356.134742 56 | -799.057944 57 | -105.995049 58 | 998.561827 59 | 5.933852 60 | -34.767252 61 | -771.440139 62 | 696.258202 63 | -546.088582 64 | 806.913579 65 | -724.153653 66 | -889.310797 67 | -118.271573 68 | -384.119106 69 | -523.739915 70 | 
-972.315006 71 | 982.480867 72 | -537.233272 73 | -594.931895 74 | -733.653696 75 | -254.718005 76 | 9.899194 77 | 270.631512 78 | 541.382319 79 | -799.804199 80 | -295.077946 81 | -243.168208 82 | -910.261691 83 | 77.076674 84 | 386.952903 85 | -558.451717 86 | 971.048095 87 | 878.693154 88 | 870.354188 89 | 847.350912 90 | 325.633582 91 | -973.443779 92 | 981.959881 93 | 771.402454 94 | -608.780064 95 | -217.635291 96 | -614.470930 97 | -841.074320 98 | -287.757695 99 | 434.686611 100 | 111.016075 101 | -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/single_column_windows.csv: -------------------------------------------------------------------------------- 1 | -460.422256 2 | -563.501793 3 | 243.142906 4 | 993.261988 5 | -642.822485 6 | -253.616632 7 | -616.026005 8 | -624.165568 9 | -331.697016 10 | -122.658724 11 | -919.694375 12 | 482.034007 13 | -52.860629 14 | -962.406132 15 | -279.144563 16 | -292.961891 17 | 342.780824 18 | -811.657035 19 | 378.001146 20 | 604.674903 21 | 665.963438 22 | -922.070163 23 | 724.199238 24 | -196.721264 25 | 152.152585 26 | -593.930577 27 | -155.643495 28 | -425.660630 29 | 611.555269 30 | -335.051093 31 | 867.948503 32 | 670.261593 33 | 428.030637 34 | 777.308762 35 | 481.965001 36 | -57.641920 37 | -186.111305 38 | 2.456891 39 | -770.208518 40 | 83.321799 41 | -125.788753 42 | -503.426852 43 | -487.752085 44 | -786.694434 45 | -211.816811 46 | -461.860017 47 | -171.030781 48 | 254.902045 49 | 2.791048 50 | 523.154791 51 | -130.812095 52 | -311.378317 53 | 447.475871 54 | 690.503574 55 | -915.963632 56 | 887.522157 57 | 933.201027 58 | 534.133229 59 | 141.485039 60 | -841.904147 61 | -531.037114 62 | -285.882631 63 | 500.049850 64 | 689.708215 65 | 465.548424 66 | 544.992732 67 | -360.165516 68 | -873.343532 69 | 362.932339 70 | 455.615850 71 | -471.104568 72 | 981.854543 73 | -813.773755 74 | 623.711305 75 | -444.833111 76 | 114.212391 77 | -704.559869 78 | 
221.878172 79 | 649.454562 80 | 850.252144 81 | -409.912874 82 | -900.688838 83 | 504.344743 84 | 638.522922 85 | 350.990797 86 | -717.171990 87 | 329.823300 88 | 794.360943 89 | -637.343944 90 | -209.033777 91 | -315.335395 92 | -518.108847 93 | -517.967982 94 | 742.317527 95 | 260.361960 96 | -398.183459 97 | -38.275977 98 | 663.343405 99 | 185.663654 100 | 284.429843 101 | -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/unix.csv: -------------------------------------------------------------------------------- 1 | 323,9.278e+02,elW4c 2 | 441,2.652e+02,qJ 3 | 327,2.666e+02,0WQhSYQe 4 | 219,9.294e+01,6cPRczsl 5 | -193,-5.128e+02,i 6 | 252,4.625e+02,jk5g 7 | 46,-4.597e+02,wv3tA1 8 | -574,1.225e+02,lKWFP3oFwD 9 | 98,1.844e+02,O1wwV 10 | -830,3.554e+02,KhTxhH9MRq 11 | 229,-2.362e+02,3 12 | -520,-8.260e+02,X 13 | 68,2.249e+02,m3WI 14 | 598,-9.950e+02,YE7y0Nj3 15 | -225,7.161e+02,HVU0g1Mg9O 16 | 339,1.685e+02,uYoclMG 17 | 130,-9.535e+02,Fh 18 | 322,5.379e+02,lfL3YkTiO5 19 | 440,6.525e+02,l1bj8 20 | -853,3.212e+02,9oY 21 | -478,6.635e+02,Fb9 22 | 614,-5.771e+01,Bw7rhC 23 | -572,2.693e+02,ihk5VzMn 24 | -858,1.385e+02,DLHJZ 25 | -53,-1.914e+02,8R8DT3WT 26 | -213,7.421e+01,62I 27 | 460,8.733e+01,Sb 28 | -628,-8.935e+02,VfJZ7M0v 29 | 164,5.096e+02,4Le6Rnujx 30 | -563,-2.321e+01,Z7YpqjhrsE 31 | 105,9.659e+02,uxPTpo 32 | -73,6.918e+02,O 33 | 490,-5.819e+02,fceQKZ 34 | 714,-1.140e+02,8wDz 35 | -418,3.243e+02,5gDaJyNc 36 | 210,-5.967e+02,ivdh 37 | 664,-9.200e+01,fvS 38 | 830,-7.916e+02,3r0wDPbl 39 | -683,-3.299e+02,ecJ5qo 40 | -499,4.678e+02,z2 41 | 629,2.424e+02,gMC 42 | 644,8.774e+02,xomo 43 | 152,9.256e+01,7B1XZ 44 | 351,-1.856e+02,uWhVQVHkH 45 | 981,-6.301e+02,JxUt9R 46 | 698,-6.651e+02,l5G 47 | 961,7.277e+02,d 48 | -813,3.336e+02,WwYU8 49 | -294,-4.850e+02,EL8cH2 50 | -541,7.701e+02,vaEtVzWbcH 51 | -612,-4.258e+02,3fJfFOdtP 52 | 438,7.681e+02,M4SxNru4DA 53 | -258,-3.248e+02,xRZ6CUkgB 54 | 
234,3.920e+02,GEnLF 55 | -791,2.015e+02,bCP 56 | -385,9.943e+02,2s 57 | -872,-4.872e+01,ZrpuRCgq 58 | 876,6.926e+02,Teem7f 59 | 468,2.593e+02,YJHpd 60 | 68,-2.346e+02,Dydxfe6 61 | 379,-1.106e+02,q38s 62 | -995,9.408e+02,r 63 | -121,4.865e+02,MTidWstF 64 | -541,7.971e+02,AM1MVcBa 65 | -790,8.681e+02,FedZ 66 | 128,8.427e+02,6SW8y 67 | 834,-9.960e+02,WdTMaIri 68 | -845,3.423e+02,k1BvjA8 69 | 140,-4.864e+02,nwNjwzCX 70 | 431,3.562e+02,403cVKZQlM 71 | 141,4.750e+02,dz4x 72 | 544,5.505e+02,eWNaEuIFnA 73 | 173,-7.099e+02,4o2C 74 | 516,-4.158e+02,VhkOF 75 | -782,2.080e+01,0U4X4m 76 | 779,7.738e+02,q 77 | -424,4.017e+02,Zz 78 | 352,-9.812e+02,sS3FReUU38 79 | 24,-1.334e+02,1qGGMg 80 | -771,2.315e+02,SO5kl 81 | 781,8.727e+01,J6hpsSC 82 | -638,-9.130e+02,HUtPikF6N 83 | -841,-8.268e+02,CDsF4E 84 | -347,7.089e+02,uUIu 85 | 254,2.591e+02,Qhab6L 86 | -193,6.933e+02,6V 87 | -611,-3.170e+02,3VZ5q3M8pJ 88 | 992,9.104e+02,2AoAc 89 | -96,4.184e+02,lBVbIZ9xp 90 | -80,6.468e+02,xK0t 91 | -345,-8.250e+02,VN 92 | -204,-3.883e+02,VRtYaz 93 | -845,1.835e+02,MR38ZSWjE 94 | 257,-9.685e+02,411V1i7kw 95 | 55,6.176e+02,wgvOs 96 | 255,-2.648e+02,7IYvLR1 97 | 872,-3.226e+01,2k1QR1Q 98 | 292,9.024e+02,2ImGFXb 99 | -463,-7.520e+02,K7q4VB 100 | -399,4.012e+02,KlyhdMu8Ap 101 | -------------------------------------------------------------------------------- /source-code/pandas/data/csv_files/windows.csv: -------------------------------------------------------------------------------- 1 | 791,-4.555e+02,w 2 | 623,3.414e+01,UTdF 3 | -203,-5.175e+00,qyB 4 | -948,-9.479e+01,z 5 | -269,4.752e+02,6 6 | -531,8.212e+01,h4w7s 7 | -998,-8.778e+02,i 8 | 538,8.102e+02,78Umdip9 9 | -91,-2.861e+00,NS 10 | -100,-1.548e+02,ZVFrD 11 | 959,-6.889e+02,o3J8 12 | -18,-7.170e+02,3xltZxhoWm 13 | 247,-4.391e+02,TAd0zmp 14 | 244,-5.625e+02,s 15 | 678,9.513e+02,Wx 16 | -532,3.262e+02,g 17 | 308,-6.964e+02,u9CGx 18 | 820,8.664e+02,ctlzj0 19 | 288,-1.855e+02,YwiVD 20 | -447,-5.231e+02,DXOz 21 | -514,7.725e+02,ptCCA9ySgT 22 | 
601,-2.547e+02,1Zp2L 23 | 50,-6.394e+02,q6vNo 24 | 278,5.791e+02,y 25 | 612,-2.964e+02,PHSa 26 | 495,-7.799e+02,O 27 | -826,9.602e+02,GqaqLxU5 28 | 151,-3.555e+02,3WfWZZ83Ig 29 | -528,-1.752e+02,XEUOUaU9Y 30 | -828,8.992e+02,n 31 | -183,4.353e+02,54pe5I 32 | -530,-2.183e+02,6kZ2JNIh 33 | -662,-5.193e+02,Lks2YwBy 34 | -166,7.273e+02,Q 35 | -350,5.565e+02,Lm 36 | 794,-8.849e+02,HAPGJDD5 37 | -741,1.551e+02,egtA8 38 | 715,-4.185e+02,PceX 39 | 619,6.985e+02,wlxSxN7mt 40 | 416,8.531e+01,Um 41 | -46,1.547e+02,Ax 42 | -192,3.966e+02,Jiy 43 | 232,1.860e+01,uec9w6e 44 | -548,-8.634e+02,p 45 | -151,5.642e+02,rrLU2bb 46 | -330,3.300e+01,56OrF6FYW 47 | -185,-4.423e+02,pT9ZZA 48 | -539,4.652e+02,uZwHcJDgjL 49 | 510,6.863e+02,oKoGqy 50 | -259,-5.505e+02,y 51 | 12,3.308e+02,E 52 | 176,6.532e+02,TViGM 53 | -83,-3.639e+02,qb2mIcd 54 | 389,-1.864e+02,px1iun 55 | -320,7.971e+02,CVXCFZ 56 | 476,1.186e+02,IyoCo 57 | 954,-3.827e+02,Hv 58 | 169,-3.969e+00,jHO0fTKAV 59 | -873,-2.160e+02,je7j1F0u 60 | 177,-6.493e+02,NfZAcivH 61 | 644,-1.307e+02,i5YXwNDHCX 62 | 1000,-5.444e+02,7Eag 63 | 642,-7.653e+02,wzZIKKyNOm 64 | -163,7.451e+02,Zqq 65 | 180,4.495e+02,b 66 | 862,-5.025e+02,mubyY0 67 | 871,-9.222e+02,du4 68 | 538,-4.275e+02,av 69 | 471,-4.785e+01,do 70 | -572,-5.123e+02,yEHQu 71 | -607,-6.330e+02,ec6t3PArny 72 | 534,5.776e+02,Xet9Is87wq 73 | 829,8.102e+01,nS3z 74 | 570,9.702e+02,udIXkjNG0F 75 | -343,2.196e+02,2K 76 | -127,-2.203e+02,Up0tPbDIRU 77 | -4,-7.146e+02,fFYhJLe2fL 78 | -39,3.715e+02,mVBfBz8 79 | -135,1.650e+02,o8v3ukg 80 | 620,5.226e+02,sV 81 | -70,7.530e+02,uokJZ3jIO 82 | 254,-4.784e+02,Ucyvnd 83 | -599,4.960e+02,4WVPZosS 84 | -628,-6.802e+02,Xn 85 | 653,5.453e+02,cO8iXuC 86 | 22,-9.832e+02,adJ6p 87 | 721,5.641e+01,LLXctZ 88 | -230,-9.751e+02,cJbXkbt 89 | -444,4.346e+02,Sflv 90 | 170,2.662e+02,jt 91 | 169,-8.486e+02,oSM9pOjI 92 | -473,-3.107e+01,G4kyJ 93 | 302,8.853e+01,ly 94 | -758,-5.322e+02,Zn 95 | 972,3.974e+02,sB 96 | -604,-5.153e+02,9uzcbzAOq 97 | 46,5.960e+02,mH3q 98 | 
347,8.169e+02,uCD8e 99 | 564,-8.315e+01,WIfoXx 100 | -774,7.632e+02,hafvTvf 101 | -------------------------------------------------------------------------------- /source-code/pandas/data/genes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Patient data 4 | 5 | 6 |

Gene expression data

7 |

This is some fake gene expression data. Column identifiers represent 8 | patient IDs, row identifiers represent gene names.

9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 |
1234
FXDG0.0101990.0429880.831946-0.023656
VTUR0.4660120.4946790.8066610.518115
OVAH0.7838470.7541500.2083520.826368
AKSE0.9224330.9297160.6628240.959443
SJNN0.3250890.3021980.5228470.356346
LVKM0.5547020.5310440.1389880.600229
GBDZ0.1382370.1752230.4542140.166761
FVEW0.5414890.5860180.8239170.529811
ZNYZ0.1825920.1806040.1873860.117254
GTKJ0.3091380.2611960.2197690.276605
HPOA0.3679570.3998110.000427-0.041688
OXLP0.0778410.0669210.0864140.099641
NZTK0.0878880.0766910.1346580.118467
RMDV0.7943790.7510750.3532760.310774
KGIZ0.6545090.6275860.2123930.228329
LUFL0.3043660.2598250.6067640.567738
HOON0.1133270.0671780.7440090.697235
CXYA0.7281050.6902680.9198240.880541
OBEK0.0165030.0562420.5718210.536120
AJJU0.0203380.0634230.5441850.500571
163 | 164 |

Pandas is able to extract a table from an HTML page.

165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /source-code/pandas/data/greenhouse_gaz/README_co2_mm_global.txt: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------- 2 | # USE OF NOAA ESRL DATA 3 | # 4 | # These data are made freely available to the public and the 5 | # scientific community in the belief that their wide dissemination 6 | # will lead to greater understanding and new scientific insights. 7 | # The availability of these data does not constitute publication 8 | # of the data. NOAA relies on the ethics and integrity of the user to 9 | # insure that ESRL receives fair credit for their work. If the data 10 | # are obtained for potential use in a publication or presentation, 11 | # ESRL should be informed at the outset of the nature of this work. 12 | # If the ESRL data are essential to the work, or if an important 13 | # result or conclusion depends on the ESRL data, co-authorship 14 | # may be appropriate. This should be discussed at an early stage in 15 | # the work. Manuscripts using the ESRL data should be sent to ESRL 16 | # for review before they are submitted for publication so we can 17 | # insure that the quality and limitations of the data are accurately 18 | # represented. 19 | # 20 | # Contact: Ed Dlugokencky (303 497 6228; ed.dlugokencky@noaa.gov) 21 | # 22 | # File Creation: Fri Jun 5 12:30:57 2015 23 | # 24 | # RECIPROCITY 25 | # 26 | # Use of these data implies an agreement to reciprocate. 27 | # Laboratories making similar measurements agree to make their 28 | # own data available to the general public and to the scientific 29 | # community in an equally complete and easily accessible form. 
30 | # Modelers are encouraged to make available to the community, 31 | # upon request, their own tools used in the interpretation 32 | # of the ESRL data, namely well documented model code, transport 33 | # fields, and additional information necessary for other 34 | # scientists to repeat the work and to run modified versions. 35 | # Model availability includes collaborative support for new 36 | # users of the models. 37 | # -------------------------------------------------------------------- 38 | # 39 | # 40 | # See www.esrl.noaa.gov/gmd/ccgg/trends/ for additional details. 41 | # 42 | # 43 | # The uncertainty in the global monthly mean is estimated using a 44 | # a monte carlo technique that computes 100 globally-averaged time 45 | # series, each time using a slightly different set of measurement 46 | # records from the NOAA ESRL cooperative air sampling network. The 47 | # reported uncertainty, 0.13 ppm, is the mean of the standard deviations 48 | # for each monthly mean using this technique. Please see Conway et al., 49 | # 1994, JGR, vol. 99, no. D11. for a complete discussion. Units are ppm. 50 | # 51 | # CO2 expressed as a mole fraction in dry air, micromol/mol, abbreviated as ppm 52 | # 53 | # NOTE: In general, the data presented for the last year are subject to change, 54 | # depending on recalibration of the reference gas mixtures used, and other quality 55 | # control procedures. Occasionally, earlier years may also be changed for the same 56 | # reasons. Usually these changes are minor. 
57 | # 58 | -------------------------------------------------------------------------------- /source-code/pandas/data/missing_values.csv: -------------------------------------------------------------------------------- 1 | int_data,float_data,category_data,string_data 2 | 3,3.7,A,str1 3 | 5,5.3,A,str1_str1 4 | 7,7.5,B,str2_str1 5 | 17,3.5,A,str1_str2 6 | 13,5.7,A,str2 7 | 23,7.1,B,str2_str2 8 | ,5.5,A,str3 9 | 29,,B,str3_str1 10 | 31,3.3,,str2_str3 11 | 37,7.7,B, 12 | -------------------------------------------------------------------------------- /source-code/pandas/data/patient_experiment.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/patient_experiment.xlsx -------------------------------------------------------------------------------- /source-code/pandas/data/patient_metadata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/patient_metadata.xlsx -------------------------------------------------------------------------------- /source-code/pandas/data/patients.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/patients.xlsx -------------------------------------------------------------------------------- /source-code/pandas/data/sales-funnel.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sales-funnel.xlsx -------------------------------------------------------------------------------- 
/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/NOC_logo_2010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/NOC_logo_2010.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/all_190.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/all_190.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/bodc_logo40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/bodc_logo40.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/data_explorer_2_190.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/data_explorer_2_190.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/graph.jsp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/graph.jsp -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/icsu_logo40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/icsu_logo40.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/ioc_en_small_color.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/ioc_en_small_color.gif -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/nerc-long-logo-200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/nerc-long-logo-200.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/psmsl_header.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/psmsl_header.gif 
-------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/style(1).css: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | /* 7 | FILE ARCHIVED ON 3:28:21 Apr 7, 2015 AND RETRIEVED FROM THE 8 | INTERNET ARCHIVE ON 19:50:23 Jul 3, 2015. 9 | JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE. 10 | 11 | ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C. 12 | SECTION 108(a)(3)). 13 | */ 14 | /* tables */ 15 | table.tablesorter { 16 | /*background-color: #CDCDCD;*/ 17 | background-color: #0078AB; 18 | font-family:"Courier New"; 19 | font-size: 10pt; 20 | width: 100%; 21 | text-align: left; 22 | margin:10px 0pt 15px; 23 | color: #FFFFFF; 24 | float: right; 25 | } 26 | table.tablesorter thead tr th, table.tablesorter tfoot tr th { 27 | /*background-color: #e6EEEE;*/ 28 | background-color: #0078AB; 29 | border: 1px solid #FFF; 30 | text-align: left; 31 | font-size: 8pt; 32 | padding: 4px; 33 | } 34 | table.tablesorter thead tr .header { 35 | background-image: url(/web/20150407032821/http://www.psmsl.org/javascript/blue/bg.gif); 36 | background-repeat: no-repeat; 37 | background-position: center right; 38 | cursor: pointer; 39 | text-align: left; 40 | font-family:arial; 41 | } 42 | table.tablesorter tbody td { 43 | /*color: #3D3D3D;*/ 44 | color: #000000; 45 | padding: 4px; 46 | background-color: #FFF; 47 | vertical-align: top; 48 | } 49 | table.tablesorter tbody tr.odd td { 50 | /*background-color:#F0F0F6;*/ 51 | background-color:#FF0000; 52 | } 53 | table.tablesorter thead tr .headerSortUp { 54 | background-image: url(/web/20150407032821/http://www.psmsl.org/javascript/blue/asc.gif); 55 | /*border: 2px solid #FF0000;*/ 56 | color: #000000; 57 | } 58 | table.tablesorter thead tr .headerSortDown { 59 | background-image: url(/web/20150407032821/http://www.psmsl.org/javascript/blue/desc.gif); 60 | 
/*border: 2px solid #FF0000;*/ 61 | color: #000000; 62 | } 63 | table.tablesorter thead tr .headerSortDown, table.tablesorter thead tr .headerSortUp { 64 | /*background-color: #8dbdd8;*/ 65 | /*background-color: #0078AB;*/ 66 | background-color: #FFD1B1; 67 | } 68 | pre { 69 | font-family:courier;} 70 | .useWhiteSpace { 71 | white-space:pre;} 72 | table.tablesorter p { 73 | margin: 0px; 74 | padding: 0px; 75 | border: 0px; 76 | white-space:pre; 77 | } 78 | a { 79 | onclick: "return false";} 80 | 81 | -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/wayback-toolbar-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/wayback-toolbar-logo.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/wm_tb_nxt_off.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/wm_tb_nxt_off.png -------------------------------------------------------------------------------- /source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/wm_tb_prv_on.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/data/sea_levels/Obtaining Tide Gauge Data_files/wm_tb_prv_on.png -------------------------------------------------------------------------------- 
/source-code/pandas/data/temperatures/annual.land_ocean.90S.90N.df_1901-2000mean.dat: -------------------------------------------------------------------------------- 1 | 1880 -0.1591 2 | 1881 -0.0789 3 | 1882 -0.1313 4 | 1883 -0.1675 5 | 1884 -0.2485 6 | 1885 -0.2042 7 | 1886 -0.1695 8 | 1887 -0.2505 9 | 1888 -0.1605 10 | 1889 -0.1103 11 | 1890 -0.3014 12 | 1891 -0.2527 13 | 1892 -0.2926 14 | 1893 -0.3274 15 | 1894 -0.2880 16 | 1895 -0.2146 17 | 1896 -0.1009 18 | 1897 -0.1294 19 | 1898 -0.2517 20 | 1899 -0.1356 21 | 1900 -0.1125 22 | 1901 -0.1693 23 | 1902 -0.2255 24 | 1903 -0.3442 25 | 1904 -0.3799 26 | 1905 -0.2583 27 | 1906 -0.2060 28 | 1907 -0.3726 29 | 1908 -0.3922 30 | 1909 -0.4041 31 | 1910 -0.3860 32 | 1911 -0.4144 33 | 1912 -0.3422 34 | 1913 -0.3196 35 | 1914 -0.1599 36 | 1915 -0.0810 37 | 1916 -0.2803 38 | 1917 -0.3227 39 | 1918 -0.2106 40 | 1919 -0.2186 41 | 1920 -0.2025 42 | 1921 -0.1390 43 | 1922 -0.2222 44 | 1923 -0.2063 45 | 1924 -0.1843 46 | 1925 -0.1260 47 | 1926 -0.0192 48 | 1927 -0.1007 49 | 1928 -0.0935 50 | 1929 -0.2201 51 | 1930 -0.0282 52 | 1931 0.0041 53 | 1932 -0.0285 54 | 1933 -0.1616 55 | 1934 -0.0285 56 | 1935 -0.0565 57 | 1936 -0.0318 58 | 1937 0.0727 59 | 1938 0.0955 60 | 1939 0.0898 61 | 1940 0.1234 62 | 1941 0.1837 63 | 1942 0.1377 64 | 1943 0.1299 65 | 1944 0.2362 66 | 1945 0.1162 67 | 1946 -0.0202 68 | 1947 -0.0406 69 | 1948 -0.0494 70 | 1949 -0.0752 71 | 1950 -0.1635 72 | 1951 -0.0029 73 | 1952 0.0353 74 | 1953 0.1190 75 | 1954 -0.1028 76 | 1955 -0.1220 77 | 1956 -0.1836 78 | 1957 0.0570 79 | 1958 0.1054 80 | 1959 0.0634 81 | 1960 0.0114 82 | 1961 0.0894 83 | 1962 0.1136 84 | 1963 0.1333 85 | 1964 -0.1268 86 | 1965 -0.0695 87 | 1966 -0.0117 88 | 1967 0.0022 89 | 1968 -0.0121 90 | 1969 0.0911 91 | 1970 0.0471 92 | 1971 -0.0550 93 | 1972 0.0348 94 | 1973 0.1617 95 | 1974 -0.0862 96 | 1975 -0.0177 97 | 1976 -0.1062 98 | 1977 0.1425 99 | 1978 0.0615 100 | 1979 0.1533 101 | 1980 0.1930 102 | 1981 0.2284 103 | 1982 0.1171 104 | 1983 
0.2817 105 | 1984 0.0875 106 | 1985 0.0681 107 | 1986 0.1605 108 | 1987 0.2968 109 | 1988 0.3006 110 | 1989 0.2223 111 | 1990 0.3861 112 | 1991 0.3360 113 | 1992 0.2023 114 | 1993 0.2307 115 | 1994 0.2934 116 | 1995 0.4073 117 | 1996 0.2753 118 | 1997 0.4782 119 | 1998 0.5971 120 | 1999 0.4199 121 | 2000 0.3886 122 | 2001 0.5173 123 | 2002 0.5745 124 | 2003 0.5818 125 | 2004 0.5416 126 | 2005 0.6154 127 | 2006 0.5601 128 | 2007 0.5472 129 | 2008 0.4804 130 | 2009 0.5551 131 | 2010 -999.0000 132 | -------------------------------------------------------------------------------- /source-code/pandas/screenshots/add_column.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/add_column.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/add_multilevel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/add_multilevel.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/cum_sum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/cum_sum.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/cumsum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/cumsum.png 
-------------------------------------------------------------------------------- /source-code/pandas/screenshots/interpolate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/interpolate.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/interpolated_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/interpolated_plot.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/pearsonr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/pearsonr.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/pivot_aggfunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/pivot_aggfunc.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/pivot_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/pivot_table.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/plot_all_temps.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/plot_all_temps.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/plot_missing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/plot_missing.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/query.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/read_excel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/read_excel.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/read_html.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/read_html.png -------------------------------------------------------------------------------- /source-code/pandas/screenshots/scatter_matrix.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gjbex/Python-for-data-science/e24c56798d48d70e4f288ed78ce88e7a656bafa6/source-code/pandas/screenshots/scatter_matrix.png -------------------------------------------------------------------------------- /source-code/polars/.gitignore: -------------------------------------------------------------------------------- 1 | # data sets too large to store in repository 2 | large_data*.csv 3 | very_large_data*.csv 4 | 5 | # Slurm job output files 6 | slurm-*.out 7 | -------------------------------------------------------------------------------- /source-code/polars/README.md: -------------------------------------------------------------------------------- 1 | # Polars 2 | 3 | Polars is an alternative to pandas that is designed to have better performance. 4 | 5 | 6 | ## What is it? 7 | 8 | 1. `patient_data.ipynb`: Jupyter notebook that explores functional differences 9 | between pandas and polars. It replicates the notebook in the `pandas` 10 | directory with the same name. 11 | 1. `polars_large_data_benchmark.ipynb`: Jupyter notebook that compares the 12 | performance of polars and pandas on large data sets. 13 | 1. `create_csv_data.py`: Python script to generate one or more large CSV files 14 | for benchmarking. 15 | 1. `create_csv_data.slurm`: Slurm script to run `create_csv_data.py` on a 16 | cluster. 17 | 1. `polars_performance.ipynb`: Jupyter notebook that compares the performance 18 | of polars and pandas on a variety of operations. 19 | 1. `data`: Directory containing the data used in the notebook. 
#!/usr/bin/env python

from argparse import ArgumentParser
from concurrent.futures import ProcessPoolExecutor
import csv
from datetime import datetime, timedelta
import random
import sys


def write_file(args):
    """Write a single CSV file containing a timestamp column and random-walk data.

    Parameters
    ----------
    args : tuple
        (file_name, rows, curr_time, delta_time, curr_vals, delta_val):
        output file name, number of data rows, timestamp of the first row,
        time increment between consecutive rows, initial values of the data
        columns, and maximum absolute per-row change of each column value.
        Packed into a single tuple so the function can be used directly
        with ``ProcessPoolExecutor.map``.

    Returns
    -------
    str
        The name of the file that was written.
    """
    file_name, rows, curr_time, delta_time, curr_vals, delta_val = args
    # one data column per initial value, named C1, C2, ...
    fieldnames = ['timestamp']
    fieldnames.extend(f'C{i + 1:d}' for i in range(len(curr_vals)))
    with open(file_name, 'w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for _ in range(rows):
            data = {f'C{i + 1:d}': val for i, val in enumerate(curr_vals)}
            data['timestamp'] = curr_time
            writer.writerow(data)
            curr_time += delta_time
            # each column performs an independent bounded random walk
            curr_vals = [x + random.uniform(-delta_val, delta_val)
                         for x in curr_vals]
    return file_name


if __name__ == '__main__':
    arg_parser = ArgumentParser(description='create a set of CSV files')
    arg_parser.add_argument('--files', type=int, default=1,
                            help='number of files to create')
    arg_parser.add_argument('base_name', help='base file name to use')
    arg_parser.add_argument('--cols', type=int, default=1,
                            help='number of columns to generate')
    arg_parser.add_argument('--rows', type=int, default=100,
                            help='number of rows to generate per file')
    arg_parser.add_argument('--workers', type=int, default=None,
                            help='number of workers to use')
    options = arg_parser.parse_args()
    curr_time = datetime.now()
    delta_time = timedelta(seconds=1)
    curr_vals = [1.0]*options.cols
    delta_val = 0.01
    with ProcessPoolExecutor(max_workers=options.workers) as executor:
        # file i starts i*rows time steps after the first file, so the
        # generated files together form one contiguous time series
        args = [(f'{options.base_name}_{i + 1:04d}.csv',
                 options.rows, curr_time + i*options.rows*delta_time,
                 delta_time, curr_vals, delta_val)
                for i in range(options.files)]
        for file_name in executor.map(write_file, args):
            print(f'{file_name} done')
They 4 | can be used to check whether a string matches a given pattern, for 5 | extracting parts of strings, or substituting part of strings in a 6 | way that is much more powerful and flexible than the `str` methods 7 | for those purposes. 8 | 9 | ## What is it? 10 | 1. `regexes.ipynb`: Jupyter notebook illustrating various aspects of 11 | using regular expressions in string-related tasks. This conveys 12 | the flavor, rather than being a comprehensive introduction. 13 | 14 | More information on regular expressions can be found in the Python 15 | introduction slides. 16 | -------------------------------------------------------------------------------- /source-code/seaborn/READNE.md: -------------------------------------------------------------------------------- 1 | # Seaborn 2 | Seaborn works on top of matplotlib, and is especially strong for plotting 3 | data and its statistical properties. 4 | 5 | ## What is it? 6 | 1. `seaborn.ipynb`: Jupyter notebook that illustrates a number of Seaborn 7 | features. 8 | 1. `generate_data.py`: script to generate example data. 9 | 1. `Data`: directory containing example data.
#!/usr/bin/env python

import numpy as np


def main(nr_data=100, output_path='Data/data.txt'):
    """Generate a synthetic example data set and write it as CSV.

    Columns: Gaussian and Poisson samples, a categorical label, several
    noisy linearly related (x, y) pairs, and a binary factor that
    modulates the slope and intercept of the (x3, y3) relation.

    Parameters
    ----------
    nr_data : int
        Number of data rows to generate (default 100).
    output_path : str
        Path of the CSV file to write (default 'Data/data.txt'; the
        directory must already exist).
    """
    gaussian = np.random.normal(loc=1.5, scale=2.0, size=nr_data)
    poisson = np.random.poisson(lam=5.0, size=nr_data)
    labels = np.random.choice(['A', 'B', 'C', 'D'], size=nr_data)
    x = np.linspace(0.0, 100.0, nr_data)
    y = 1.3*x + 2.0 + np.random.normal(scale=8, size=nr_data)
    x1 = np.random.choice(np.arange(1, 10, 1), size=nr_data)
    y1 = 1.3*x1 + 2.0 + np.random.normal(scale=1.5, size=nr_data)
    # sorted gamma-distributed x values give an uneven sampling density
    x2 = np.sort(np.random.gamma(3, scale=4, size=nr_data))
    y2 = 1.3*x2 + 2.0 + np.random.normal(scale=1.5, size=nr_data)
    binary = np.random.choice(np.array(['yes', 'no']), size=nr_data)
    x3 = x2.copy()
    # slope and intercept both depend on the binary factor
    y3 = np.where(binary == 'yes', 1.3, 1.9)*x3 + \
        np.where(binary == 'yes', 2.0, -1.0) + \
        np.random.normal(size=nr_data)
    with open(output_path, 'w') as file:
        print('gaussian,poisson,label,x,y,x1,y1,x2,y2,binary,x3,y3', file=file)
        for i in range(nr_data):
            print(f'{gaussian[i]},{poisson[i]},{labels[i]},{x[i]},{y[i]},'
                  f'{x1[i]},{y1[i]},{x2[i]},{y2[i]},{binary[i]},'
                  f'{x3[i]},{y3[i]}',
                  file=file)


if __name__ == '__main__':
    main()
#!/usr/bin/env python

from argparse import ArgumentParser
from bs4 import BeautifulSoup
from collections import deque
import matplotlib.pyplot as plt
import networkx as nx
import sys
# importing the submodules explicitly: a bare `import urllib` does not
# guarantee that urllib.request / urllib.error are bound
import urllib.error
import urllib.request


def show_links(soup, out=sys.stderr):
    """Print a parsed page's title and every hyperlink it contains.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed HTML document.
    out : file object
        Stream to print to (default: standard error).
    """
    print("Opened start page '{0}'".format(soup.title.string), file=out)
    for link in soup.find_all('a'):
        href = link.get('href')
        if href:
            print(f'\t{href}', file=out)


def process_page(pages_to_do, pages_done, max_level, graph, verbose=False):
    """Fetch the next queued page and record its outgoing links.

    Pops one (URL, level) pair from pages_to_do; if level does not exceed
    max_level, the page is downloaded, every absolute link found on it is
    added as an edge to graph, and links not yet visited are queued at
    level + 1.  Pages that cannot be retrieved are reported on standard
    error and otherwise skipped.

    Parameters
    ----------
    pages_to_do : collections.deque
        Queue of (url, level) pairs still to visit; mutated in place.
    pages_done : set
        URLs already visited; mutated in place.
    max_level : int
        Maximum link depth to follow.
    graph : networkx.Graph
        Link graph; mutated in place.
    verbose : bool
        When True, print each URL and its level as it is processed.
    """
    if not pages_to_do:
        return
    page_url, level = pages_to_do.popleft()
    if level > max_level:
        return
    if verbose:
        print(f'{page_url}: {level}')
    pages_done.add(page_url)
    try:
        page = urllib.request.urlopen(page_url)
        soup = BeautifulSoup(page, 'html5lib')
        for link in soup.find_all('a'):
            href = link.get('href')
            if href and href.startswith('http'):
                if href not in pages_done:
                    pages_to_do.append((href, level + 1))
                graph.add_edge(page_url, href)
    # HTTPError is a subclass of URLError, so a single handler replaces
    # the two previously duplicated except blocks
    except urllib.error.URLError:
        print(f'# warning: can not handle {page_url}', file=sys.stderr)


if __name__ == '__main__':
    arg_parser = ArgumentParser(description='create graph of hyperlinks')
    arg_parser.add_argument('url', help='URL to start link analysis at')
    arg_parser.add_argument('--max-level', type=int, default=3,
                            help='maximum link depth')
    arg_parser.add_argument('--out', help='file name for GraphML output')
    arg_parser.add_argument('--verbose', action='store_true',
                            help='give verbose output')
    options = arg_parser.parse_args()
    pages_done = set()
    pages_to_do = deque()
    pages_to_do.append((options.url, 0))
    graph = nx.Graph()
    graph.add_node(options.url)
    while pages_to_do:
        process_page(pages_to_do, pages_done, options.max_level,
                     graph, options.verbose)
    print(f'total pages scraped: {len(pages_done)}')
    if options.out:
        nx.write_graphml(graph, options.out)
    else:
        nx.draw(graph)
        plt.show()
#!/usr/bin/env python
# coding: utf-8
# Script that reads the OpenMP FAQ HTML file and extracts the questions and answers.
# The output is a JSONL file with the following format:
# {"input": "question", "output": "answer"}
#
# Usage: python preprocess_faq.py openmp_faq.html > openmp_faq.jsonl
# ------------------------------------------------------------------------

import argparse
import json


def is_question_header(tag):
    """Return True for <h4> tags whose id marks an OpenMP FAQ question."""
    return tag.name == 'h4' and tag.has_attr('id') and tag.attrs['id'].startswith('OMP')

def get_question(h4_tag):
    """Return the question text from the <span> inside a question header."""
    return h4_tag.span.text

def get_answer(h4_tag):
    """Concatenate the consecutive <p> tags following a question header.

    Paragraph texts are joined with single spaces and cleaned of trailing
    boilerplate before being returned.
    """
    answer = ''
    next_tag = h4_tag.find_next('p')
    while next_tag and next_tag.name == 'p':
        answer += next_tag.text.strip() + ' '
        next_tag = next_tag.find_next('p')
    return clean_answer(answer)

def clean_answer(text):
    """Normalize whitespace and strip trailing boilerplate from an answer.

    Removes everything from the 'Version 3.0 Last updated:' footer and
    from the '(Quote from:' attribution onwards, when present.
    """
    text = text.replace('\n', ' ').strip()
    # str.find returns -1 when the marker is absent; slicing with text[:-1]
    # would then silently drop the last character, so only truncate on a
    # real match (this was a bug in the original version)
    pos = text.find('Version 3.0 Last updated:')
    if pos != -1:
        text = text[:pos]
    pos = text.find('(Quote from:')
    if pos != -1:
        text = text[:pos]
    return text.strip()

def main():
    # imported here so that the pure text-processing helpers above can be
    # used without Beautiful Soup installed
    from bs4 import BeautifulSoup
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('html_file', help='OpenMP FAQ HTML file')
    args = arg_parser.parse_args()
    with open(args.html_file) as html_file:
        doc = BeautifulSoup(html_file, features='html.parser')
    json_strs = []
    for tag in doc.find_all(is_question_header):
        # json.dumps escapes quotes, backslashes and control characters in
        # the extracted text; the original f-string formatting did not,
        # which could produce invalid JSONL
        json_strs.append(json.dumps(
            {'input': get_question(tag), 'output': get_answer(tag)}))
    print('\n'.join(json_strs))

if __name__ == '__main__':
    main()
/source-code/xarray/.gitignore: -------------------------------------------------------------------------------- 1 | *.nc 2 | -------------------------------------------------------------------------------- /source-code/xarray/README.md: -------------------------------------------------------------------------------- 1 | # Xarray 2 | 3 | xarray is a Python package to represent numerical data with metadata. In that respect it is 4 | a hybrid between numpy and pandas. 5 | 6 | ## What is it? 7 | 8 | 1. `xarray_intro.ipynb`: Jupyter notebook introducing some of xarray's features. 9 | --------------------------------------------------------------------------------