├── requirements.txt ├── hdxpop ├── __init__.py └── hdxpop.py ├── .github └── workflows │ └── python-publish.yml ├── LICENSE ├── setup.py ├── .gitignore └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.22.0 2 | beautifulsoup4>=4.8.2 3 | -------------------------------------------------------------------------------- /hdxpop/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = 'Samapriya Roy' 4 | __email__ = 'samapriya.roy@gmail.com' 5 | __version__ = '0.0.4' 6 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | 5 | name: CI hdxpop 6 | 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | jobs: 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [macos-latest, ubuntu-latest, windows-latest] 19 | python-version: [3.6, 3.7, 3.8] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install -U pip setuptools 31 | pip install flake8 pytest wheel 32 | - name: Test package 33 | run: | 34 | python setup.py install 35 | hdxpop -h 36 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Samapriya Roy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import sys 4 | import setuptools 5 | from setuptools import find_packages 6 | from setuptools.command.test import test as TestCommand 7 | from distutils.version import StrictVersion 8 | from setuptools import __version__ as setuptools_version 9 | 10 | if StrictVersion(setuptools_version) < StrictVersion('38.3.0'): 11 | raise SystemExit( 12 | 'Your `setuptools` version is old. ' 13 | 'Please upgrade setuptools by running `pip install -U setuptools` ' 14 | 'and try again.' 
15 | ) 16 | 17 | 18 | def readme(): 19 | with open('README.md') as f: 20 | return f.read() 21 | setuptools.setup( 22 | name='hdxpop', 23 | version='0.0.4', 24 | packages=['hdxpop'], 25 | url='https://github.com/samapriya/hdxpop', 26 | install_requires=['requests>=2.21.1','beautifulsoup4>=4.8.2',], 27 | license='MIT License', 28 | long_description=open('README.md').read(), 29 | long_description_content_type='text/markdown', 30 | python_requires='>=3.2', 31 | classifiers=( 32 | 'Development Status :: 3 - Alpha', 33 | 'Intended Audience :: Developers', 34 | 'Intended Audience :: Science/Research', 35 | 'Natural Language :: English', 36 | 'License :: OSI Approved :: Apache Software License', 37 | 'Programming Language :: Python :: 3', 38 | 'Programming Language :: Python :: 3.6', 39 | 'Programming Language :: Python :: 3.7', 40 | 'Programming Language :: Python :: 3.8', 41 | 'Operating System :: OS Independent', 42 | 'Topic :: Scientific/Engineering :: GIS', 43 | ), 44 | author='Samapriya Roy', 45 | author_email='samapriya.roy@gmail.com', 46 | description='Simple tool to download High Resolution Population Density Maps from Humanitarian Data Exchange', 47 | entry_points={ 48 | "console_scripts": ["hdxpop=hdxpop.hdxpop:main"] 49 | }, 50 | ) 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds 
the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hdxpop 2 | 3 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4606954.svg)](https://doi.org/10.5281/zenodo.4606954) 4 | [![Downloads](https://pepy.tech/badge/hdxpop/month)](https://pepy.tech/project/hdxpop/month) 5 | ![CI hdxpop](https://github.com/samapriya/hdxpop/workflows/CI%20hdxpop/badge.svg) 6 | 7 | Simple tool to download High Resolution Population Density Maps from Humanitarian Data Exchange 8 | 9 | ## [For usage go to the medium article](https://medium.com/@samapriyaroy/community-datasets-in-google-earth-engine-an-experiment-b72daa474819) 10 | 11 | ``` 12 | Samapriya Roy. (2021, March 16). samapriya/hdxpop: hdxpop: Simple tool to download High Resolution Population Density Maps from 13 | Humanitarian Data Exchange (Version 0.0.4). Zenodo. http://doi.org/10.5281/zenodo.4606954 14 | ``` 15 | 16 | ## Table of contents 17 | * [Prerequisites](#prerequisites) 18 | * [Installing hdxpop](#installing-hdxpop) 19 | * [Using without install](#using-without-install) 20 | 21 | ## Prerequisites 22 | This assumes that you have native python & pip installed in your system, you can test this by going to the terminal (or windows command prompt) and trying. 
I recommend installation within a virtual environment if you are worried about messing up your current environment. 23 | 24 | ```python``` and then ```pip list``` 25 | 26 | If you get no errors and you have python 3.2 or higher you should be good to go. 27 | 28 | ## Installing hdxpop 29 | Once you have determined you have python, you can simply install hdxpop using one of two methods 30 | 31 | ``` 32 | pip install hdxpop 33 | ``` 34 | 35 | For linux I found it helps to specify the pip type and use --user. Here pip refers to your default python and pip installations; the assumption here is that you are using python3 and up. 36 | 37 | ``` 38 | pip install hdxpop --user 39 | 40 | or 41 | 42 | pip3 install hdxpop --user 43 | ``` 44 | 45 | or you can also try 46 | 47 | ``` 48 | git clone https://github.com/samapriya/hdxpop.git 49 | cd hdxpop 50 | python setup.py install 51 | ``` 52 | 53 | ![hdxpop](https://user-images.githubusercontent.com/6677629/75043238-be6a9780-548d-11ea-9b3e-d7a4824ca8fc.png) 54 | 55 | 56 | ## Using without install 57 | You can do this too, by simply downloading and unzipping the repo onto your machine and navigating to the inner hdxpop folder with the hdxpop.py file. 58 | 59 | You can then do a 60 | 61 | ``` 62 | python3 hdxpop.py -h 63 | ``` 64 | 65 | or simply, if you have Git enabled 66 | 67 | ``` 68 | git clone https://github.com/samapriya/hdxpop.git 69 | cd hdxpop/hdxpop/ 70 | python hdxpop.py -h 71 | ``` 72 | 73 | ## Changelog 74 | 75 | ### v0.0.4 76 | - Changes to search URL and fuzzy search to look for overall population datasets. 77 | 78 | ### v0.0.3 79 | - Changes to search URL to include geotif, zipped only, goetiff and geotiff keyword filters. 
# ---------------------------------------------------------------------------
# README.md (final Changelog entry, carried over from the repository dump)
#
# ### v0.0.2
# - Added installation instructions and setup
# ---------------------------------------------------------------------------
# /hdxpop/hdxpop.py:
# ---------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Download and unpack High Resolution Population Density Maps from HDX.

The module exposes two command line sub-commands:

* ``humdata`` scrapes the Facebook organisation page on the Humanitarian
  Data Exchange (HDX), follows every dataset link and downloads each zipped
  population GeoTIFF it finds.
* ``unzip`` walks a folder of downloaded archives and extracts the ``.tif``
  members into a target folder.
"""

__copyright__ = """

Copyright 2021 Samapriya Roy

Permission is hereby granted, free of charge,
to any person obtaining a copy of this software
and associated documentation files (the "Software"),
to deal in the Software without restriction,
including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so,
subject to the following conditions:

MIT LICENSE: https://opensource.org/licenses/MIT

The above copyright notice and this permission notice shall
be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""
# The notice above is the MIT license; the previous value ("Apache 2.0")
# contradicted it.
__license__ = "MIT"


import argparse
import glob  # noqa: F401  (unused here; kept so the name stays importable)
import json  # noqa: F401  (unused here; kept so the name stays importable)
import os
import os.path
import sys
import time  # noqa: F401  (unused here; kept so the name stays importable)
import zipfile
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

MAIN_URL = "https://data.humdata.org/m/organization/facebook?res_format=GeoTIFF&ext_page_size=250"

# Host that relative links scraped from HDX pages are resolved against.
BASE_URL = "https://data.humdata.org"

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would hang the tool forever.
REQUEST_TIMEOUT = 60

# Size of the pieces a download is written to disk in.
CHUNK_SIZE = 512 * 1024


i = 1  # Running counter of files downloaded, shown in progress messages

lt = []  # Dataset page URLs collected by humdata()
sub = []  # Matching zip links found by lk()


def _remote_filename(url):
    """Return the name of the file that *url* will deliver.

    A HEAD request is sent (``requests.head`` does not follow redirects) and
    the name is taken from the ``Location`` header of the answer.  When the
    server does not redirect, the name is taken from *url* itself instead of
    failing with ``KeyError``.
    """
    response = requests.head(url, timeout=REQUEST_TIMEOUT)
    target = response.headers.get("Location") or url
    return urlsplit(target).path.rsplit("/", 1)[-1]


def downonly(url, destination):
    """Download *url* into *destination* unless the file is already there.

    Args:
        url: Download link of a single archive.
        destination: Folder the file is stored in; created when missing.

    Raises:
        requests.RequestException: If the server cannot be reached.
        OSError: If the file cannot be written.

    A response with a status other than 200 is reported and skipped.  The
    data is streamed to a ``.part`` file that is renamed once complete, so an
    interrupted run never leaves a truncated archive that a later run would
    mistake for a finished download.
    """
    global i
    os.makedirs(destination, exist_ok=True)

    fname = _remote_filename(url)
    if not fname:
        print("Could not determine a file name for: " + url)
        return

    local_path = os.path.join(destination, fname)
    if os.path.exists(local_path):
        print("Existing file Skipping: " + local_path)
        return

    # stream=True keeps the archive out of memory; it is read chunk by chunk.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        if response.status_code != 200:
            print(
                "Skipping (HTTP " + str(response.status_code) + "): " + url
            )
            return

        print("Processing: " + str(i) + " downloading to==> " + local_path)
        partial_path = local_path + ".part"
        completed = False
        try:
            with open(partial_path, "wb") as handle:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:
                        handle.write(chunk)
            os.replace(partial_path, local_path)
            completed = True
        finally:
            if not completed and os.path.exists(partial_path):
                os.remove(partial_path)
    i = i + 1


def _is_population_zip(href):
    """Tell whether *href* points at a zipped population raster.

    The last path segment must contain ``population``, the link must end in
    ``.zip`` and must not be a zipped CSV (``csv.zip``).
    """
    name = href.split("/")[-1]
    return (
        "population" in name
        and href.endswith(".zip")
        and not name.endswith("csv.zip")
    )


def lk(url, destination):
    """Download every population archive linked from one dataset page.

    Args:
        url: Address of an HDX dataset page.
        destination: Folder the archives are stored in.

    Raises:
        requests.RequestException: If the dataset page itself cannot be
            fetched.  A failure of a single download is reported and the
            remaining links are still processed.

    Every matching link is also appended to the module level ``sub`` list.
    """
    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.content, "html.parser")
    for link in soup.select("a"):
        href = link.get("href")
        # Anchors without an href yield None and cannot be a download link.
        if not href or not _is_population_zip(href):
            continue
        sub.append(href)
        download_url = urljoin(BASE_URL, href)
        try:
            downonly(download_url, destination)
        except (requests.RequestException, OSError) as error:
            print("Failed to download " + download_url + ": " + str(error))


def humdata(destination):
    """Download all population archives published by Facebook on HDX.

    Args:
        destination: Folder the archives are stored in.

    The organisation page (``MAIN_URL``) is scraped for dataset links, which
    are de-duplicated and visited in sorted order so that runs are
    reproducible.  Errors are printed rather than raised; pressing Ctrl-C
    prints a message and exits.
    """
    # Start from a clean slate so that calling this function twice in one
    # process neither revisits stale links nor inflates the final count.
    del lt[:]
    del sub[:]
    try:
        page = requests.get(MAIN_URL, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, "html.parser")
        for link in soup.select("a"):
            href = link.get("href")
            if href and href.startswith("/dataset"):
                lt.append(BASE_URL + href)
        for dataset_url in sorted(set(lt)):
            try:
                lk(dataset_url, destination)
            except requests.RequestException as error:
                # One unreachable dataset page must not abort the whole run.
                print("Skipping " + dataset_url + ": " + str(error))
        print("Processed total of : " + str(len(sub)) + " links")
    except (KeyboardInterrupt, SystemExit):
        print("Program escaped by User")
        sys.exit()
    except Exception as e:
        # Command boundary: report the problem instead of a traceback.
        print(e)


def humdata_from_parser(args):
    """Run :func:`humdata` with the values parsed from the command line."""
    humdata(destination=args.folder)


def unzip(initial, final):
    """Extract the ``.tif`` members of every zip archive below *initial*.

    Args:
        initial: Folder that is walked recursively for ``.zip`` files.
        final: Folder the ``.tif`` files are extracted into.

    Members that already exist in *final* are reported and left untouched.
    An archive that cannot be read (for example a damaged download) is
    reported and skipped so that the remaining archives are still processed.
    """
    for root, _dirs, files in os.walk(initial):
        for name in files:
            if not name.endswith(".zip"):
                continue
            fullpath = os.path.join(root, name)
            try:
                with zipfile.ZipFile(fullpath) as archive:
                    for member in archive.namelist():
                        if not member.endswith(".tif"):
                            continue
                        if os.path.exists(os.path.join(final, member)):
                            print("Existing file Skipped: " + str(member))
                        else:
                            print("Extracting: " + str(member))
                            archive.extract(member, final)
            except zipfile.BadZipFile as error:
                print(
                    "Skipping unreadable archive "
                    + fullpath
                    + ": "
                    + str(error)
                )


def unzip_from_parser(args):
    """Run :func:`unzip` with the values parsed from the command line."""
    unzip(initial=args.initial, final=args.final)


def main(args=None):
    """Command line entry point.

    Args:
        args: Optional list of argument strings.  ``None`` (the default, and
            what the ``hdxpop`` console script passes) means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Simple tool to download High Resolution Population Density Maps from HDX"
    )
    subparsers = parser.add_subparsers()
    parser_humdata = subparsers.add_parser(
        "humdata", help="Download High Resolution Population Density Maps from HDX"
    )
    parser_humdata.add_argument(
        "--folder", required=True, help="Folder to store results"
    )
    parser_humdata.set_defaults(func=humdata_from_parser)

    parser_unzip = subparsers.add_parser("unzip", help="Unzip downloaded HDX files")
    parser_unzip.add_argument(
        "--initial", required=True, help="Folder with zipped files"
    )
    parser_unzip.add_argument(
        "--final", required=True, help="Folder with unzipped tif files"
    )
    parser_unzip.set_defaults(func=unzip_from_parser)

    parsed = parser.parse_args(args)

    # Without a sub-command no "func" default is set; show the usage instead
    # of failing with an AttributeError.
    if not hasattr(parsed, "func"):
        parser.print_help()
        parser.exit(2)

    parsed.func(parsed)


if __name__ == "__main__":
    main()
# ---------------------------------------------------------------------------