├── .gitignore
├── scripts
│   ├── requirements.txt
│   └── process.py
├── README.md
├── datapackage.json
└── .github
    └── workflows
        └── actions.yml

/.gitignore:
--------------------------------------------------------------------------------
.*.swp
cache
--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
requests==2.32.3
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
badge

Country, regional and world GDP in current US Dollars ($). "Regional" means
collections of countries, e.g. Europe & Central Asia.

## Data

The data is sourced from the World Bank (specifically [this dataset][current]),
which in turn lists as its sources: *World Bank national accounts data, and
OECD National Accounts data files*.

Note that the World Bank offers a variety of GDP indicators, including:

* [GDP in current USD][current]
* [GDP in constant USD (2000)][constant]
* [GDP, PPP (constant 2005 international $)][ppp]
* [GDP (constant LCU)][lcu]

[constant]: http://data.worldbank.org/indicator/NY.GDP.MKTP.KD
[current]: http://data.worldbank.org/indicator/NY.GDP.MKTP.CD
[ppp]: http://data.worldbank.org/indicator/NY.GDP.MKTP.PP.KD
[lcu]: http://data.worldbank.org/indicator/NY.GDP.MKTP.KN


## Preparation

The preparation process is recorded and automated in the Python script:

```
scripts/process.py
```

## Automation

An up-to-date version of the dataset (auto-updated every month) can be found on
datahub.io: https://datahub.io/core/gdp

## License

This Data Package is made available under the Public Domain Dedication and
License v1.0, whose full text can be found at:
http://www.opendatacommons.org/licenses/pddl/1.0/
--------------------------------------------------------------------------------
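
As an aside on consuming the prepared file (not part of the repository itself): a
minimal sketch, assuming the processed `data/gdp.csv` with the columns declared in
the schema below, that prints the world aggregate (World Bank country code `WLD`)
by year using only the standard library.

```python
import csv

# Minimal sketch (illustrative, not part of the repo): read the prepared
# data/gdp.csv, which has the columns Country Name, Country Code, Year, Value,
# and print the world aggregate (World Bank country code "WLD") by year.
with open("data/gdp.csv", newline="") as f:
    for row in csv.DictReader(f):
        if row["Country Code"] == "WLD":
            print(row["Year"], row["Value"])
```
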
/datapackage.json:
--------------------------------------------------------------------------------
{
  "description": "Country, regional and world GDP in current US Dollars ($). Regional means collections of countries e.g. Europe & Central Asia. Data is sourced from the World Bank and turned into a standard normalized CSV.",
  "image": "http://assets.okfn.org/p/opendatahandbook/img/data-wrench.png",
  "keywords": [
    "GDP",
    "World",
    "Gross Domestic Product",
    "Time series"
  ],
  "last_updated": "2025-10-07",
  "licenses": [
    {
      "name": "ODC-PDDL-1.0",
      "path": "http://opendatacommons.org/licenses/pddl/",
      "title": "Open Data Commons Public Domain Dedication and License v1.0"
    }
  ],
  "name": "gdp",
  "resources": [
    {
      "name": "gdp",
      "path": "data/gdp.csv",
      "schema": {
        "fields": [
          {
            "name": "Country Name",
            "type": "string"
          },
          {
            "name": "Country Code",
            "type": "string"
          },
          {
            "name": "Year",
            "type": "year"
          },
          {
            "description": "GDP in current USD",
            "name": "Value",
            "type": "number"
          }
        ]
      }
    }
  ],
  "sources": [
    {
      "name": "World Bank and OECD",
      "path": "http://data.worldbank.org/indicator/NY.GDP.MKTP.CD",
      "title": "World Bank and OECD"
    }
  ],
  "title": "Country, Regional and World GDP (Gross Domestic Product)",
  "version": "2025",
  "collection": "economic-data"
}
--------------------------------------------------------------------------------
/.github/workflows/actions.yml:
--------------------------------------------------------------------------------
name: Update gdp data

on:
  # Schedule to run on the 1st day of each month
  schedule:
    - cron: '0 0 1 * *'

  # Run on push to main branch
  push:
    branches:
      - main

  # Run on pull requests targeting the main branch
  pull_request:
    branches:
      - main

  # Allows manual triggering of the workflow
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest

    if: github.ref == 'refs/heads/main'

    steps:
      - name: Check out repository
        uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Python dependencies
        run: |
          python -m venv venv
          source venv/bin/activate
          pip install --upgrade pip
          pip install -r scripts/requirements.txt

      - name: Run scripts
        run: |
          source venv/bin/activate
          cd scripts
          python process.py

      - name: Configure Git
        run: |
          git config --global user.email "${{ env.CI_COMMIT_EMAIL }}"
          git config --global user.name "${{ env.CI_COMMIT_NAME }}"
        env:
          CI_COMMIT_NAME: "Automated commit"
          CI_COMMIT_EMAIL: "actions@users.noreply.github.com"

      - name: Commit and Push changes
        run: |
          git diff --quiet && echo "No changes to commit" || (
            git add data datapackage.json &&
            git commit -m "${{ env.CI_COMMIT_MESSAGE }}" &&
            git push origin main
          )
        env:
          CI_COMMIT_MESSAGE: "Automated commit"
--------------------------------------------------------------------------------
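
For reference, the schema declared above can be sanity-checked against the
generated CSV. Below is a minimal sketch using only the standard library, run
from the repository root; it is not part of the automated workflow.

```python
import csv
import json

# Sketch: confirm the header of data/gdp.csv matches the fields declared
# in datapackage.json. Run from the repository root.
with open("datapackage.json") as f:
    package = json.load(f)

declared = [field["name"] for field in package["resources"][0]["schema"]["fields"]]

with open("data/gdp.csv", newline="") as f:
    header = next(csv.reader(f))

assert header == declared, f"header {header} does not match schema {declared}"
print("data/gdp.csv header matches datapackage.json")
```
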
/scripts/process.py:
--------------------------------------------------------------------------------
import os
import io
import csv
import json
import zipfile
import requests

from datetime import datetime

# Paths are anchored on the location of this script so that the output lands
# in the repository's data/ folder regardless of the working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
repo_root = os.path.dirname(script_dir)
cache_dir = os.path.join(script_dir, 'cache')
data = os.path.join(repo_root, 'data', 'gdp.csv')
datapackage = os.path.join(repo_root, 'datapackage.json')

url = 'https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv'
outheadings = ['Country Name', 'Country Code', 'Year', 'Value']
current_year = datetime.now().year


def search_files_in_cache():
    """Return the data CSVs extracted into the cache, skipping metadata files."""
    if not os.path.exists(cache_dir):
        print("Cache folder does not exist!")
        return None

    filtered_files = [f for f in os.listdir(cache_dir) if 'metadata' not in f.lower()]

    if not filtered_files:
        print("No valid files found in cache.")
        return None

    return filtered_files


def transform_csv(dest):
    """Reshape the World Bank wide CSV (one column per year) into long rows."""
    with open(dest, 'r') as f:
        reader = csv.reader(f)
        next(reader)                      # "Data Source" row
        next(reader)                      # blank row
        updated_date_row = next(reader)   # "Last Updated Date" row
        last_updated_date = updated_date_row[1].strip()

        next(reader)                      # blank row
        header = next(reader)             # column headings; year columns start at index 4
        transformed_data = []
        for row in reader:
            country_name = row[0]
            country_code = row[1]

            for i in range(4, len(row)):
                year = header[i]
                value = row[i]

                if value:  # skip years with no reported value
                    transformed_data.append([country_name, country_code, year, value])

    return last_updated_date, transformed_data


def update_datapackage(last_updated):
    """Record the source's last-updated date and bump the version year."""
    with open(datapackage, 'r') as f:
        dp = json.load(f)

    dp['last_updated'] = str(last_updated)
    dp['version'] = str(current_year)

    with open(datapackage, 'w') as f:
        json.dump(dp, f, indent=2)


def extract_zip():
    """Download the World Bank ZIP and extract it into the cache folder."""
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    try:
        r = requests.get(url, stream=True)
        r.raise_for_status()
        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall(path=cache_dir)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading the file: {e}")
        return


def process():
    extract_zip()  # Ensure ZIP extraction happens first
    files = search_files_in_cache()
    if not files:
        raise RuntimeError("No data file found in cache; the download may have failed.")

    dest = os.path.join(cache_dir, files[0])
    last_updated, transformed_data = transform_csv(dest)

    output_dir = os.path.dirname(data)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(data, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(outheadings)
        writer.writerows(transformed_data)

    update_datapackage(last_updated)


if __name__ == '__main__':
    process()
--------------------------------------------------------------------------------
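
To make the wide-to-long reshape in `transform_csv` concrete, here is a small
self-contained sketch of the same transformation on in-memory rows. The country
and values are placeholders, not real GDP figures, and the column layout mirrors
the World Bank CSV that the script assumes (year columns starting at index 4).

```python
# Self-contained illustration of the wide-to-long reshape done in transform_csv.
# The country "Atlantis" and the values are placeholders, not real data.
header = ["Country Name", "Country Code", "Indicator Name", "Indicator Code", "2020", "2021"]
rows = [
    ["Atlantis", "ATL", "GDP (current US$)", "NY.GDP.MKTP.CD", "1.0", ""],
    ["World", "WLD", "GDP (current US$)", "NY.GDP.MKTP.CD", "2.0", "3.0"],
]

long_rows = []
for row in rows:
    for i in range(4, len(row)):   # year columns start at index 4
        if row[i]:                 # skip empty (missing) values
            long_rows.append([row[0], row[1], header[i], row[i]])

# long_rows == [["Atlantis", "ATL", "2020", "1.0"],
#               ["World", "WLD", "2020", "2.0"],
#               ["World", "WLD", "2021", "3.0"]]
```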