├── .gitignore
├── scripts
│   ├── requirements.txt
│   └── process.py
├── README.md
├── datapackage.json
└── .github
    └── workflows
        └── actions.yml

/.gitignore:
--------------------------------------------------------------------------------
.*.swp
cache
--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
requests==2.32.3
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
badge

Country, regional and world GDP in current US Dollars ($). "Regional" means
collections of countries, e.g. Europe & Central Asia.

## Data

The data is sourced from the World Bank (specifically [this dataset][current]),
which in turn lists as its sources: *World Bank national accounts data, and
OECD National Accounts data files*.

Note that the World Bank offers a variety of GDP indicators, including:

* [GDP in current USD][current]
* [GDP in constant USD (2000)][constant]
* [GDP, PPP (constant 2005 international $)][ppp]
* [GDP (constant LCU)][lcu]

[constant]: http://data.worldbank.org/indicator/NY.GDP.MKTP.KD
[current]: http://data.worldbank.org/indicator/NY.GDP.MKTP.CD
[ppp]: http://data.worldbank.org/indicator/NY.GDP.MKTP.PP.KD
[lcu]: http://data.worldbank.org/indicator/NY.GDP.MKTP.KN


## Preparation

The preparation process is recorded and automated in the Python script:

```
scripts/process.py
```

## Automation

An up-to-date version of the dataset (auto-updated every month) can be found on
datahub.io: https://datahub.io/core/gdp

## License

This Data Package is made available under the Public Domain Dedication and
License v1.0, whose full text can be found at:
http://www.opendatacommons.org/licenses/pddl/1.0/
--------------------------------------------------------------------------------
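
As an aside on consuming the prepared file (not part of the repository itself): a
minimal sketch, assuming the processed `data/gdp.csv` with the columns declared in
the schema below, that prints the world aggregate (World Bank country code `WLD`)
by year using only the standard library.

```python
import csv

# Minimal sketch (illustrative, not part of the repo): read the prepared
# data/gdp.csv, which has the columns Country Name, Country Code, Year, Value,
# and print the world aggregate (World Bank country code "WLD") by year.
with open("data/gdp.csv", newline="") as f:
    for row in csv.DictReader(f):
        if row["Country Code"] == "WLD":
            print(row["Year"], row["Value"])
```
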
/datapackage.json:
--------------------------------------------------------------------------------
{
  "description": "Country, regional and world GDP in current US Dollars ($). Regional means collections of countries e.g. Europe & Central Asia. Data is sourced from the World Bank and turned into a standard normalized CSV.",
  "image": "http://assets.okfn.org/p/opendatahandbook/img/data-wrench.png",
  "keywords": [
    "GDP",
    "World",
    "Gross Domestic Product",
    "Time series"
  ],
  "last_updated": "2025-10-07",
  "licenses": [
    {
      "name": "ODC-PDDL-1.0",
      "path": "http://opendatacommons.org/licenses/pddl/",
      "title": "Open Data Commons Public Domain Dedication and License v1.0"
    }
  ],
  "name": "gdp",
  "resources": [
    {
      "name": "gdp",
      "path": "data/gdp.csv",
      "schema": {
        "fields": [
          {
            "name": "Country Name",
            "type": "string"
          },
          {
            "name": "Country Code",
            "type": "string"
          },
          {
            "name": "Year",
            "type": "year"
          },
          {
            "description": "GDP in current USD",
            "name": "Value",
            "type": "number"
          }
        ]
      }
    }
  ],
  "sources": [
    {
      "name": "World Bank and OECD",
      "path": "http://data.worldbank.org/indicator/NY.GDP.MKTP.CD",
      "title": "World Bank and OECD"
    }
  ],
  "title": "Country, Regional and World GDP (Gross Domestic Product)",
  "version": "2025",
  "collection": "economic-data"
}
--------------------------------------------------------------------------------
/.github/workflows/actions.yml:
--------------------------------------------------------------------------------
name: Update gdp data

on:
  # Schedule to run on the 1st day of each month
  schedule:
    - cron: '0 0 1 * *'

  # Run on push to main branch
  push:
    branches:
      - main

  # Run on pull requests targeting the main branch
  pull_request:
    branches:
      - main

  # Allows manual triggering of the workflow
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest

    if: github.ref == 'refs/heads/main'

    steps:
      - name: Check out repository
        uses: actions/checkout@v3

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Python dependencies
        run: |
          python -m venv venv
          source venv/bin/activate
          pip install --upgrade pip
          pip install -r scripts/requirements.txt

      - name: Run scripts
        run: |
          source venv/bin/activate
          cd scripts
          python process.py

      - name: Configure Git
        run: |
          git config --global user.email "${{ env.CI_COMMIT_EMAIL }}"
          git config --global user.name "${{ env.CI_COMMIT_NAME }}"
        env:
          CI_COMMIT_NAME: "Automated commit"
          CI_COMMIT_EMAIL: "actions@users.noreply.github.com"

      - name: Commit and Push changes
        run: |
          git diff --quiet && echo "No changes to commit" || (
            git add data datapackage.json &&
            git commit -m "${{ env.CI_COMMIT_MESSAGE }}" &&
            git push origin main
          )
        env:
          CI_COMMIT_MESSAGE: "Automated commit"
--------------------------------------------------------------------------------
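
For reference, the schema declared above can be sanity-checked against the
generated CSV. Below is a minimal sketch using only the standard library, run
from the repository root; it is not part of the automated workflow.

```python
import csv
import json

# Sketch: confirm the header of data/gdp.csv matches the fields declared
# in datapackage.json. Run from the repository root.
with open("datapackage.json") as f:
    package = json.load(f)

declared = [field["name"] for field in package["resources"][0]["schema"]["fields"]]

with open("data/gdp.csv", newline="") as f:
    header = next(csv.reader(f))

assert header == declared, f"header {header} does not match schema {declared}"
print("data/gdp.csv header matches datapackage.json")
```
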
/scripts/process.py:
--------------------------------------------------------------------------------
import os
import io
import csv
import json
import zipfile
import requests

from datetime import datetime

# Paths are anchored on the location of this script so that the output lands
# in the repository's data/ folder regardless of the working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
repo_root = os.path.dirname(script_dir)
cache_dir = os.path.join(script_dir, 'cache')
data = os.path.join(repo_root, 'data', 'gdp.csv')
datapackage = os.path.join(repo_root, 'datapackage.json')

url = 'https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv'
outheadings = ['Country Name', 'Country Code', 'Year', 'Value']
current_year = datetime.now().year


def search_files_in_cache():
    """Return the data CSVs extracted into the cache, skipping metadata files."""
    if not os.path.exists(cache_dir):
        print("Cache folder does not exist!")
        return None

    filtered_files = [f for f in os.listdir(cache_dir) if 'metadata' not in f.lower()]

    if not filtered_files:
        print("No valid files found in cache.")
        return None

    return filtered_files


def transform_csv(dest):
    """Reshape the World Bank wide CSV (one column per year) into long rows."""
    with open(dest, 'r') as f:
        reader = csv.reader(f)
        next(reader)                      # "Data Source" row
        next(reader)                      # blank row
        updated_date_row = next(reader)   # "Last Updated Date" row
        last_updated_date = updated_date_row[1].strip()

        next(reader)                      # blank row
        header = next(reader)             # column headings; year columns start at index 4
        transformed_data = []
        for row in reader:
            country_name = row[0]
            country_code = row[1]

            for i in range(4, len(row)):
                year = header[i]
                value = row[i]

                if value:  # skip years with no reported value
                    transformed_data.append([country_name, country_code, year, value])

    return last_updated_date, transformed_data


def update_datapackage(last_updated):
    """Record the source's last-updated date and bump the version year."""
    with open(datapackage, 'r') as f:
        dp = json.load(f)

    dp['last_updated'] = str(last_updated)
    dp['version'] = str(current_year)

    with open(datapackage, 'w') as f:
        json.dump(dp, f, indent=2)


def extract_zip():
    """Download the World Bank ZIP and extract it into the cache folder."""
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    try:
        r = requests.get(url, stream=True)
        r.raise_for_status()
        z = zipfile.ZipFile(io.BytesIO(r.content))
        z.extractall(path=cache_dir)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading the file: {e}")
        return


def process():
    extract_zip()  # Ensure ZIP extraction happens first
    files = search_files_in_cache()
    if not files:
        raise RuntimeError("No data file found in cache; the download may have failed.")

    dest = os.path.join(cache_dir, files[0])
    last_updated, transformed_data = transform_csv(dest)

    output_dir = os.path.dirname(data)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(data, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(outheadings)
        writer.writerows(transformed_data)

    update_datapackage(last_updated)


if __name__ == '__main__':
    process()
--------------------------------------------------------------------------------
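
To make the wide-to-long reshape in `transform_csv` concrete, here is a small
self-contained sketch of the same transformation on in-memory rows. The country
and values are placeholders, not real GDP figures, and the column layout mirrors
the World Bank CSV that the script assumes (year columns starting at index 4).

```python
# Self-contained illustration of the wide-to-long reshape done in transform_csv.
# The country "Atlantis" and the values are placeholders, not real data.
header = ["Country Name", "Country Code", "Indicator Name", "Indicator Code", "2020", "2021"]
rows = [
    ["Atlantis", "ATL", "GDP (current US$)", "NY.GDP.MKTP.CD", "1.0", ""],
    ["World", "WLD", "GDP (current US$)", "NY.GDP.MKTP.CD", "2.0", "3.0"],
]

long_rows = []
for row in rows:
    for i in range(4, len(row)):   # year columns start at index 4
        if row[i]:                 # skip empty (missing) values
            long_rows.append([row[0], row[1], header[i], row[i]])

# long_rows == [["Atlantis", "ATL", "2020", "1.0"],
#               ["World", "WLD", "2020", "2.0"],
#               ["World", "WLD", "2021", "3.0"]]
```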