├── .dockerignore ├── .env.travis ├── .gitattributes ├── .github └── workflows │ ├── dev.yml │ ├── release.yml │ └── staging.yml ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yml ├── manage.py ├── requirements.txt ├── rorapi ├── __init__.py ├── common │ ├── __init__.py │ ├── countries.txt │ ├── create_update.py │ ├── csv_bulk.py │ ├── csv_create.py │ ├── csv_update.py │ ├── csv_utils.py │ ├── es_utils.py │ ├── features.py │ ├── matching.py │ ├── models.py │ ├── queries.py │ ├── record_utils.py │ ├── serializers.py │ ├── urls.py │ └── views.py ├── management │ └── commands │ │ ├── __init__.py │ │ ├── createindex.py │ │ ├── deleteindex.py │ │ ├── generaterorid.py │ │ ├── getrordump.py │ │ ├── indexror.py │ │ ├── indexrordump.py │ │ ├── legacyconvertgrid.py │ │ ├── legacydownloadgrid.py │ │ ├── legacyindexgrid.py │ │ ├── legacyseeschema.py │ │ ├── legacyupgrade.py │ │ └── setup.py ├── migrations │ ├── 0001_create_client_model.py │ ├── 0002_auto_20250326_1054.py │ ├── 0003_auto_20250415_1207.py │ └── __init__.py ├── settings.py ├── tests │ ├── __init__.py │ ├── tests_functional │ │ ├── __init__.py │ │ ├── data │ │ │ ├── dataset_affiliations.json │ │ │ └── dataset_names.json │ │ ├── evaluation.py │ │ ├── tests_matching_v1.py │ │ ├── tests_matching_v2.py │ │ ├── tests_search_v1.py │ │ └── tests_search_v2.py │ ├── tests_integration │ │ ├── __init__.py │ │ ├── tests.py │ │ ├── tests_matching_v1.py │ │ ├── tests_matching_v2.py │ │ ├── tests_search_v1.py │ │ ├── tests_search_v2.py │ │ ├── tests_v1.py │ │ └── tests_v2.py │ └── tests_unit │ │ ├── __init__.py │ │ ├── data │ │ ├── test_data_address.json │ │ ├── test_data_address_empty.json │ │ ├── test_data_create_valid.json │ │ ├── test_data_empty_es7.json │ │ ├── test_data_new_record_invalid_v2.json │ │ ├── test_data_new_record_valid_v2.json │ │ ├── test_data_retrieve_es7.json │ │ ├── test_data_retrieve_es7_v2.json │ │ ├── test_data_search_es7.json │ │ ├── 
test_data_search_es7_v2.json │ │ ├── test_update_valid.json │ │ └── test_upload_csv.csv │ │ ├── tests_client.py │ │ ├── tests_es_utils_v1.py │ │ ├── tests_es_utils_v2.py │ │ ├── tests_matching_v1.py │ │ ├── tests_matching_v2.py │ │ ├── tests_models_common.py │ │ ├── tests_models_v1.py │ │ ├── tests_models_v2.py │ │ ├── tests_queries_v1.py │ │ ├── tests_queries_v2.py │ │ ├── tests_views_v1.py │ │ ├── tests_views_v2.py │ │ └── utils.py ├── v1 │ ├── __init__.py │ ├── index_template_es7.json │ ├── models.py │ └── serializers.py ├── v2 │ ├── __init__.py │ ├── index_template_es7.json │ ├── models.py │ ├── record_constants.py │ ├── record_template.json │ ├── serializers.py │ └── tests.py └── wsgi.py └── vendor └── docker ├── 00_app_env.conf ├── 10_ssh.sh ├── _ror-api-dev.auto.tfvars.tmpl ├── _ror-api.auto.tfvars.tmpl ├── ntp.conf └── webapp.conf /.dockerignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.* 3 | -------------------------------------------------------------------------------- /.env.travis: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/.env.travis -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Override core.autocrlf and set line endings to *nix 2 | * text eol=lf -------------------------------------------------------------------------------- /.github/workflows/dev.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Dev 2 | on: 3 | push: 4 | branches: 5 | - "dev" 6 | paths-ignore: 7 | - '**docker-compose.yml' 8 | - ./github/* 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | env: 13 | ELASTIC7_HOST: "localhost" 14 | ELASTIC7_PORT: "9200" 15 | ELASTIC_PASSWORD: "changeme" 16 | 
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 17 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 18 | AWS_REGION: ${{ secrets.AWS_REGION }} 19 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 20 | LAUNCH_DARKLY_KEY: ${{ secrets.LAUNCH_DARKLY_KEY_DEV }} 21 | DB_HOST: 127.0.0.1 # Will not work with 'localhost', since that will try a Unix socket connection (!) 22 | services: 23 | elasticsearch7: 24 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 25 | env: 26 | discovery.type: single-node 27 | ES_JAVA_OPTS: -Xms512m -Xmx512m 28 | ELASTIC_PASSWORD: changeme 29 | xpack.security.enabled: "false" 30 | http.cors.enabled: "true" 31 | http.cors.allow-origin: "*" 32 | ports: 33 | - 9200:9200 34 | db: 35 | image: mysql:8.0 36 | env: 37 | MYSQL_DATABASE: "rorapi" 38 | MYSQL_USER: "ror_user" 39 | MYSQL_PASSWORD: "password" 40 | MYSQL_ROOT_PASSWORD: "password" 41 | ports: 42 | - 3306:3306 43 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 44 | steps: 45 | - name: Checkout ror-api code 46 | uses: actions/checkout@v2 47 | with: 48 | path: ror-api 49 | - name: Checkout ror-data-test 50 | uses: actions/checkout@v2 51 | with: 52 | repository: ror-community/ror-data-test 53 | token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 54 | path: ror-data-test 55 | - name: Get last data dump name 56 | working-directory: ./ror-data-test 57 | run: | 58 | FILE="$(ls -Art *.zip | tail -n 1)" 59 | echo ${FILE%.*} 60 | echo "LATEST_DUMP_FILE=${FILE%.*}" >> $GITHUB_ENV 61 | - name: Cache dependency 62 | uses: actions/cache@v4 63 | with: 64 | path: ~/.cache/pip 65 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 66 | restore-keys: | 67 | ${{ runner.os }}-pip- 68 | - name: Set up Python environment 69 | uses: actions/setup-python@v2 70 | with: 71 | python-version: "3.8" 72 | - name: Install requirements 73 | working-directory: ./ror-api 74 | run: | 75 | python -m pip install --upgrade pip 76 | pip 
install -r requirements.txt 77 | pip install yapf 78 | 79 | python manage.py collectstatic --noinput 80 | - name: Setup 81 | working-directory: ./ror-api 82 | run: | 83 | python manage.py setup v1.55-2024-10-31-ror-data -t 84 | # Dump file temp hard coded for v2.1 release 85 | # Pulled from ror-data-test per settings.py config 86 | - name: Test 87 | working-directory: ./ror-api 88 | run: | 89 | python manage.py test rorapi.tests.tests_unit 90 | # TODO fix these tests running in GitHub Action 91 | # python manage.py test rorapi.tests_integration 92 | # python manage.py test rorapi.tests_functional 93 | 94 | build: 95 | needs: test 96 | runs-on: ubuntu-latest 97 | steps: 98 | - name: Checkout 99 | uses: actions/checkout@v2 100 | - name: Set up Docker Buildx 101 | uses: docker/setup-buildx-action@v1 102 | - name: Cache Docker layers 103 | uses: actions/cache@v4 104 | with: 105 | path: /tmp/.buildx-cache 106 | key: ${{ runner.os }}-buildx-${{ github.sha }} 107 | restore-keys: | 108 | ${{ runner.os }}-buildx- 109 | - name: Login to DockerHub 110 | uses: docker/login-action@v1 111 | with: 112 | username: ${{ secrets.DOCKERHUB_RORAPI_USERNAME }} 113 | password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} 114 | - name: Build and push 115 | uses: docker/build-push-action@v2 116 | with: 117 | context: . 
118 | file: ./Dockerfile 119 | push: true 120 | tags: rorcommunity/ror-api:dev 121 | cache-from: type=local,src=/tmp/.buildx-cache 122 | cache-to: type=local,dest=/tmp/.buildx-cache 123 | 124 | deploy: 125 | needs: [test, build] 126 | runs-on: ubuntu-latest 127 | env: 128 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 129 | steps: 130 | - name: Checkout 131 | uses: actions/checkout@v2 132 | with: 133 | ref: ${{ github.event.pull_request.head.sha }} 134 | - name: Extract variables 135 | shell: bash 136 | run: | 137 | echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" 138 | echo "::set-output name=TAG::$(git tag --points-at HEAD)" 139 | echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" 140 | echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" 141 | echo "::set-output name=MESSAGE::$(git log --format=%B -n 1 ${{ github.event.after }})" 142 | id: extract_variables 143 | 144 | - name: Checkout terraform config repo 145 | uses: actions/checkout@v2 146 | with: 147 | # public repo with terraform configuration 148 | repository: "ror-community/new-deployment" 149 | persist-credentials: false 150 | - name: Commit changes to terraform config repository 151 | # use go template in terraform config repository to update git sha and tag 152 | # commit and push changes to trigger terraform workflow 153 | run: | 154 | export GIT_SHA=${{ steps.extract_variables.outputs.GIT_SHA_SHORT }} 155 | export GIT_TAG=${{ steps.extract_variables.outputs.TAG }} 156 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz 157 | tar -xzvf dockerize-linux-amd64-v0.6.0.tar.gz 158 | rm dockerize-linux-amd64-v0.6.0.tar.gz 159 | ./dockerize -template ror/services/api/_ror-api-dev.auto.tfvars.tmpl:ror/services/api/_ror-api-dev.auto.tfvars 160 | 161 | git config --local user.email "action@github.com" 162 | git config --local user.name "GitHub Action" 163 | git add 
ror/services/api/_ror-api-dev.auto.tfvars 164 | git commit -m "Adding ror-api git variables for commit ${{ steps.extract_variables.outputs.GIT_SHA }}" 165 | - name: Push changes 166 | uses: ad-m/github-push-action@v0.6.0 167 | with: 168 | github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 169 | repository: "ror-community/new-deployment" 170 | branch: "refs/heads/master" 171 | tags: false 172 | 173 | - name: Notify Slack 174 | uses: edge/simple-slack-notify@master 175 | with: 176 | channel: "#deployment-updates" 177 | color: "good" 178 | text: "A new version of the is deployed to dev." 179 | failure_text: "${env.GITHUB_WORKFLOW} (${env.GITHUB_RUN_NUMBER}) build failed" 180 | fields: | 181 | [{ "title": "Commit message", "value": "${{ steps.extract_variables.outputs.MESSAGE }}" }, 182 | { "title": "Committed by", "value": "", "short": true }, 183 | { "title": "Commit SHA", "value": "", "short": true }, 184 | { "title": "Repository", "value": "", "short": true }, 185 | { "title": "Branch", "value": "", "short": true }] -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | env: 9 | ELASTIC_PASSWORD: "changeme" 10 | ELASTIC7_HOST: "localhost" 11 | ELASTIC7_PORT: "9200" 12 | DB_HOST: 127.0.0.1 13 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 14 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 15 | AWS_REGION: ${{ secrets.AWS_REGION }} 16 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 17 | LAUNCH_DARKLY_KEY: ${{ secrets.LAUNCH_DARKLY_KEY_PROD}} 18 | services: 19 | elasticsearch7: 20 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 21 | env: 22 | discovery.type: single-node 23 | ES_JAVA_OPTS: -Xms512m -Xmx512m 24 | ELASTIC_PASSWORD: changeme 25 | xpack.security.enabled: "false" 26 | 
http.cors.enabled: "true" 27 | http.cors.allow-origin: "*" 28 | ports: 29 | - 9200:9200 30 | db: 31 | image: mysql:8.0 32 | env: 33 | MYSQL_DATABASE: "rorapi" 34 | MYSQL_USER: "ror_user" 35 | MYSQL_PASSWORD: "password" 36 | MYSQL_ROOT_PASSWORD: "password" 37 | ports: 38 | - 3306:3306 39 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 40 | steps: 41 | - name: Checkout ror-api code 42 | uses: actions/checkout@v2 43 | with: 44 | path: ror-api 45 | - name: Cache dependency 46 | uses: actions/cache@v4 47 | with: 48 | path: ~/.cache/pip 49 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 50 | restore-keys: | 51 | ${{ runner.os }}-pip- 52 | - name: Set up Python environment 53 | uses: actions/setup-python@v2 54 | with: 55 | python-version: "3.8" 56 | - name: Install requirements 57 | working-directory: ./ror-api 58 | run: | 59 | python -m pip install --upgrade pip 60 | pip install -r requirements.txt 61 | pip install yapf 62 | 63 | python manage.py collectstatic --noinput 64 | - name: Setup 65 | working-directory: ./ror-api 66 | run: | 67 | python manage.py setup v1.55-2024-10-31-ror-data -t 68 | # temp hard-coded dump file for v2.1 release 69 | - name: Test 70 | working-directory: ./ror-api 71 | run: | 72 | python manage.py test rorapi.tests.tests_unit 73 | # TODO fix these tests running in GitHub Action 74 | # python manage.py test rorapi.tests_integration 75 | # python manage.py test rorapi.tests_functional 76 | 77 | build: 78 | needs: test 79 | runs-on: ubuntu-latest 80 | steps: 81 | - name: Checkout 82 | uses: actions/checkout@v2 83 | - name: Set up Docker Buildx 84 | uses: docker/setup-buildx-action@v1 85 | - name: Cache Docker layers 86 | uses: actions/cache@v4 87 | with: 88 | path: /tmp/.buildx-cache 89 | key: ${{ runner.os }}-buildx-${{ github.sha }} 90 | restore-keys: | 91 | ${{ runner.os }}-buildx- 92 | - name: Login to DockerHub 93 | uses: docker/login-action@v1 94 | with: 95 | username: 
${{ secrets.DOCKERHUB_RORAPI_USERNAME }} 96 | password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} 97 | - name: Get git tag 98 | run: | 99 | echo "::set-output name=GIT_TAG::$(git tag --points-at HEAD)" 100 | id: set_git_vars 101 | - name: Build and push 102 | uses: docker/build-push-action@v2 103 | with: 104 | context: . 105 | file: ./Dockerfile 106 | push: true 107 | tags: rorcommunity/ror-api:${{ steps.set_git_vars.outputs.GIT_TAG }} 108 | cache-from: type=local,src=/tmp/.buildx-cache 109 | cache-to: type=local,dest=/tmp/.buildx-cache 110 | 111 | deploy: 112 | needs: [test, build] 113 | runs-on: ubuntu-latest 114 | env: 115 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 116 | steps: 117 | - name: Checkout 118 | uses: actions/checkout@v2 119 | 120 | - name: Extract variables 121 | shell: bash 122 | run: | 123 | echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" 124 | echo "::set-output name=TAG::$(git tag --points-at HEAD)" 125 | echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" 126 | echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" 127 | id: extract_variables 128 | 129 | - name: Checkout terraform config repo 130 | uses: actions/checkout@v2 131 | with: 132 | # public repo with terraform configuration 133 | repository: 'ror-community/new-deployment' 134 | persist-credentials: false 135 | - name: Commit changes to terraform config repository 136 | # use go template in terraform config repository to update git sha and tag 137 | # commit and push changes to trigger terraform workflow 138 | run: | 139 | export GIT_SHA=${{ steps.extract_variables.outputs.GIT_SHA_SHORT }} 140 | export GIT_TAG=${{ steps.extract_variables.outputs.TAG }} 141 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz 142 | tar -xzvf dockerize-linux-amd64-v0.6.0.tar.gz 143 | rm dockerize-linux-amd64-v0.6.0.tar.gz 144 | ./dockerize -template 
ror/services/api/_ror-api.auto.tfvars.tmpl:ror/services/api/_ror-api.auto.tfvars 145 | 146 | git config --local user.email "action@github.com" 147 | git config --local user.name "GitHub Action" 148 | git add ror/services/api/_ror-api.auto.tfvars 149 | git commit -m "Adding ror-api git variables for commit ${{ steps.extract_variables.outputs.GIT_SHA }}" 150 | - name: Push changes 151 | uses: ad-m/github-push-action@v0.6.0 152 | with: 153 | github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 154 | repository: 'ror-community/new-deployment' 155 | branch: 'refs/heads/master' 156 | tags: false 157 | 158 | - name: Notify Slack 159 | uses: edge/simple-slack-notify@master 160 | with: 161 | channel: '#deployment-updates' 162 | color: 'good' 163 | text: 'A new version of the is deployed to production.' 164 | failure_text: '${env.GITHUB_WORKFLOW} (${env.GITHUB_RUN_NUMBER}) build failed' 165 | fields: | 166 | [{ "title": "Committed by", "value": "", "short": true }, 167 | { "title": "Commit SHA", "value": "", "short": true }, 168 | { "title": "Repository", "value": "", "short": true }, 169 | { "title": "Release", "value": "", "short": true }] 170 | -------------------------------------------------------------------------------- /.github/workflows/staging.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Staging 2 | on: 3 | push: 4 | branches: 5 | - "staging" 6 | jobs: 7 | test: 8 | runs-on: ubuntu-latest 9 | env: 10 | ELASTIC_PASSWORD: "changeme" 11 | ELASTIC7_HOST: "localhost" 12 | ELASTIC7_PORT: "9200" 13 | DB_HOST: 127.0.0.1 14 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 15 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 16 | AWS_REGION: ${{ secrets.AWS_REGION }} 17 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 18 | LAUNCH_DARKLY_KEY: ${{ secrets.LAUNCH_DARKLY_KEY_STAGING }} 19 | services: 20 | elasticsearch7: 21 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 22 | env: 23 | 
discovery.type: single-node 24 | ES_JAVA_OPTS: -Xms512m -Xmx512m 25 | ELASTIC_PASSWORD: changeme 26 | xpack.security.enabled: "false" 27 | http.cors.enabled: "true" 28 | http.cors.allow-origin: "*" 29 | ports: 30 | - 9200:9200 31 | db: 32 | image: mysql:8.0 33 | env: 34 | MYSQL_DATABASE: "rorapi" 35 | MYSQL_USER: "ror_user" 36 | MYSQL_PASSWORD: "password" 37 | MYSQL_ROOT_PASSWORD: "password" 38 | ports: 39 | - 3306:3306 40 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 41 | steps: 42 | - name: Checkout ror-api code 43 | uses: actions/checkout@v2 44 | with: 45 | path: ror-api 46 | - name: Cache dependency 47 | uses: actions/cache@v4 48 | with: 49 | path: ~/.cache/pip 50 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 51 | restore-keys: | 52 | ${{ runner.os }}-pip- 53 | - name: Set up Python environment 54 | uses: actions/setup-python@v2 55 | with: 56 | python-version: "3.8" 57 | - name: Install requirements 58 | working-directory: ./ror-api 59 | run: | 60 | python -m pip install --upgrade pip 61 | pip install -r requirements.txt 62 | pip install yapf 63 | 64 | python manage.py collectstatic --noinput 65 | - name: Setup 66 | working-directory: ./ror-api 67 | run: | 68 | python manage.py setup v1.55-2024-10-31-ror-data -t 69 | # temp hard-coded dump file for v2.1 release 70 | - name: Test 71 | working-directory: ./ror-api 72 | run: | 73 | python manage.py test rorapi.tests.tests_unit 74 | # TODO fix these tests running in GitHub Action 75 | # python manage.py test rorapi.tests_integration 76 | # python manage.py test rorapi.tests_functional 77 | 78 | build: 79 | needs: test 80 | runs-on: ubuntu-latest 81 | steps: 82 | - name: Checkout 83 | uses: actions/checkout@v2 84 | - name: Set up Docker Buildx 85 | uses: docker/setup-buildx-action@v1 86 | - name: Cache Docker layers 87 | uses: actions/cache@v4 88 | with: 89 | path: /tmp/.buildx-cache 90 | key: ${{ runner.os }}-buildx-${{ github.sha }} 91 
| restore-keys: | 92 | ${{ runner.os }}-buildx- 93 | - name: Login to DockerHub 94 | uses: docker/login-action@v1 95 | with: 96 | username: ${{ secrets.DOCKERHUB_RORAPI_USERNAME }} 97 | password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} 98 | - name: Build and push 99 | uses: docker/build-push-action@v2 100 | with: 101 | context: . 102 | file: ./Dockerfile 103 | push: true 104 | tags: rorcommunity/ror-api:staging 105 | cache-from: type=local,src=/tmp/.buildx-cache 106 | cache-to: type=local,dest=/tmp/.buildx-cache 107 | 108 | deploy: 109 | needs: [test, build] 110 | runs-on: ubuntu-latest 111 | env: 112 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 113 | steps: 114 | - name: Checkout 115 | uses: actions/checkout@v2 116 | with: 117 | ref: ${{ github.event.pull_request.head.sha }} 118 | - name: Extract variables 119 | shell: bash 120 | run: | 121 | echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" 122 | echo "::set-output name=TAG::$(git tag --points-at HEAD)" 123 | echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" 124 | echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" 125 | echo "::set-output name=MESSAGE::$(git log --format=%B -n 1 ${{ github.event.after }})" 126 | id: extract_variables 127 | 128 | - name: Checkout terraform config repo 129 | uses: actions/checkout@v2 130 | with: 131 | # public repo with terraform configuration 132 | repository: 'ror-community/new-deployment' 133 | persist-credentials: false 134 | - name: Commit changes to terraform config repository 135 | # use go template in terraform config repository to update git sha and tag 136 | # commit and push changes to trigger terraform workflow 137 | run: | 138 | export GIT_SHA=${{ steps.extract_variables.outputs.GIT_SHA_SHORT }} 139 | export GIT_TAG=${{ steps.extract_variables.outputs.GIT_TAG }} 140 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz 141 | tar -xzvf 
dockerize-linux-amd64-v0.6.0.tar.gz 142 | rm dockerize-linux-amd64-v0.6.0.tar.gz 143 | ./dockerize -template ror/services/api/_ror-api-staging.auto.tfvars.tmpl:ror/services/api/_ror-api-staging.auto.tfvars 144 | 145 | git config --local user.email "action@github.com" 146 | git config --local user.name "GitHub Action" 147 | git add ror/services/api/_ror-api-staging.auto.tfvars 148 | git commit -m "Adding ror-api git variables for commit ${{ steps.extract_variables.outputs.GIT_SHA }}" 149 | - name: Push changes 150 | uses: ad-m/github-push-action@v0.6.0 151 | with: 152 | github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 153 | repository: 'ror-community/new-deployment' 154 | branch: 'refs/heads/master' 155 | tags: false 156 | 157 | - name: Notify Slack 158 | uses: edge/simple-slack-notify@master 159 | with: 160 | channel: '#deployment-updates' 161 | color: 'good' 162 | text: 'A new version of the is deployed to staging.' 163 | failure_text: '${env.GITHUB_WORKFLOW} (${env.GITHUB_RUN_NUMBER}) build failed' 164 | fields: | 165 | [{ "title": "Commit message", "value": "${{ steps.extract_variables.outputs.MESSAGE }}" }, 166 | { "title": "Committed by", "value": "", "short": true }, 167 | { "title": "Commit SHA", "value": "", "short": true }, 168 | { "title": "Repository", "value": "", "short": true }, 169 | { "title": "Branch", "value": "", "short": true }] 170 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.* 3 | !.env.example 4 | !.env.travis 5 | docker-compose.override.yml 6 | __pycache__/ 7 | *.pyc 8 | rorapi/data/** 9 | esdata/** 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | 5 | sudo: required 6 | 7 | services: 8 | - docker 9 | 10 | env: 11 | 
- DOCKER_COMPOSE_VERSION=1.23.2 12 | 13 | before_install: 14 | - pip3 install yapf 15 | - yapf -d -r . 16 | 17 | install: 18 | - sudo rm /usr/local/bin/docker-compose 19 | - curl -L https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-`uname -s`-`uname -m` > docker-compose 20 | - chmod +x docker-compose 21 | - sudo mv docker-compose /usr/local/bin 22 | - sudo pip install awscli 23 | 24 | before_script: 25 | - cp .env.travis .env 26 | - docker-compose up -d 27 | - sleep 120 28 | - docker-compose exec web python manage.py createindex 29 | - docker-compose exec web python manage.py upgrade 30 | - docker-compose exec web python manage.py setup 31 | 32 | script: 33 | - docker-compose exec web python manage.py test rorapi.tests 34 | - docker-compose exec web python manage.py test rorapi.tests_integration 35 | - docker-compose exec web python manage.py test rorapi.tests_functional 36 | 37 | after_success: 38 | - export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY 39 | - export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_KEY 40 | - docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD"; 41 | - REPO=rorcommunity/ror-api; 42 | - AUTO_DEPLOY=false; 43 | - if [ "${TRAVIS_TAG?}" ]; then 44 | docker build -f Dockerfile -t $REPO:$TRAVIS_TAG .; 45 | docker push $REPO:$TRAVIS_TAG; 46 | echo "Pushed to" $REPO:$TRAVIS_TAG; 47 | AUTO_DEPLOY=true; 48 | elif [[ "$TRAVIS_BRANCH" == "dev" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then 49 | docker build -f Dockerfile -t $REPO:$TRAVIS_BRANCH .; 50 | docker push $REPO:$TRAVIS_BRANCH; 51 | echo "Pushed to" $REPO:$TRAVIS_BRANCH; 52 | AUTO_DEPLOY=true; 53 | else 54 | docker build -f Dockerfile -t $REPO:$TRAVIS_BRANCH .; 55 | docker push $REPO:$TRAVIS_BRANCH; 56 | echo "Pushed to" $REPO:$TRAVIS_BRANCH; 57 | fi 58 | 59 | - if [ "$AUTO_DEPLOY" == "true" ]; then 60 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz; 61 | tar -xzvf dockerize-linux-amd64-v0.6.0.tar.gz; 62 | rm 
dockerize-linux-amd64-v0.6.0.tar.gz; 63 | export GIT_SHA=$(git rev-parse --short HEAD); 64 | export GIT_TAG=$(git describe --tags $(git rev-list --tags --max-count=1)); 65 | git clone "https://${TRAVIS_SECURE_TOKEN}@github.com/ror-community/new-deployment.git"; 66 | 67 | sentry-cli releases new ror-api:${GIT_TAG} --finalize --project ror-api; 68 | 69 | if [ "${TRAVIS_TAG?}" ]; then 70 | ./dockerize -template vendor/docker/_ror-api.auto.tfvars.tmpl:new-deployment/ror/services/api/_ror-api.auto.tfvars; 71 | sentry-cli releases deploys ror-ap:${GIT_TAG} new -e production; 72 | else 73 | ./dockerize -template vendor/docker/_ror-api-dev.auto.tfvars.tmpl:new-deployment/ror/services/api/_ror-api-dev.auto.tfvars; 74 | sentry-cli releases deploys ror-api:${GIT_TAG} new -e dev; 75 | fi 76 | 77 | sentry-cli releases set-commits --auto ror-api:${GIT_TAG}; 78 | 79 | cd new-deployment; 80 | git remote; 81 | git config user.email ${DOCKER_EMAIL}; 82 | git config user.name ${DOCKER_USERNAME}; 83 | 84 | if [ "${TRAVIS_TAG?}" ]; then 85 | git add ror/services/api/_ror-api.auto.tfvars; 86 | git commit -m "Adding ror-api git variables for commit tagged ${TRAVIS_TAG?}"; 87 | else 88 | git add ror/services/api/_ror-api-dev.auto.tfvars; 89 | git commit -m "Adding ror-api git variables for latest commit on branch $TRAVIS_BRANCH"; 90 | fi 91 | 92 | git push "https://${TRAVIS_SECURE_TOKEN}@github.com/ror-community/new-deployment.git" master; 93 | fi 94 | 95 | notifications: 96 | email: false 97 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM phusion/passenger-full:1.0.12 2 | MAINTAINER Martin Fenner "mfenner@datacite.org" 3 | 4 | # Set correct environment variables 5 | ENV HOME /home/app 6 | 7 | # Allow app user to read /etc/container_environment 8 | RUN usermod -a -G docker_env app 9 | 10 | # Use baseimage-docker's init process 11 | CMD ["/sbin/my_init"] 
12 | 13 | # Update installed APT packages, clean up when done 14 | RUN mv /etc/apt/sources.list.d /etc/apt/sources.list.d.bak && \ 15 | apt update && apt install -y ca-certificates && \ 16 | mv /etc/apt/sources.list.d.bak /etc/apt/sources.list.d && \ 17 | apt-get upgrade -y -o Dpkg::Options::="--force-confold" && \ 18 | apt-get clean && \ 19 | apt-get install ntp wget unzip tzdata python3-pip libmagic1 default-libmysqlclient-dev libcairo2-dev pkg-config -y && \ 20 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 21 | 22 | # Enable Passenger and Nginx and remove the default site 23 | # Preserve env variables for nginx 24 | RUN rm -f /etc/service/nginx/down && \ 25 | rm /etc/nginx/sites-enabled/default 26 | COPY vendor/docker/webapp.conf /etc/nginx/sites-enabled/webapp.conf 27 | COPY vendor/docker/00_app_env.conf /etc/nginx/conf.d/00_app_env.conf 28 | 29 | # Use Amazon NTP servers 30 | COPY vendor/docker/ntp.conf /etc/ntp.conf 31 | 32 | # Copy webapp folder 33 | COPY . /home/app/webapp/ 34 | RUN chown -R app:app /home/app/webapp && \ 35 | chmod -R 755 /home/app/webapp 36 | 37 | # enable SSH 38 | RUN rm -f /etc/service/sshd/down && \ 39 | /etc/my_init.d/00_regen_ssh_host_keys.sh 40 | 41 | # install custom ssh key during startup 42 | RUN mkdir -p /etc/my_init.d 43 | COPY vendor/docker/10_ssh.sh /etc/my_init.d/10_ssh.sh 44 | 45 | # workdir 46 | WORKDIR /home/app/webapp 47 | 48 | # point /usr/bin/python to Python3 49 | RUN ln -s -f /usr/bin/python3 /usr/bin/python 50 | 51 | # install Python packages 52 | RUN pip3 install --no-cache-dir --upgrade pip 53 | RUN pip3 install --no-cache-dir -r requirements.txt 54 | RUN pip3 install yapf 55 | 56 | # collect static files for Django 57 | ENV DJANGO_SKIP_DB_CHECK=True 58 | RUN python manage.py collectstatic --noinput 59 | 60 | # Expose web 61 | EXPOSE 80 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 
| The MIT License (MIT) 2 | Copyright (c) 2017-2019 Crossref 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | elasticsearch7: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.1 6 | ports: 7 | - "9200:9200" 8 | - "9300:9300" 9 | environment: 10 | discovery.type: single-node 11 | ES_JAVA_OPTS: -Xmx256m -Xms256m 12 | ELASTIC_PASSWORD: changeme 13 | xpack.security.enabled: "false" 14 | http.cors.enabled: "true" 15 | http.cors.allow-origin: "*" 16 | healthcheck: 17 | test: curl -f http://elastic:changeme@elasticsearch7:9200 18 | interval: 10s 19 | timeout: 1s 20 | volumes: 21 | - ./esdata:/usr/share/elasticsearch/data 22 | db: 23 | image: mysql:8.0 24 | volumes: 25 | - mysql_data:/var/lib/mysql 26 | env_file: 27 | - .env 28 | ports: 29 | - "3306:3306" 30 | healthcheck: 31 | test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] 32 | timeout: 20s 33 | retries: 10 34 | web: 35 | container_name: rorapiweb 36 | env_file: .env 37 | build: . 38 | #image: rorcommunity/ror-api 39 | ports: 40 | - "9292:80" 41 | - "2222:22" 42 | volumes: 43 | - ./rorapi:/home/app/webapp/rorapi 44 | depends_on: 45 | - elasticsearch7 46 | - db 47 | volumes: 48 | mysql_data: 49 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rorapi.settings') 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? 
Did you " 15 | "forget to activate a virtual environment?") from exc 16 | execute_from_command_line(sys.argv) 17 | 18 | 19 | if __name__ == '__main__': 20 | main() 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Django==2.2.28 2 | elasticsearch_dsl==7.4.1 3 | geonamescache==1.3.0 4 | requests==2.22.0 5 | requests-aws4auth==0.9 6 | mock==3.0.5 7 | base32_crockford==0.3.0 8 | elasticsearch==7.10.1 9 | djangorestframework==3.11.2 10 | coreapi==2.3.3 11 | django-prometheus==1.0.15 12 | sentry-sdk==0.12.2 13 | python-dotenv==0.10.3 14 | django-cors-headers==3.1.0 15 | unidecode==1.1.1 16 | fuzzywuzzy==0.17.0 17 | python-Levenshtein==0.12.1 18 | statsmodels==0.10.2 19 | boto3 20 | pandas==1.4.1 21 | numpy==1.22 22 | titlecase==2.3 23 | update_address @ git+https://github.com/ror-community/update_address.git 24 | launchdarkly-server-sdk==7.6.1 25 | jsonschema==3.2.0 26 | python-magic 27 | iso639-lang 28 | mysqlclient==2.2.7 29 | bleach==6.0.0 30 | pycountry==22.3.5 31 | django-ses==3.5.0 -------------------------------------------------------------------------------- /rorapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/__init__.py -------------------------------------------------------------------------------- /rorapi/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/common/__init__.py -------------------------------------------------------------------------------- /rorapi/common/create_update.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import datetime 3 | 
# JSON schema used to validate every v2 record (fetched once at import time)
V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/master/ror_schema_v2_1.json")


def update_record(json_input, existing_record):
    """Overlay the fields in json_input onto a copy of existing_record.

    Returns the merged record with admin.last_modified refreshed.
    """
    record = copy.deepcopy(existing_record)
    for k, v in json_input.items():
        record[k] = copy.deepcopy(v)
    return update_last_mod(record)


def update_last_mod(record):
    """Set record['admin']['last_modified'] to today's date and return the record."""
    record['admin']['last_modified'] = copy.deepcopy(V2_LAST_MOD)
    record['admin']['last_modified']['date'] = datetime.now().strftime("%Y-%m-%d")
    return record


def check_optional_fields(record):
    """Return True if any optional v2 field is missing from record."""
    return any(k not in record for k in V2_OPTIONAL_FIELD_DEFAULTS)


def add_missing_optional_fields(record):
    """Fill any missing optional v2 fields with their default values."""
    for k, v in V2_OPTIONAL_FIELD_DEFAULTS.items():
        if k not in record:
            record[k] = v
    return record


def add_created_last_mod(record):
    """Stamp a brand-new record's admin.created and admin.last_modified with today."""
    today = datetime.now().strftime("%Y-%m-%d")
    record['admin'] = copy.deepcopy(V2_ADMIN)
    record['admin']['created']['date'] = today
    record['admin']['last_modified']['date'] = today
    return record


def update_locations(locations):
    """Re-fetch Geonames details for every location carrying a geonames_id.

    Returns (error, updated_locations); error is a message for the first
    failed lookup, otherwise None.
    """
    error = None
    updated_locations = []
    for location in locations:
        if 'geonames_id' in location:
            try:
                updated_location = ua.new_geonames_v2(str(location['geonames_id']))
                updated_locations.append(updated_location['location'])
            # FIX: was a bare `except:`, which also swallowed SystemExit /
            # KeyboardInterrupt; catch Exception only.
            except Exception:
                error = "Error retrieving Geonames data for ID {}. Please check that this is a valid Geonames ID".format(location['geonames_id'])
    return error, updated_locations


def sort_list_fields(v2_record):
    """Sort each list-valued field named in V2_SORT_KEYS (by its configured key, if any)."""
    for field in v2_record:
        if field in V2_SORT_KEYS:
            sort_key = V2_SORT_KEYS[field]
            if sort_key is not None:
                v2_record[field] = sorted(v2_record[field], key=lambda x: x[sort_key])
            else:
                v2_record[field] = sorted(v2_record[field])
    return v2_record


def new_record_from_json(json_input, version):
    """Build, enrich and schema-validate a new v2 record from client JSON.

    Fills optional-field defaults, resolves Geonames locations, stamps
    created/last_modified, mints a fresh ROR ID, then validates against
    V2_SCHEMA. Returns (error, valid_data); exactly one is None.
    """
    error = None
    valid_data = None
    new_record = copy.deepcopy(json_input)
    if check_optional_fields(new_record):
        new_record = add_missing_optional_fields(new_record)
    error, updated_locations = update_locations(new_record['locations'])
    if not error:
        new_record['locations'] = updated_locations
        new_record = add_created_last_mod(new_record)
        # mint an ID that is guaranteed not to collide with an indexed record
        new_record['id'] = check_ror_id(version)
        error, valid_data = validate_record(sort_list_fields(new_record), V2_SCHEMA)
    return error, valid_data


def update_record_from_json(new_json, existing_org):
    """Merge new_json into an existing organization and schema-validate the result.

    Returns (error, valid_data); exactly one is None.
    """
    error = None
    valid_data = None
    existing_record = OrganizationSerializerV2(existing_org).data
    updated_record = update_record(new_json, existing_record)
    error, updated_locations = update_locations(updated_record['locations'])
    if not error:
        updated_record['locations'] = updated_locations
        error, valid_data = validate_record(sort_list_fields(updated_record), V2_SCHEMA)
    return error, valid_data
def save_record_file(ror_id, updated, json_obj, dir_name):
    """Write a record's JSON to DATA['DIR']/<dir_name>/(updates|new)/<short-id>.json."""
    dir_path = os.path.join(DATA['DIR'], dir_name)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    subdir = 'updates' if updated else 'new'
    subdir_path = os.path.join(dir_path, subdir)
    if not os.path.exists(subdir_path):
        os.mkdir(subdir_path)
    full_path = os.path.join(subdir_path, ror_id.split('https://ror.org/')[1] + '.json')
    # FIX: open explicitly as UTF-8 — ensure_ascii=False emits non-ASCII
    # characters, which crashed under non-UTF-8 default locales.
    with open(full_path, "w", encoding="utf-8") as outfile:
        json.dump(json_obj, outfile, ensure_ascii=False, indent=2)


def save_report_file(report, report_fields, csv_file, dir_name, validate_only):
    """Write the processing report CSV and, unless validate_only, a copy of the input file."""
    dir_path = os.path.join(DATA['DIR'], dir_name)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    filepath = os.path.join(dir_path, 'report.csv')
    # FIX: csv.writer requires newline='' to avoid spurious blank rows on Windows
    with open(filepath, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=report_fields)
        writer.writeheader()
        writer.writerows(report)
    if not validate_only:
        # save copy of input file
        filepath = os.path.join(dir_path, 'input.csv')
        csv_file.seek(0)
        with open(filepath, 'wb+') as f:
            for chunk in csv_file.chunks():
                f.write(chunk)


def process_csv(csv_file, version, validate_only):
    """Process an uploaded CSV of new and updated ROR records.

    Each row either updates an existing record (when 'id' is set) or creates
    a new one. Valid records are written to disk; a per-row report is always
    produced. Unless validate_only, the whole batch is zipped and uploaded
    to S3. Returns (error, success_msg).
    """
    dir_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + "-ror-records"
    success_msg = None
    error = None
    report = []
    report_fields = ['row', 'html_url', 'ror_id', 'action', 'errors']
    skipped_count = 0
    updated_count = 0
    new_count = 0
    read_file = csv_file.read().decode('utf-8')
    reader = csv.DictReader(io.StringIO(read_file))
    row_num = 2  # row 1 is the header
    for row in reader:
        html_url = row['html_url'] if row['html_url'] else None
        ror_id = None
        updated = False
        if row['id']:
            ror_id = row['id']
            updated = True
            row_errors, v2_record = update_record_from_csv(row, version)
        else:
            row_errors, v2_record = new_record_from_csv(row, version)
        if not row_errors:
            if updated:
                action = 'updated'
                updated_count += 1
            else:
                action = 'created'
                new_count += 1
                ror_id = v2_record['id']
            serializer = OrganizationSerializerV2(v2_record)
            json_obj = json.loads(JSONRenderer().render(serializer.data))
            if not validate_only:
                # FIX: save_record_file returns None; dropped the dead
                # `file = ...` assignment
                save_record_file(ror_id, updated, json_obj, dir_name)
        else:
            action = 'skipped'
            skipped_count += 1
        if validate_only and action == 'created':
            # a validation-only run mints no real ID for created records
            ror_id = None
        report.append({"row": row_num,
                       "html_url": html_url,
                       "ror_id": ror_id if ror_id else '',
                       "action": action,
                       "errors": "; ".join(row_errors) if row_errors else ''})
        row_num += 1
    if new_count > 0 or updated_count > 0 or skipped_count > 0:
        try:
            if validate_only:
                try:
                    save_report_file(report, report_fields, csv_file, dir_name, validate_only)
                    success_msg = os.path.join(DATA['DIR'], dir_name, 'report.csv')
                except Exception as e:
                    error = f"Error creating validation report: {e}"
            else:
                # create report file
                save_report_file(report, report_fields, csv_file, dir_name, validate_only)
                # create zip file
                zipfile = shutil.make_archive(os.path.join(DATA['DIR'], dir_name), 'zip', DATA['DIR'], dir_name)
                # upload to S3
                try:
                    DATA['CLIENT'].upload_file(zipfile, DATA['PUBLIC_STORE'], dir_name + '.zip')
                    zipfile = f"https://s3.eu-west-1.amazonaws.com/{DATA['PUBLIC_STORE']}/{urllib.parse.quote(dir_name)}.zip"
                    success_msg = {"file": zipfile,
                                   "rows processed": new_count + updated_count + skipped_count,
                                   "created": new_count,
                                   "updated": updated_count,
                                   "skipped": skipped_count}
                except Exception as e:
                    error = f"Error uploading zipfile to S3: {e}"
        except Exception as e:
            error = f"Unexpected error generating records: {e}"

    return error, success_msg
def new_record_from_csv(csv_data, version):
    """Build and validate a new v2 record from one CSV row.

    Returns (errors, new_record): a list of error messages (empty on
    success) and the validated record (None on validation failure).
    """
    v2_data = copy.deepcopy(V2_TEMPLATE)
    errors = []

    # domains
    if csv_data['domains']:
        v2_data['domains'] = [d.strip() for d in csv_data['domains'].strip(';').split(';')]

    # established
    if csv_data['established']:
        try:
            v2_data['established'] = int(csv_data['established'].strip())
        except ValueError:
            # FIX: a non-numeric year previously raised and aborted the whole
            # upload; report it like every other row error instead
            errors.append("Value {} in established field cannot be converted to an integer".format(csv_data['established']))

    # external ids
    for k, v in V2_EXTERNAL_ID_TYPES.items():
        if csv_data['external_ids.type.' + v + '.all']:
            all_ids = [i.strip() for i in csv_data['external_ids.type.' + v + '.all'].strip(';').split(';')]
            preferred = csv_data['external_ids.type.' + v + '.preferred']
            v2_data['external_ids'].append({
                "type": v,
                "all": all_ids,
                # default preferred ID to the first listed one
                "preferred": preferred.strip() if preferred else all_ids[0]
            })

    # links
    for k, v in V2_LINK_TYPES.items():
        if csv_data['links.type.' + v]:
            for l in csv_data['links.type.' + v].strip(';').split(';'):
                v2_data['links'].append({"type": v, "value": l.strip()})

    # locations
    if csv_data['locations.geonames_id']:
        geonames_ids = [i.strip() for i in csv_data['locations.geonames_id'].strip(';').split(';')]
        for geonames_id in geonames_ids:
            # geonames_details are filled in later by update_locations()
            v2_data['locations'].append({"geonames_id": geonames_id, "geonames_details": {}})

    # names: values may carry a "*lang" suffix, e.g. "Universität Wien*de"
    temp_names = []
    for k, v in V2_NAME_TYPES.items():
        if csv_data['names.types.' + v]:
            for n in csv_data['names.types.' + v].strip(';').split(';'):
                # FIX: lang_code is now always initialised; previously a name
                # like "Foo*" (empty lang) left lang_code unbound or carrying
                # the previous iteration's value
                name_val = None
                lang_code = None
                if LANG_DELIMITER in n:
                    if n.count(LANG_DELIMITER) == 1:
                        name_val, lang = n.split("*")
                        if lang:
                            lang_errors, lang_code = get_lang_code(lang.strip())
                            if lang_errors:
                                errors.append("Could not convert language value to ISO code: {}".format(lang))
                    else:
                        errors.append("Could not parse name value {} in names.types.{} because it contains multiple {} lang delimiter chars.".format(n, v, LANG_DELIMITER))
                else:
                    name_val = n
                if name_val:
                    temp_names.append({
                        "types": [v],
                        "value": name_val.strip(),
                        "lang": lang_code
                    })

    # merge duplicate (value, lang) pairs into one entry with combined types
    name_vals = [n['value'] for n in temp_names]
    dup_names = []
    for n in name_vals:
        if name_vals.count(n) > 1 and n not in dup_names:
            dup_names.append(n)
    for d in dup_names:
        dup_names_objs = [t for t in temp_names if t['value'] == d]
        lang_codes = [dno['lang'] for dno in dup_names_objs]
        for lang_code in lang_codes:
            if lang_codes.count(lang_code) > 1:
                name_lang_dups = [dno for dno in dup_names_objs if dno['lang'] == lang_code]
                types = []
                for n in name_lang_dups:
                    types.extend(n['types'])
                name_obj = {"types": types, "value": d, "lang": lang_code}
                if name_obj not in temp_names:
                    temp_names = [t for t in temp_names if t not in name_lang_dups]
                    temp_names.append(name_obj)
    v2_data['names'] = temp_names

    # status
    if csv_data['status']:
        v2_data['status'] = csv_data['status'].strip().lower()

    # types
    if csv_data['types']:
        v2_data['types'] = [t.strip().lower() for t in csv_data['types'].strip(';').split(';')]

    validation_error, new_record = new_record_from_json(v2_data, version)
    if validation_error:
        errors.append(validation_error)
    return errors, new_record
"links.type.website": UPDATE_ACTIONS_MULTI, 30 | "links.type.wikipedia": UPDATE_ACTIONS_MULTI, 31 | "locations.geonames_id": UPDATE_ACTIONS_MULTI, 32 | "names.types.acronym": UPDATE_ACTIONS_MULTI, 33 | "names.types.alias": UPDATE_ACTIONS_MULTI, 34 | "names.types.label": UPDATE_ACTIONS_MULTI, 35 | "names.types.ror_display": [UPDATE_ACTIONS["REPLACE"]], 36 | "status": [UPDATE_ACTIONS["REPLACE"]], 37 | "types": UPDATE_ACTIONS_MULTI 38 | } 39 | 40 | LANG_DELIMITER = "*" 41 | 42 | UPDATE_DELIMITER = "==" 43 | 44 | 45 | def get_actions_values(csv_field): 46 | print("getting actions values:") 47 | actions_values = {} 48 | if csv_field.lower() == UPDATE_ACTIONS["DELETE"]: 49 | actions_values[UPDATE_ACTIONS["DELETE"]] = None 50 | elif UPDATE_DELIMITER in csv_field: 51 | for ua in list(UPDATE_ACTIONS.values()): 52 | print(ua) 53 | if ua + UPDATE_DELIMITER in csv_field: 54 | print("doing regex:") 55 | regex = r"(" + re.escape( 56 | ua + UPDATE_DELIMITER) + r")(.*?)(?=$|(add|delete|replace)==)" 57 | result = re.search(regex, csv_field) 58 | print(result[0]) 59 | temp_val = result[0].replace(ua + UPDATE_DELIMITER, '') 60 | print("temp val:") 61 | print(temp_val) 62 | actions_values[ua] = [v.strip() for v in temp_val.split(';') if v] 63 | 64 | else: 65 | actions_values[UPDATE_ACTIONS["REPLACE"]] = [v.strip() for v in csv_field.split(';') if v] 66 | print(actions_values) 67 | return actions_values 68 | 69 | def validate_csv(csv_file): 70 | errors = [] 71 | try: 72 | read_file = csv_file.read().decode('utf-8') 73 | reader = csv.DictReader(io.StringIO(read_file)) 74 | rowcount = 0 75 | for row in reader: 76 | rowcount += 1 77 | if rowcount > 0: 78 | csv_fields = reader.fieldnames 79 | missing_fields = [] 80 | for field in CSV_REQUIRED_FIELDS_ACTIONS.keys(): 81 | if field not in csv_fields: 82 | missing_fields.append(field) 83 | print(missing_fields) 84 | if missing_fields: 85 | errors.append(f'CSV file is missing columns: {", ".join(missing_fields)}') 86 | else: 87 | 
errors.append("CSV file contains no data rows") 88 | except IOError as e: 89 | errors.append(f"Error parsing CSV file: {e}") 90 | print(errors) 91 | return errors 92 | 93 | def validate_csv_row_update_syntax(csv_data): 94 | print("validating row") 95 | errors = [] 96 | for k, v in csv_data.items(): 97 | if UPDATE_DELIMITER in v: 98 | print("field:") 99 | print(k) 100 | print("value:") 101 | print(v) 102 | actions_values = get_actions_values(v) 103 | print("actions values:") 104 | print(actions_values) 105 | update_actions = list(actions_values.keys()) 106 | if not update_actions: 107 | errors.append("Update delimiter '{}' found in '{}' field but no valid update action found in value {}".format(UPDATE_DELIMITER, k, v)) 108 | if len(update_actions) > 2: 109 | errors.append("{} update actions '{}' found in '{}' field but only 2 are allowed".format(str(len(update_actions)), ", ".join(update_actions), k)) 110 | if len(update_actions) == 2: 111 | if not (UPDATE_ACTIONS['ADD'] and UPDATE_ACTIONS['DELETE']) in update_actions: 112 | errors.append("Invalid combination of update actions '{}' found in '{}' field.".format(", ".join(update_actions), k)) 113 | disallowed_actions = [ua for ua in update_actions if ua not in CSV_REQUIRED_FIELDS_ACTIONS[k]] 114 | print("allowed actions:") 115 | print(CSV_REQUIRED_FIELDS_ACTIONS[k]) 116 | print("disallowed actions:") 117 | print(disallowed_actions) 118 | if disallowed_actions: 119 | errors.append("Invalid update action(s) '{}' found in {} field. Allowed actions for this field are '{}'".format(", ".join(disallowed_actions), k, ", ".join(CSV_REQUIRED_FIELDS_ACTIONS[k]))) 120 | if v.strip() == UPDATE_ACTIONS['DELETE'].lower() and k in NO_DELETE_FIELDS: 121 | errors.append("Invalid update action '{}' in {} field. 
class ESQueryBuilder:
    """Elasticsearch query builder class

    Wraps an elasticsearch_dsl Search over the v1 or v2 index and exposes
    helpers that attach queries, filters, aggregations, sorting and
    pagination. Call get_query() to obtain the built Search object.
    """

    def __init__(self, version):
        # v1 and v2 records live in separate indexes
        if version == "v2":
            self.search = Search(using=ES7, index=ES_VARS["INDEX_V2"])
        else:
            self.search = Search(using=ES7, index=ES_VARS["INDEX_V1"])
        # report exact hit totals instead of ES's default capped count
        self.search = self.search.extra(track_total_hits=True)
        # dfs_query_then_fetch scores with index-wide term statistics
        self.search = self.search.params(search_type="dfs_query_then_fetch")

    def add_id_query(self, id):
        # operator "and": every token of the ID must match
        self.search = self.search.query("match", id={"query": id, "operator": "and"})

    def add_match_all_query(self):
        self.search = self.search.query("match_all")

    def add_string_query(self, terms):
        # query_string search inside the nested names_ids documents,
        # scored by the best-matching nested doc
        self.search = self.search.query(
            "nested",
            path="names_ids",
            score_mode="max",
            query=Q("query_string", query=terms, fuzzy_max_expansions=1),
        )

    def add_string_query_advanced(self, terms):
        # advanced query syntax across all fields; all terms required
        self.search = self.search.query(
            "bool",
            must=Q(
                "query_string",
                query=terms,
                default_field="*",
                default_operator="and",
                fuzzy_max_expansions=1,
            ),
        )

    def add_phrase_query(self, fields, terms):
        # NOTE: assigning to .query goes through elasticsearch_dsl's query
        # proxy and REPLACES any previously attached query
        self.search.query = Q(
            "dis_max", queries=[Q("match_phrase", **{f: terms}) for f in fields]
        )

    def add_common_query(self, fields, terms):
        # "common terms" query: cutoff_frequency demotes very frequent terms
        self.search.query = Q(
            "dis_max",
            queries=[
                Q("common", **{f: {"query": terms, "cutoff_frequency": 0.001}})
                for f in fields
            ],
        )

    def add_match_query(self, terms):
        # match against the acronyms field only
        self.search = self.search.query("match", acronyms=terms)

    def add_fuzzy_query(self, fields, terms):
        # fuzzy match over the given fields, best field wins (dis_max)
        self.search.query = Q(
            "dis_max",
            queries=[
                Q("match", **{f: {"query": terms, "fuzziness": "AUTO"}}) for f in fields
            ],
        )

    def add_filters(self, filters):
        # filters: mapping of field -> list of accepted values
        for f, v in filters.items():
            self.search = self.search.filter("terms", **{f: v})

    def add_aggregations(self, names):
        # names: iterable of (bucket_name, field) pairs; top 10 terms each
        for name in names:
            self.search.aggs.bucket(
                name[0], "terms", field=name[1], size=10, min_doc_count=1
            )

    def paginate(self, page):
        # 1-based page number -> slice of BATCH_SIZE hits
        self.search = self.search[
            ((page - 1) * ES_VARS["BATCH_SIZE"]) : (page * ES_VARS["BATCH_SIZE"])
        ]

    def get_query(self):
        # return the built Search object for execution by the caller
        return self.search

    def add_sort(self, field, order="asc"):
        # sort results on a single field; order is "asc" or "desc"
        self.search = self.search.sort({field: {"order": order}})
mapper = country(from_key="iso", to_key="name") 26 | try: 27 | self.title = mapper(data.key) 28 | except AttributeError: 29 | # if we have a country code with no name mapping, skip it to prevent 500 30 | pass 31 | self.count = data.doc_count 32 | 33 | 34 | class StatusBucket: 35 | """A model class for status aggregation bucket""" 36 | 37 | def __init__(self, data): 38 | self.id = data.key.lower() 39 | self.title = data.key 40 | self.count = data.doc_count 41 | 42 | 43 | class Errors: 44 | """Errors model class""" 45 | 46 | def __init__(self, errors): 47 | self.errors = errors 48 | -------------------------------------------------------------------------------- /rorapi/common/record_utils.py: -------------------------------------------------------------------------------- 1 | import jsonschema 2 | import requests 3 | from iso639 import Lang 4 | 5 | 6 | def get_lang_code(lang_string): 7 | lang_code = None 8 | error = None 9 | if len(lang_string) == 2: 10 | lang_string = lang_string.lower() 11 | else: 12 | lang_string = lang_string.title() 13 | try: 14 | lg = Lang(lang_string) 15 | lang_code = lg.pt1 16 | except Exception as e: 17 | error = e.msg 18 | return error, lang_code 19 | 20 | def get_file_from_url(url): 21 | rsp = requests.get(url) 22 | rsp.raise_for_status() 23 | return rsp.json() 24 | 25 | def validate_record(data, schema): 26 | try: 27 | print("validating data:") 28 | print(data) 29 | jsonschema.validate(data, schema) 30 | except jsonschema.ValidationError as e: 31 | return "Validation error: " + e.message, None 32 | else: 33 | return None, data 34 | 35 | -------------------------------------------------------------------------------- /rorapi/common/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | 3 | 4 | class OrganizationRelationshipsSerializer(serializers.Serializer): 5 | label = serializers.CharField() 6 | type = serializers.CharField() 7 | id = 
serializers.CharField() 8 | 9 | 10 | class BucketSerializer(serializers.Serializer): 11 | id = serializers.CharField() 12 | title = serializers.CharField() 13 | count = serializers.IntegerField() 14 | 15 | 16 | class ErrorsSerializer(serializers.Serializer): 17 | errors = serializers.StringRelatedField(many=True) 18 | -------------------------------------------------------------------------------- /rorapi/common/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url, include 2 | from django.urls import path, re_path 3 | from rest_framework.documentation import include_docs_urls 4 | from . import views 5 | from rorapi.common.views import ( 6 | HeartbeatView,GenerateAddress,GenerateId,IndexData,IndexDataDump,BulkUpdate,ClientRegistrationView,ValidateClientView) 7 | 8 | urlpatterns = [ 9 | # Health check 10 | url(r"^(?P(v1|v2))\/heartbeat$", HeartbeatView.as_view()), 11 | url(r"^heartbeat$", HeartbeatView.as_view()), 12 | # Using REST API 13 | url(r"^(?P(v1|v2))\/generateaddress\/(?P[0-9]+)", GenerateAddress.as_view()), 14 | path('generateaddress/', GenerateAddress.as_view()), 15 | url(r"^generateid$", GenerateId.as_view()), 16 | re_path(r"^(?P(v1|v2))\/bulkupdate$", BulkUpdate.as_view()), 17 | re_path(r"^(?P(v1|v2))\/register$", ClientRegistrationView.as_view()), 18 | path('validate-client-id//', ValidateClientView.as_view()), 19 | url(r"^(?P(v1|v2))\/indexdata/(?P.*)", IndexData.as_view()), 20 | url(r"^(?P(v1|v2))\/indexdatadump\/(?Pv(\d+\.)?(\d+\.)?(\*|\d+)-\d{4}-\d{2}-\d{2}-ror-data)\/(?P(test|prod))$", IndexDataDump.as_view()), 21 | url(r"^(?P(v1|v2))\/", include(views.organizations_router.urls)), 22 | url(r"^", include(views.organizations_router.urls)), 23 | url(r"^docs/", include_docs_urls(title="Research Organization Registry")), 24 | # Prometheus 25 | url("", include("django_prometheus.urls")), 26 | 27 | ] 28 | 
-------------------------------------------------------------------------------- /rorapi/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/management/commands/__init__.py -------------------------------------------------------------------------------- /rorapi/management/commands/createindex.py: -------------------------------------------------------------------------------- 1 | import json 2 | from rorapi.settings import ES7, ES_VARS 3 | 4 | from django.core.management.base import BaseCommand 5 | 6 | 7 | def create_index(self, index, template_file): 8 | with open(template_file, 'r') as it: 9 | template = json.load(it) 10 | ES7.indices.put_template(index, template) 11 | self.stdout.write('Updated index template for {}'.format(index)) 12 | ES7.indices.create(index=index) 13 | self.stdout.write('Created index {}'.format(index)) 14 | 15 | class Command(BaseCommand): 16 | help = 'Create ROR API index' 17 | 18 | def handle(self, *args, **options): 19 | if(options['schema']==1 or options['schema'] is None): 20 | print("creating v1 index") 21 | create_index(self, ES_VARS['INDEX_V1'], ES_VARS['INDEX_TEMPLATE_ES7_V1']) 22 | if(options['schema']==2 or options['schema'] is None): 23 | print("creating v2 index") 24 | create_index(self, ES_VARS['INDEX_V2'], ES_VARS['INDEX_TEMPLATE_ES7_V2']) -------------------------------------------------------------------------------- /rorapi/management/commands/deleteindex.py: -------------------------------------------------------------------------------- 1 | from rorapi.settings import ES7, ES_VARS 2 | 3 | from django.core.management.base import BaseCommand 4 | 5 | def delete_index(self, index): 6 | if ES7.indices.exists(index): 7 | ES7.indices.delete(index=index) 8 | self.stdout.write('Deleted index {}'.format(index)) 9 | else: 10 | self.stdout.write('Index {} does not 
exist'.format(index)) 11 | 12 | class Command(BaseCommand): 13 | help = 'Deletes ROR API index' 14 | 15 | def handle(self, *args, **options): 16 | if(options['schema']==1 or options['schema'] is None): 17 | print("deleting v1 index") 18 | delete_index(self, ES_VARS['INDEX_V1']) 19 | if(options['schema']==2 or options['schema'] is None): 20 | print("deleting v2 index") 21 | delete_index(self, ES_VARS['INDEX_V2']) 22 | 23 | -------------------------------------------------------------------------------- /rorapi/management/commands/generaterorid.py: -------------------------------------------------------------------------------- 1 | import base32_crockford 2 | import random 3 | from rorapi.common.queries import retrieve_organization, get_ror_id 4 | from rorapi.settings import ROR_API 5 | 6 | def generate_ror_id(): 7 | """Generates random ROR ID. 8 | 9 | The checksum calculation is copied from 10 | https://github.com/datacite/base32-url/blob/master/lib/base32/url.rb 11 | to maintain the compatibility with previously generated ROR IDs. 12 | """ 13 | 14 | n = random.randint(0, 200000000) 15 | n_encoded = base32_crockford.encode(n).lower().zfill(6) 16 | checksum = str(98 - ((n * 100) % 97)).zfill(2) 17 | return '{}0{}{}'.format(ROR_API['ID_PREFIX'], n_encoded, checksum) 18 | 19 | 20 | def check_ror_id(version): 21 | """Checks if generated ror id exists in the index. If so, it generates a new id, otherwise it returns the generated ror id 22 | """ 23 | ror_id = get_ror_id(generate_ror_id()) 24 | errors, organization = retrieve_organization(ror_id, version) 25 | if errors is None: 26 | check_ror_id(version) 27 | return ror_id 28 | 29 | 30 | def generate_ror_client_id(): 31 | """Generates a random ROR client ID. 
32 | """ 33 | 34 | n = random.randint(0, 2**160 - 1) 35 | return base32_crockford.encode(n).lower().zfill(32) 36 | -------------------------------------------------------------------------------- /rorapi/management/commands/getrordump.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import requests 5 | import zipfile 6 | import base64 7 | from io import BytesIO 8 | from rorapi.settings import ES7, ES_VARS, ROR_DUMP, DATA 9 | from django.core.management.base import BaseCommand 10 | 11 | HEADERS = {'Accept': 'application/vnd.github.v3+json'} 12 | AUTH_HEADERS = {'Authorization': 'token {}'.format(ROR_DUMP['GITHUB_TOKEN']), 'Accept': 'application/vnd.github.v3+json'} 13 | 14 | def get_ror_dump_sha(filename, use_test_data, github_headers): 15 | sha = '' 16 | if use_test_data: 17 | contents_url = ROR_DUMP['TEST_REPO_URL'] + '/contents' 18 | else: 19 | contents_url = ROR_DUMP['PROD_REPO_URL'] + '/contents' 20 | try: 21 | response = requests.get(contents_url, headers=github_headers) 22 | except requests.exceptions.RequestException as e: 23 | raise SystemExit(f"{contents_url}: is Not reachable \nErr: {e}") 24 | try: 25 | repo_contents = response.json() 26 | for file in repo_contents: 27 | if filename in file['name']: 28 | sha = file['sha'] 29 | return sha 30 | except: 31 | return None 32 | 33 | def get_ror_dump_zip(self, filename, use_test_data, github_headers): 34 | sha = get_ror_dump_sha(filename, use_test_data, github_headers) 35 | if sha: 36 | if use_test_data: 37 | blob_url = ROR_DUMP['TEST_REPO_URL'] + '/git/blobs/' + sha 38 | else: 39 | blob_url = ROR_DUMP['PROD_REPO_URL'] + '/git/blobs/' + sha 40 | try: 41 | response = requests.get(blob_url, headers=github_headers) 42 | except requests.exceptions.RequestException as e: 43 | raise SystemExit(f"Github blob is Not reachable \nErr: {e}") 44 | try: 45 | response_json = response.json() 46 | file_decoded = 
base64.b64decode(response_json['content']) 47 | with open(filename + '.zip', 'wb') as zip_file: 48 | zip_file.write(file_decoded) 49 | with zipfile.ZipFile(zip_file.name, 'r') as ror_zip: 50 | filenames = ror_zip.namelist() 51 | dir_names = [f for f in filenames if ('json' not in f and 'csv' not in f)] 52 | if dir_names: 53 | raise SystemExit(f"Dump zip has extra directory and cannot be indexed") 54 | return zip_file.name 55 | except: 56 | raise SystemExit(f"Something went wrong saving zip file") 57 | 58 | class Command(BaseCommand): 59 | help = 'Downloads a specified ROR data dump from Github' 60 | 61 | def handle(self, *args, **options): 62 | filename = options['filename'] 63 | use_test_data = options['testdata'] 64 | self.stdout.write('Getting ROR dump') 65 | if ROR_DUMP['GITHUB_TOKEN']: 66 | github_headers = AUTH_HEADERS 67 | else: 68 | github_headers = HEADERS 69 | ror_dump_zip = get_ror_dump_zip(self, filename, use_test_data, github_headers) 70 | 71 | -------------------------------------------------------------------------------- /rorapi/management/commands/indexror.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from functools import wraps 4 | from threading import local 5 | import zipfile 6 | import os 7 | import glob 8 | from os.path import exists 9 | import pathlib 10 | import shutil 11 | from rorapi.settings import ES7, ES_VARS, DATA 12 | 13 | from django.core.management.base import BaseCommand 14 | from elasticsearch import TransportError 15 | 16 | def get_nested_names_v1(org): 17 | yield org['name'] 18 | for label in org['labels']: 19 | yield label['label'] 20 | for alias in org['aliases']: 21 | yield alias 22 | for acronym in org['acronyms']: 23 | yield acronym 24 | 25 | def get_nested_names_v2(org): 26 | for name in org['names']: 27 | yield name['value'] 28 | 29 | def get_nested_ids_v1(org): 30 | yield org['id'] 31 | yield re.sub('https://', '', org['id']) 32 | yield 
re.sub('https://ror.org/', '', org['id']) 33 | for ext_name, ext_id in org['external_ids'].items(): 34 | if ext_name == 'GRID': 35 | yield ext_id['all'] 36 | else: 37 | for eid in ext_id['all']: 38 | yield eid 39 | 40 | def get_nested_ids_v2(org): 41 | yield org['id'] 42 | yield re.sub('https://', '', org['id']) 43 | yield re.sub('https://ror.org/', '', org['id']) 44 | for ext_id in org['external_ids']: 45 | for eid in ext_id['all']: 46 | yield eid 47 | 48 | def prepare_files(path, local_file): 49 | data = [] 50 | err = {} 51 | try: 52 | if exists(local_file): 53 | with zipfile.ZipFile(local_file, 'r') as zip_ref: 54 | zip_ref.extractall(path) 55 | except Exception as e: 56 | err[prepare_files.__name__] = f"ERROR: {e}" 57 | 58 | json_files = os.path.join(path, "*.json") 59 | file_list = glob.glob(json_files) 60 | for file in file_list: 61 | try: 62 | with open(file) as f: 63 | data.append(json.load(f)) 64 | except Exception as e: 65 | key = f"In {prepare_files.__name__}_{file}" 66 | err[key] = f"ERROR: {e}" 67 | return data, err 68 | 69 | 70 | def get_rc_data(dir, contents): 71 | err = {} 72 | path = f"{dir}/files.zip" 73 | branch_objects = [i for i in contents if path == i['Key']] 74 | local_file = None 75 | local_path = None 76 | if branch_objects: 77 | s3_file = branch_objects[0]['Key'] 78 | local_path = os.path.join(DATA['DIR'], dir) 79 | os.makedirs(local_path) 80 | local_file = local_path + "/files.zip" 81 | try: 82 | DATA['CLIENT'].download_file(DATA['DATA_STORE'],s3_file, local_file) 83 | except Exception as e: 84 | key = f"In {get_rc_data.__name__}_downloading files" 85 | err[key] = f"ERROR: {e}" 86 | else: 87 | err[get_rc_data.__name__] = f"ERROR: {dir} not found in S3 bucket" 88 | return local_path, local_file, err 89 | 90 | def get_data(): 91 | err = {} 92 | # return contents or None 93 | contents = None 94 | try: 95 | objects = DATA['CLIENT'].list_objects_v2(Bucket = DATA['DATA_STORE']) 96 | contents = objects['Contents'] 97 | except Exception as e: 98 
| err[get_data.__name__] = f"ERROR: Could not get objects from {DATA['DATA_STORE']}: {e}" 99 | return contents, err 100 | 101 | 102 | def process_files(dir, version): 103 | err = [] 104 | if dir: 105 | path = os.path.join(DATA['WORKING_DIR'], dir) 106 | if os.path.isdir(path): 107 | p = pathlib.Path(path) 108 | shutil.rmtree(p) 109 | objects, e = get_data() 110 | err.append(e) 111 | if objects and not(e): 112 | # check if objects exist, otherwise error 113 | path, file, e = get_rc_data(dir, objects) 114 | err.append(e) 115 | if path and file and not(e): 116 | data, e = prepare_files(path, file) 117 | if not(e): 118 | index_error = index(data, version) 119 | err.append(index_error) 120 | else: 121 | err.append(e) 122 | else: 123 | err.append({process_files.__name__: f"No objects found in {dir}"}) 124 | else: 125 | err.append({process_files.__name__: "Need S3 directory argument"}) 126 | err = [i for i in err if i] 127 | if err: 128 | msg = {"status": "ERROR", "msg": err} 129 | else: 130 | msg = {"status": "OK", "msg": f"{dir} indexed using version {version}"} 131 | 132 | return msg 133 | 134 | 135 | def index(dataset, version): 136 | err = {} 137 | if version == 'v2': 138 | index = ES_VARS['INDEX_V2'] 139 | else: 140 | index = ES_VARS['INDEX_V1'] 141 | backup_index = '{}-tmp'.format(index) 142 | ES7.reindex(body={ 143 | 'source': { 144 | 'index': index 145 | }, 146 | 'dest': { 147 | 'index': backup_index 148 | } 149 | }) 150 | 151 | try: 152 | for i in range(0, len(dataset), ES_VARS['BULK_SIZE']): 153 | body = [] 154 | for org in dataset[i:i + ES_VARS['BULK_SIZE']]: 155 | body.append({ 156 | 'index': { 157 | '_index': index, 158 | '_id': org['id'] 159 | } 160 | }) 161 | if 'v2' in index: 162 | org['names_ids'] = [{ 163 | 'name': n 164 | } for n in get_nested_names_v2(org)] 165 | org['names_ids'] += [{ 166 | 'id': n 167 | } for n in get_nested_ids_v2(org)] 168 | else: 169 | org['names_ids'] = [{ 170 | 'name': n 171 | } for n in get_nested_names_v1(org)] 172 | 
org['names_ids'] += [{ 173 | 'id': n 174 | } for n in get_nested_ids_v1(org)] 175 | body.append(org) 176 | ES7.bulk(body) 177 | except TransportError: 178 | err[index.__name__] = f"Indexing error, reverted index back to previous state" 179 | ES7.reindex(body={ 180 | 'source': { 181 | 'index': backup_index 182 | }, 183 | 'dest': { 184 | 'index': index 185 | } 186 | }) 187 | if ES7.indices.exists(backup_index): 188 | ES7.indices.delete(backup_index) 189 | return err 190 | 191 | class Command(BaseCommand): 192 | help = 'Indexes ROR dataset' 193 | 194 | def add_arguments(self, parser): 195 | parser.add_argument('dir', type=str, help='add directory name for S3 bucket to be processed') 196 | parser.add_argument('version', type=str, help='schema version of files to be processed') 197 | 198 | def handle(self,*args, **options): 199 | dir = options['dir'] 200 | version = options['version'] 201 | process_files(dir, version) 202 | 203 | 204 | -------------------------------------------------------------------------------- /rorapi/management/commands/indexrordump.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import requests 5 | import zipfile 6 | import base64 7 | from io import BytesIO 8 | from rorapi.settings import ES7, ES_VARS, ROR_DUMP, DATA 9 | 10 | from django.core.management.base import BaseCommand 11 | from elasticsearch import TransportError 12 | 13 | HEADERS = {'Accept': 'application/vnd.github.v3+json'} 14 | 15 | def get_nested_names_v1(org): 16 | yield org['name'] 17 | for label in org['labels']: 18 | yield label['label'] 19 | for alias in org['aliases']: 20 | yield alias 21 | for acronym in org['acronyms']: 22 | yield acronym 23 | 24 | def get_nested_names_v2(org): 25 | for name in org['names']: 26 | yield name['value'] 27 | 28 | def get_nested_ids_v1(org): 29 | yield org['id'] 30 | yield re.sub('https://', '', org['id']) 31 | yield re.sub('https://ror.org/', '', org['id']) 32 | 
for ext_name, ext_id in org['external_ids'].items(): 33 | if ext_name == 'GRID': 34 | yield ext_id['all'] 35 | else: 36 | for eid in ext_id['all']: 37 | yield eid 38 | 39 | def get_nested_ids_v2(org): 40 | yield org['id'] 41 | yield re.sub('https://', '', org['id']) 42 | yield re.sub('https://ror.org/', '', org['id']) 43 | for ext_id in org['external_ids']: 44 | for eid in ext_id['all']: 45 | yield eid 46 | 47 | def index_dump(self, filename, index, dataset): 48 | backup_index = '{}-tmp'.format(index) 49 | ES7.reindex(body={ 50 | 'source': { 51 | 'index': index 52 | }, 53 | 'dest': { 54 | 'index': backup_index 55 | } 56 | }) 57 | 58 | try: 59 | for i in range(0, len(dataset), ES_VARS['BULK_SIZE']): 60 | body = [] 61 | for org in dataset[i:i + ES_VARS['BULK_SIZE']]: 62 | body.append({ 63 | 'index': { 64 | '_index': index, 65 | '_id': org['id'] 66 | } 67 | }) 68 | if 'v2' in index: 69 | org['names_ids'] = [{ 70 | 'name': n 71 | } for n in get_nested_names_v2(org)] 72 | org['names_ids'] += [{ 73 | 'id': n 74 | } for n in get_nested_ids_v2(org)] 75 | else: 76 | org['names_ids'] = [{ 77 | 'name': n 78 | } for n in get_nested_names_v1(org)] 79 | org['names_ids'] += [{ 80 | 'id': n 81 | } for n in get_nested_ids_v1(org)] 82 | body.append(org) 83 | ES7.bulk(body) 84 | except TransportError: 85 | self.stdout.write(TransportError) 86 | self.stdout.write('Reverting to backup index') 87 | ES7.reindex(body={ 88 | 'source': { 89 | 'index': backup_index 90 | }, 91 | 'dest': { 92 | 'index': index 93 | } 94 | }) 95 | if ES7.indices.exists(backup_index): 96 | ES7.indices.delete(backup_index) 97 | self.stdout.write('ROR dataset ' + filename + ' indexed') 98 | 99 | 100 | class Command(BaseCommand): 101 | help = 'Indexes ROR dataset from a full dump file in ror-data repo' 102 | 103 | def handle(self, *args, **options): 104 | json_files = [] 105 | filename = options['filename'] 106 | ror_dump_zip = filename + '.zip' 107 | if os.path.exists(ror_dump_zip): 108 | if not 
os.path.exists(DATA['WORKING_DIR']): 109 | os.makedirs(DATA['WORKING_DIR']) 110 | self.stdout.write('Extracting ROR dump') 111 | with zipfile.ZipFile(ror_dump_zip, 'r') as zip_ref: 112 | zip_ref.extractall(DATA['WORKING_DIR'] + filename) 113 | unzipped_files = os.listdir(DATA['WORKING_DIR'] + filename) 114 | for file in unzipped_files: 115 | if file.endswith(".json"): 116 | json_files.append(file) 117 | if json_files: 118 | for json_file in json_files: 119 | index = None 120 | json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file 121 | if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None): 122 | self.stdout.write('Loading JSON') 123 | with open(json_path, 'r') as it: 124 | dataset = json.load(it) 125 | self.stdout.write('Indexing ROR dataset ' + json_file) 126 | index = ES_VARS['INDEX_V2'] 127 | index_dump(self, json_file, index, dataset) 128 | if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None): 129 | self.stdout.write('Loading JSON') 130 | with open(json_path, 'r') as it: 131 | dataset = json.load(it) 132 | self.stdout.write('Indexing ROR dataset ' + json_file) 133 | index = ES_VARS['INDEX_V1'] 134 | index_dump(self, json_file, index, dataset) 135 | else: 136 | self.stdout.write("ROR data dump does not contain any JSON files") 137 | 138 | else: 139 | self.stdout.write("ROR data dump zip file does not exist") 140 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacyconvertgrid.py: -------------------------------------------------------------------------------- 1 | import base32_crockford 2 | import json 3 | import os.path 4 | import random 5 | import zipfile 6 | import re 7 | from rorapi.settings import ES, ES_VARS, ROR_API, GRID, ROR_DUMP 8 | 9 | from django.core.management.base import BaseCommand 10 | 11 | # Previously used to convert latest GRID dataset configured in settings.py 12 | # to ROR and assign ROR IDs to 
each GRID org 13 | # As of Mar 2022 ROR is no longer based on GRID 14 | # New records are now created in https://github.com/ror-community/ror-records and pushed to S3 15 | # Individual record files in S3 are indexed with indexror.py 16 | # Entire dataset zip files in https://github.com/ror-community/ror-data 17 | # can be indexed with setup.py, which uses indexrordump.py 18 | 19 | def generate_ror_id(): 20 | """Generates random ROR ID. 21 | 22 | The checksum calculation is copied from 23 | https://github.com/datacite/base32-url/blob/master/lib/base32/url.rb 24 | to maintain the compatibility with previously generated ROR IDs. 25 | """ 26 | 27 | n = random.randint(0, 200000000) 28 | n_encoded = base32_crockford.encode(n).lower().zfill(6) 29 | checksum = str(98 - ((n * 100) % 97)).zfill(2) 30 | return '{}0{}{}'.format(ROR_API['ID_PREFIX'], n_encoded, checksum) 31 | 32 | 33 | def get_ror_id(grid_id, es): 34 | """Maps GRID ID to ROR ID. 35 | 36 | If given GRID ID was indexed previously, corresponding ROR ID is obtained 37 | from the index. Otherwise, new ROR ID is generated. 
38 | """ 39 | 40 | s = ES.search(ES_VARS['INDEX'], 41 | body={'query': { 42 | 'term': { 43 | 'external_ids.GRID.all': grid_id 44 | } 45 | }}) 46 | if s['hits']['total'] == 1: 47 | return s['hits']['hits'][0]['_id'] 48 | return generate_ror_id() 49 | 50 | 51 | def geonames_city(geonames_city): 52 | geonames = ["geonames_admin1", "geonames_admin2"] 53 | geonames_attributes = ["id", "name", "ascii_name", "code"] 54 | nuts = ["nuts_level1", "nuts_level2", "nuts_level3"] 55 | nuts_attributes = ["code", "name"] 56 | geonames_city_hsh = {} 57 | for k, v in geonames_city.items(): 58 | if (k in geonames): 59 | if isinstance(v, dict): 60 | geonames_city_hsh[k] = { 61 | i: v.get(i, None) 62 | for i in geonames_attributes 63 | } 64 | elif v is None: 65 | geonames_city_hsh[k] = {i: None for i in geonames_attributes} 66 | elif (k in nuts): 67 | if isinstance(v, dict): 68 | geonames_city_hsh[k] = { 69 | i: v.get(i, None) 70 | for i in nuts_attributes 71 | } 72 | elif v is None: 73 | geonames_city_hsh[k] = {i: None for i in nuts_attributes} 74 | else: 75 | geonames_city_hsh[k] = v 76 | return geonames_city_hsh 77 | 78 | 79 | def addresses(location): 80 | line = "" 81 | address = ["line_1", "line_2", "line_3"] 82 | combine_lines = address + ["country", "country_code"] 83 | geonames_admin = ["id", "code", "name", "ascii_name"] 84 | nuts = ["code", "name"] 85 | new_addresses = [] 86 | hsh = {} 87 | hsh["line"] = None 88 | for h in location: 89 | for k, v in h.items(): 90 | if not (k in combine_lines) and (k != "geonames_city"): 91 | v = v if v != "" else None 92 | hsh[k] = v 93 | elif k == "geonames_city": 94 | if isinstance(v, dict): 95 | hsh[k] = geonames_city(v) 96 | elif v is None: 97 | hsh[k] = {} 98 | elif (k in combine_lines): 99 | n = [] 100 | for i in address: 101 | if not (h[i] is None): 102 | n.append(h[i]) 103 | line = " ".join(n) 104 | line = re.sub(' +', ' ', line) 105 | if (len(line) == 1 and line == " "): 106 | line = line.strip() 107 | line = line if len(line) > 0 
else None 108 | hsh["line"] = line 109 | new_addresses.append(hsh) 110 | return new_addresses 111 | 112 | 113 | def convert_organization(grid_org, es): 114 | """Converts the organization metadata from GRID schema to ROR schema.""" 115 | return { 116 | 'id': 117 | get_ror_id(grid_org['id'], ES), 118 | 'name': 119 | grid_org['name'], 120 | 'types': 121 | grid_org['types'], 122 | 'links': 123 | grid_org['links'], 124 | 'aliases': 125 | grid_org['aliases'], 126 | 'acronyms': 127 | grid_org['acronyms'], 128 | 'status': 129 | grid_org['status'], 130 | 'wikipedia_url': 131 | grid_org['wikipedia_url'], 132 | 'labels': 133 | grid_org['labels'], 134 | 'email_address': 135 | grid_org['email_address'], 136 | 'ip_addresses': 137 | grid_org['ip_addresses'], 138 | 'established': 139 | grid_org['established'], 140 | 'country': { 141 | 'country_code': grid_org['addresses'][0]['country_code'], 142 | 'country_name': grid_org['addresses'][0]['country'] 143 | }, 144 | 'relationships': 145 | grid_org["relationships"], 146 | 'addresses': 147 | addresses(grid_org["addresses"]), 148 | 'external_ids': 149 | getExternalIds( 150 | dict(grid_org.get('external_ids', {}), 151 | GRID={ 152 | 'preferred': grid_org['id'], 153 | 'all': grid_org['id'] 154 | })) 155 | } 156 | 157 | 158 | def getExternalIds(external_ids): 159 | if 'ROR' in external_ids: del external_ids['ROR'] 160 | return external_ids 161 | 162 | 163 | def get_ids(data): 164 | ids = {} 165 | for d in data: 166 | ids[d['external_ids']['GRID']['all']] = d['id'] 167 | return ids 168 | 169 | 170 | def get_grid(record, ids): 171 | if record['relationships']: 172 | for r in record['relationships']: 173 | r['id'] = ids[r['id']] 174 | 175 | return record 176 | 177 | 178 | class Command(BaseCommand): 179 | help = 'Converts GRID dataset to ROR schema' 180 | 181 | def handle(self, *args, **options): 182 | os.makedirs(ROR_DUMP['DIR'], exist_ok=True) 183 | # make sure we are not overwriting an existing ROR JSON file 184 | # with new ROR 
identifiers 185 | if zipfile.is_zipfile(ROR_DUMP['ROR_ZIP_PATH']): 186 | self.stdout.write('ROR dataset already exists') 187 | return 188 | 189 | if not os.path.isfile(ROR_DUMP['ROR_JSON_PATH']): 190 | with open(GRID['GRID_JSON_PATH'], 'r') as it: 191 | grid_data = json.load(it) 192 | 193 | self.stdout.write('Converting GRID dataset to ROR schema') 194 | intermediate_ror_data = [ 195 | convert_organization(org, ES) 196 | for org in grid_data['institutes'] if org['status'] == 'active' 197 | ] 198 | ids = get_ids(intermediate_ror_data) 199 | ror_data = [get_grid(rec, ids) for rec in intermediate_ror_data] 200 | with open(ROR_DUMP['ROR_JSON_PATH'], 'w') as outfile: 201 | json.dump(ror_data, outfile, indent=4) 202 | self.stdout.write('ROR dataset created') 203 | 204 | # generate zip archive 205 | with zipfile.ZipFile(ROR_DUMP['ROR_ZIP_PATH'], 'w') as zipArchive: 206 | zipArchive.write(ROR_DUMP['ROR_JSON_PATH'], 207 | arcname='ror.json', 208 | compress_type=zipfile.ZIP_DEFLATED) 209 | self.stdout.write('ROR dataset ZIP archive created') 210 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacydownloadgrid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import zipfile 4 | 5 | from django.core.management.base import BaseCommand 6 | from rorapi.settings import GRID 7 | 8 | # Previously used to download latest GRID dataset configured in settings.py 9 | # which was used to generate a new ROR datasets 10 | # As of Mar 2022 ROR is no longer based on GRID 11 | # New records are now created in https://github.com/ror-community/ror-records and pushed to S3 12 | # Individual record files in S3 are indexed with indexror.py 13 | # Entire dataset zip files in https://github.com/ror-community/ror-data 14 | # can be indexed with setup.py, which uses indexrordump.py 15 | 16 | class Command(BaseCommand): 17 | help = 'Downloads GRID dataset' 18 | 19 | 
def handle(self, *args, **options): 20 | os.makedirs(GRID['DIR'], exist_ok=True) 21 | 22 | # make sure we are not overwriting an existing ROR JSON file 23 | # with new ROR identifiers 24 | if zipfile.is_zipfile(GRID['GRID_ZIP_PATH']): 25 | self.stdout.write('Already downloaded GRID version {}'.format( 26 | GRID['VERSION'])) 27 | return 28 | 29 | self.stdout.write('Downloading GRID version {}'.format( 30 | GRID['VERSION'])) 31 | r = requests.get(GRID['URL']) 32 | with open(GRID['GRID_ZIP_PATH'], 'wb') as f: 33 | f.write(r.content) 34 | 35 | with zipfile.ZipFile(GRID['GRID_ZIP_PATH'], 'r') as zip_ref: 36 | zip_ref.extractall(GRID['DIR']) 37 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacyindexgrid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import zipfile 4 | from rorapi.settings import ES, ES_VARS, LEGACY_ROR_DUMP 5 | 6 | from django.core.management.base import BaseCommand 7 | from elasticsearch import TransportError 8 | 9 | 10 | def get_nested_names(org): 11 | yield org['name'] 12 | for label in org['labels']: 13 | yield label['label'] 14 | for alias in org['aliases']: 15 | yield alias 16 | for acronym in org['acronyms']: 17 | yield acronym 18 | 19 | 20 | def get_nested_ids(org): 21 | yield org['id'] 22 | yield re.sub('https://', '', org['id']) 23 | yield re.sub('https://ror.org/', '', org['id']) 24 | for ext_name, ext_id in org['external_ids'].items(): 25 | if ext_name == 'GRID': 26 | yield ext_id['all'] 27 | else: 28 | for eid in ext_id['all']: 29 | yield eid 30 | 31 | 32 | class Command(BaseCommand): 33 | help = 'Indexes ROR dataset' 34 | 35 | def handle(self, *args, **options): 36 | with zipfile.ZipFile(LEGACY_ROR_DUMP['ROR_ZIP_PATH'], 'r') as zip_ref: 37 | zip_ref.extractall(LEGACY_ROR_DUMP['DIR']) 38 | 39 | with open(LEGACY_ROR_DUMP['ROR_JSON_PATH'], 'r') as it: 40 | dataset = json.load(it) 41 | 42 | 
self.stdout.write('Indexing ROR dataset') 43 | 44 | index = ES_VARS['INDEX'] 45 | backup_index = '{}-tmp'.format(index) 46 | ES.reindex(body={ 47 | 'source': { 48 | 'index': index 49 | }, 50 | 'dest': { 51 | 'index': backup_index 52 | } 53 | }) 54 | 55 | try: 56 | for i in range(0, len(dataset), ES_VARS['BULK_SIZE']): 57 | body = [] 58 | for org in dataset[i:i + ES_VARS['BULK_SIZE']]: 59 | body.append({ 60 | 'index': { 61 | '_index': index, 62 | '_type': 'org', 63 | '_id': org['id'] 64 | } 65 | }) 66 | org['names_ids'] = [{ 67 | 'name': n 68 | } for n in get_nested_names(org)] 69 | org['names_ids'] += [{ 70 | 'id': n 71 | } for n in get_nested_ids(org)] 72 | body.append(org) 73 | ES.bulk(body) 74 | except TransportError: 75 | self.stdout.write(TransportError) 76 | ES.reindex(body={ 77 | 'source': { 78 | 'index': backup_index 79 | }, 80 | 'dest': { 81 | 'index': index 82 | } 83 | }) 84 | 85 | if ES.indices.exists(backup_index): 86 | ES.indices.delete(backup_index) 87 | self.stdout.write('ROR dataset ' + LEGACY_ROR_DUMP['VERSION'] + ' indexed') 88 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacyseeschema.py: -------------------------------------------------------------------------------- 1 | import json 2 | from rorapi.settings import ES, ES_VARS 3 | 4 | from django.core.management.base import BaseCommand 5 | 6 | 7 | class Command(BaseCommand): 8 | help = 'Create ROR API index' 9 | 10 | def handle(self, *args, **options): 11 | index = ES_VARS['INDEX'] 12 | if ES.indices.exists(index): 13 | raw_data = ES.indices.get_mapping( index ) 14 | schema = raw_data[ index ]["mappings"]["org"] 15 | print (json.dumps(schema, indent=4)) 16 | else: 17 | with open(ES_VARS['INDEX_TEMPLATE'], 'r') as it: 18 | template = json.load(it) 19 | ES.indices.create(index=index, body=template) 20 | self.stdout.write('Created index {}'.format(index)) 21 | 
-------------------------------------------------------------------------------- /rorapi/management/commands/legacyupgrade.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | from .downloadgrid import Command as DownloadGridCommand 3 | from .convertgrid import Command as ConvertGridCommand 4 | 5 | # Previously used to generate ROR dataset 6 | # based on the latest GRID dataset configured in settings.py 7 | # As of Mar 2022 ROR is no longer based on GRID 8 | # New records are now created in https://github.com/ror-community/ror-records and pushed to S3 9 | # Individual record files in S3 are indexed with indexror.py 10 | # Entire dataset zip files in https://github.com/ror-community/ror-data 11 | # can be indexed with setup.py, which uses indexrordump.py 12 | 13 | class Command(BaseCommand): 14 | help = 'Generate up-to-date ror.zip from GRID data' 15 | 16 | def handle(self, *args, **options): 17 | DownloadGridCommand().handle(args, options) 18 | ConvertGridCommand().handle(args, options) 19 | -------------------------------------------------------------------------------- /rorapi/management/commands/setup.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import zipfile 3 | import base64 4 | from django.core.management.base import BaseCommand 5 | from rorapi.management.commands.deleteindex import Command as DeleteIndexCommand 6 | from rorapi.management.commands.createindex import Command as CreateIndexCommand 7 | from rorapi.management.commands.indexrordump import Command as IndexRorDumpCommand 8 | from rorapi.management.commands.getrordump import Command as GetRorDumpCommand 9 | from rorapi.settings import ROR_DUMP 10 | 11 | HEADERS = {'Accept': 'application/vnd.github.v3+json'} 12 | 13 | HEADERS = {'Authorization': 'token {}'.format(ROR_DUMP['GITHUB_TOKEN']), 'Accept': 'application/vnd.github.v3+json'} 14 | 15 | def 
get_ror_dump_sha(filename, use_test_data): 16 | sha = '' 17 | if use_test_data: 18 | contents_url = ROR_DUMP['TEST_REPO_URL'] + '/contents' 19 | else: 20 | contents_url = ROR_DUMP['PROD_REPO_URL'] + '/contents' 21 | try: 22 | response = requests.get(contents_url, headers=HEADERS) 23 | except requests.exceptions.RequestException as e: 24 | raise SystemExit(f"{contents_url}: is Not reachable \nErr: {e}") 25 | try: 26 | repo_contents = response.json() 27 | for file in repo_contents: 28 | if filename in file['name']: 29 | sha = file['sha'] 30 | return sha 31 | except: 32 | return None 33 | 34 | class Command(BaseCommand): 35 | help = 'Setup ROR API' 36 | 37 | def add_arguments(self, parser): 38 | parser.add_argument('filename', type=str, help='Name of data dump zip file to index without extension') 39 | parser.add_argument('-s', '--schema', type=int, choices=[1, 2], help='Schema version to index if only indexing 1 version. Only set if not indexing both versions.') 40 | parser.add_argument('-t', '--testdata', action='store_true', help='Set flag to pull data dump from ror-data-test instead of ror-data') 41 | 42 | def handle(self, *args, **options): 43 | msg = None 44 | # make sure ROR dump file exists 45 | filename = options['filename'] 46 | use_test_data = options['testdata'] 47 | if use_test_data: 48 | print("Using ror-data-test repo") 49 | else: 50 | print("Using ror-data repo") 51 | 52 | sha = get_ror_dump_sha(filename, use_test_data) 53 | 54 | if sha: 55 | try: 56 | GetRorDumpCommand().handle(*args, **options) 57 | DeleteIndexCommand().handle(*args, **options) 58 | CreateIndexCommand().handle(*args, **options) 59 | IndexRorDumpCommand().handle(*args, **options) 60 | msg = 'SUCCESS: ROR dataset {} indexed in version {}. Using test repo: {}'.format(filename, str(options['schema']), str(use_test_data)) 61 | except: 62 | msg = 'ERROR: Could not index ROR data dump. Check API logs for details.' 63 | else: 64 | msg = 'ERROR: ROR dataset for file {} not found. 
'.format(filename) \ 65 | +'Please generate the data dump first.' 66 | self.stdout.write(msg) 67 | 68 | return msg 69 | 70 | -------------------------------------------------------------------------------- /rorapi/migrations/0001_create_client_model.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.28 on 2025-03-11 07:13 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | initial = True 9 | 10 | dependencies = [ 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name='Client', 16 | fields=[ 17 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 18 | ('email', models.EmailField(max_length=255)), 19 | ('name', models.CharField(blank=True, max_length=255)), 20 | ('institution_name', models.CharField(blank=True, max_length=255)), 21 | ('institution_ror', models.URLField(blank=True, max_length=255)), 22 | ('country_code', models.CharField(blank=True, max_length=2)), 23 | ('ror_use', models.TextField(blank=True, max_length=500)), 24 | ('client_id', models.CharField(editable=False, max_length=32, unique=True)), 25 | ('created_at', models.DateTimeField(auto_now_add=True)), 26 | ('last_request_at', models.DateTimeField(blank=True, null=True)), 27 | ('request_count', models.IntegerField(default=0)), 28 | ], 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /rorapi/migrations/0002_auto_20250326_1054.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.28 on 2025-03-26 10:54 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('rorapi', '0001_create_client_model'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='client', 15 | name='email', 16 | 
field=models.EmailField(max_length=255, unique=True), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /rorapi/migrations/0003_auto_20250415_1207.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.28 on 2025-04-15 12:07 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('rorapi', '0002_auto_20250326_1054'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='client', 15 | name='country_code', 16 | field=models.CharField(blank=True, max_length=2, null=True), 17 | ), 18 | migrations.AlterField( 19 | model_name='client', 20 | name='email', 21 | field=models.EmailField(max_length=255), 22 | ), 23 | migrations.AlterField( 24 | model_name='client', 25 | name='institution_name', 26 | field=models.CharField(blank=True, max_length=255, null=True), 27 | ), 28 | migrations.AlterField( 29 | model_name='client', 30 | name='institution_ror', 31 | field=models.URLField(blank=True, max_length=255, null=True), 32 | ), 33 | migrations.AlterField( 34 | model_name='client', 35 | name='name', 36 | field=models.CharField(blank=True, max_length=255, null=True), 37 | ), 38 | migrations.AlterField( 39 | model_name='client', 40 | name='ror_use', 41 | field=models.TextField(blank=True, max_length=500, null=True), 42 | ), 43 | ] 44 | -------------------------------------------------------------------------------- /rorapi/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/migrations/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/tests_functional/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/evaluation.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | 4 | 5 | def escape_query(query): 6 | return re.sub(r'([\+\-=\&\|>= ACCURACY_MIN) 54 | 55 | correct = sum([ 56 | len(set(r).intersection(set(d.get('ror_ids')))) 57 | for d, r in zip(self.dataset, self.results) 58 | ]) 59 | total = sum([len(r) for r in self.results]) 60 | precision = correct / total 61 | print('Precision: {} {}'.format(precision, 62 | proportion_confint(correct, total))) 63 | self.assertTrue(precision >= PRECISION_MIN) 64 | 65 | correct = sum([ 66 | len(set(r).intersection(set(d.get('ror_ids')))) 67 | for d, r in zip(self.dataset, self.results) 68 | ]) 69 | total = sum([len(d.get('ror_ids')) for d in self.dataset]) 70 | recall = correct / total 71 | print('Recall: {} {}'.format(recall, 72 | proportion_confint(correct, total))) 73 | self.assertTrue(recall >= RECALL_MIN) 74 | -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/tests_matching_v2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import requests 5 | 6 | from django.test import SimpleTestCase 7 | from statsmodels.stats.api import proportion_confint 8 | 9 | ACCURACY_MIN = 0.885741 10 | PRECISION_MIN = 0.915426 11 | RECALL_MIN = 0.920048 12 | 13 | 
API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') 14 | 15 | 16 | class AffiliationMatchingTestCase(SimpleTestCase): 17 | def match(self, affiliation): 18 | affiliation = re.sub(r'([\+\-=\&\|>= ACCURACY_MIN) 53 | 54 | correct = sum([ 55 | len(set(r).intersection(set(d.get('ror_ids')))) 56 | for d, r in zip(self.dataset, self.results) 57 | ]) 58 | total = sum([len(r) for r in self.results]) 59 | precision = correct / total 60 | print('Precision: {} {}'.format(precision, 61 | proportion_confint(correct, total))) 62 | self.assertTrue(precision >= PRECISION_MIN) 63 | 64 | correct = sum([ 65 | len(set(r).intersection(set(d.get('ror_ids')))) 66 | for d, r in zip(self.dataset, self.results) 67 | ]) 68 | total = sum([len(d.get('ror_ids')) for d in self.dataset]) 69 | recall = correct / total 70 | print('Recall: {} {}'.format(recall, 71 | proportion_confint(correct, total))) 72 | self.assertTrue(recall >= RECALL_MIN) 73 | -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/tests_search_v1.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | from .evaluation import search, escape_query 6 | from django.test import SimpleTestCase 7 | from statsmodels.stats.api import DescrStatsW, proportion_confint 8 | 9 | RANK_MAX_QUERY = 2.315534 10 | R1_MIN_QUERY = 0.749118 11 | R5_MIN_QUERY = 0.913082 12 | 13 | RANK_MAX_QUERY_FUZZY = 2.619402 14 | R1_MIN_QUERY_FUZZY = 0.728343 15 | R5_MIN_QUERY_FUZZY = 0.902090 16 | 17 | API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') 18 | API_VERSION = 'v1' 19 | 20 | 21 | def get_rank(ror_id, items): 22 | for i, item in enumerate(items): 23 | if ror_id == item['id']: 24 | return i + 1 25 | return 21 26 | 27 | 28 | def mean_rank(ranks): 29 | return sum(ranks) / len(ranks), DescrStatsW(ranks).tconfint_mean() 30 | 31 | 32 | def recall_at_n(ranks, n): 33 | s = len([r for r in ranks if r <= n]) 
34 | a = len(ranks) 35 | return s / a, proportion_confint(s, a) 36 | 37 | 38 | class SearchTestCase(SimpleTestCase): 39 | def set_up(self, param, rank_max, r1_min, r5_min): 40 | with open( 41 | os.path.join(os.path.dirname(__file__), 42 | 'data/dataset_names.json')) as names_file: 43 | data = json.load(names_file) 44 | data_query = [] 45 | for i, d in enumerate(data): 46 | data_query.append((d, search(API_URL, param, d['affiliation'], API_VERSION))) 47 | if i % 100 == 0: 48 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 49 | self.ranks = [ 50 | get_rank(case['ror-id'], items) for case, items in data_query 51 | ] 52 | self.rank_max = rank_max 53 | self.r1_min = r1_min 54 | self.r5_min = r5_min 55 | 56 | def validate(self, name): 57 | mean, ci = mean_rank(self.ranks) 58 | print('\nMean rank for {}: {} {}'.format(name, mean, ci)) 59 | self.assertTrue(mean <= self.rank_max) 60 | 61 | recall_1, ci = recall_at_n(self.ranks, 1) 62 | print('Recall@1 for {}: {} {}'.format(name, recall_1, ci)) 63 | self.assertTrue(recall_1 >= self.r1_min) 64 | 65 | recall_5, ci = recall_at_n(self.ranks, 5) 66 | print('Recall@5 for {}: {} {}'.format(name, recall_5, ci)) 67 | self.assertTrue(recall_5 >= self.r5_min) 68 | 69 | 70 | class QueryFuzzySearchTestCase(SearchTestCase): 71 | def setUp(self): 72 | self.param = 'query' 73 | with open( 74 | os.path.join(os.path.dirname(__file__), 75 | 'data/dataset_names.json')) as names_file: 76 | data = json.load(names_file) 77 | data_query = [] 78 | for i, d in enumerate(data): 79 | data_query.append((d, 80 | search(API_URL, 81 | 'query', 82 | re.sub('([^ ])(?= |$)', r'\g<1>~', 83 | escape_query(d['affiliation'])), 84 | API_VERSION, 85 | escape=False))) 86 | if i % 100 == 0: 87 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 88 | self.ranks = [ 89 | get_rank(case['ror-id'], items) for case, items in data_query 90 | ] 91 | self.rank_max = RANK_MAX_QUERY_FUZZY 92 | self.r1_min = R1_MIN_QUERY_FUZZY 93 | self.r5_min = 
R5_MIN_QUERY_FUZZY 94 | 95 | def test_search_query(self): 96 | self.validate('query (fuzzy)') 97 | 98 | 99 | class QuerySearchTestCase(SearchTestCase): 100 | def setUp(self): 101 | self.set_up('query', RANK_MAX_QUERY, R1_MIN_QUERY, R5_MIN_QUERY) 102 | 103 | def test_search_query(self): 104 | self.validate('query') 105 | -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/tests_search_v2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | from .evaluation import search, escape_query 6 | from django.test import SimpleTestCase 7 | from statsmodels.stats.api import DescrStatsW, proportion_confint 8 | 9 | RANK_MAX_QUERY = 2.315534 10 | R1_MIN_QUERY = 0.749118 11 | R5_MIN_QUERY = 0.913082 12 | 13 | RANK_MAX_QUERY_FUZZY = 2.619402 14 | R1_MIN_QUERY_FUZZY = 0.728343 15 | R5_MIN_QUERY_FUZZY = 0.902090 16 | 17 | API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') 18 | API_VERSION = 'v2' 19 | 20 | 21 | def get_rank(ror_id, items): 22 | for i, item in enumerate(items): 23 | if ror_id == item['id']: 24 | return i + 1 25 | return 21 26 | 27 | 28 | def mean_rank(ranks): 29 | return sum(ranks) / len(ranks), DescrStatsW(ranks).tconfint_mean() 30 | 31 | 32 | def recall_at_n(ranks, n): 33 | s = len([r for r in ranks if r <= n]) 34 | a = len(ranks) 35 | return s / a, proportion_confint(s, a) 36 | 37 | 38 | class SearchTestCase(SimpleTestCase): 39 | def set_up(self, param, rank_max, r1_min, r5_min): 40 | with open( 41 | os.path.join(os.path.dirname(__file__), 42 | 'data/dataset_names.json')) as names_file: 43 | data = json.load(names_file) 44 | data_query = [] 45 | for i, d in enumerate(data): 46 | data_query.append((d, search(API_URL, param, d['affiliation'], API_VERSION))) 47 | if i % 100 == 0: 48 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 49 | self.ranks = [ 50 | get_rank(case['ror-id'], items) for case, items in 
data_query 51 | ] 52 | self.rank_max = rank_max 53 | self.r1_min = r1_min 54 | self.r5_min = r5_min 55 | 56 | def validate(self, name): 57 | mean, ci = mean_rank(self.ranks) 58 | print('\nMean rank for {}: {} {}'.format(name, mean, ci)) 59 | self.assertTrue(mean <= self.rank_max) 60 | 61 | recall_1, ci = recall_at_n(self.ranks, 1) 62 | print('Recall@1 for {}: {} {}'.format(name, recall_1, ci)) 63 | self.assertTrue(recall_1 >= self.r1_min) 64 | 65 | recall_5, ci = recall_at_n(self.ranks, 5) 66 | print('Recall@5 for {}: {} {}'.format(name, recall_5, ci)) 67 | self.assertTrue(recall_5 >= self.r5_min) 68 | 69 | 70 | class QueryFuzzySearchTestCase(SearchTestCase): 71 | def setUp(self): 72 | self.param = 'query' 73 | with open( 74 | os.path.join(os.path.dirname(__file__), 75 | 'data/dataset_names.json')) as names_file: 76 | data = json.load(names_file) 77 | data_query = [] 78 | for i, d in enumerate(data): 79 | data_query.append((d, 80 | search(API_URL, 81 | 'query', 82 | re.sub('([^ ])(?= |$)', r'\g<1>~', 83 | escape_query(d['affiliation'])), 84 | API_VERSION, 85 | escape=False))) 86 | if i % 100 == 0: 87 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 88 | self.ranks = [ 89 | get_rank(case['ror-id'], items) for case, items in data_query 90 | ] 91 | self.rank_max = RANK_MAX_QUERY_FUZZY 92 | self.r1_min = R1_MIN_QUERY_FUZZY 93 | self.r5_min = R5_MIN_QUERY_FUZZY 94 | 95 | def test_search_query(self): 96 | self.validate('query (fuzzy)') 97 | 98 | 99 | class QuerySearchTestCase(SearchTestCase): 100 | def setUp(self): 101 | self.set_up('query', RANK_MAX_QUERY, R1_MIN_QUERY, R5_MIN_QUERY) 102 | 103 | def test_search_query(self): 104 | self.validate('query') 105 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/tests_integration/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_matching_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import requests 4 | 5 | from django.test import SimpleTestCase 6 | 7 | BASE_URL = '{}/v1/organizations'.format( 8 | os.environ.get('ROR_BASE_URL', 'http://localhost')) 9 | 10 | 11 | class APIMatchingTestCase(SimpleTestCase): 12 | def test_query_organizations(self): 13 | output = requests.get(BASE_URL, { 14 | 'affiliation': 'university of warsaw' 15 | }).json() 16 | 17 | self.assertTrue(output['number_of_results'] > 1) 18 | 19 | for k in ['number_of_results', 'items']: 20 | self.assertTrue(k in output) 21 | 22 | prev = 1 23 | for i in output['items']: 24 | for k in [ 25 | 'substring', 'score', 'matching_type', 'chosen', 26 | 'organization' 27 | ]: 28 | self.assertTrue(k in i) 29 | 30 | for k in ['id', 'name']: 31 | self.assertTrue(k in i.get('organization')) 32 | self.assertIsNotNone( 33 | re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', 34 | i.get('organization').get('id'))) 35 | 36 | self.assertEqual(i.get('substring'), 'university of warsaw') 37 | self.assertTrue(i.get('score') > 0) 38 | self.assertTrue(i.get('score') <= 1) 39 | self.assertTrue(i.get('score') <= prev) 40 | prev = i.get('score') 41 | self.assertTrue( 42 | i.get('matching_type') in 43 | ['PHRASE', 'ACRONYM', 'FUZZY', 'HEURISTICS', 'COMMON TERMS', 'EXACT']) 44 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_matching_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import requests 4 | 5 | from django.test import SimpleTestCase 6 | 7 | BASE_URL = '{}/v2/organizations'.format( 8 | 
os.environ.get('ROR_BASE_URL', 'http://localhost')) 9 | 10 | 11 | class APIMatchingTestCase(SimpleTestCase): 12 | def test_query_organizations(self): 13 | output = requests.get(BASE_URL, { 14 | 'affiliation': 'university of warsaw' 15 | }).json() 16 | 17 | self.assertTrue(output['number_of_results'] > 1) 18 | 19 | for k in ['number_of_results', 'items']: 20 | self.assertTrue(k in output) 21 | 22 | prev = 1 23 | for i in output['items']: 24 | for k in [ 25 | 'substring', 'score', 'matching_type', 'chosen', 26 | 'organization' 27 | ]: 28 | self.assertTrue(k in i) 29 | 30 | for k in ['id', 'names']: 31 | self.assertTrue(k in i.get('organization')) 32 | self.assertIsNotNone( 33 | re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', 34 | i.get('organization').get('id'))) 35 | 36 | self.assertEqual(i.get('substring'), 'university of warsaw') 37 | self.assertTrue(i.get('score') > 0) 38 | self.assertTrue(i.get('score') <= 1) 39 | self.assertTrue(i.get('score') <= prev) 40 | prev = i.get('score') 41 | self.assertTrue( 42 | i.get('matching_type') in 43 | ['PHRASE', 'ACRONYM', 'FUZZY', 'HEURISTICS', 'COMMON TERMS', 'EXACT']) 44 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_search_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | from django.test import SimpleTestCase 5 | 6 | BASE_URL = '{}/v1/organizations'.format( 7 | os.environ.get('ROR_BASE_URL', 'http://localhost')) 8 | 9 | 10 | class QueryTestCase(SimpleTestCase): 11 | def test_exact(self): 12 | items = requests.get(BASE_URL, { 13 | 'query': 'Centro Universitário do Maranhão' 14 | }).json() 15 | self.assertTrue(items['number_of_results'] > 0) 16 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 17 | 18 | items = requests.get(BASE_URL, { 19 | 'query': 'Julius-Maximilians-Universität Würzburg' 20 | }).json() 21 | self.assertTrue(items['number_of_results'] 
> 0) 22 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 23 | 24 | def test_lowercase(self): 25 | items = requests.get(BASE_URL, { 26 | 'query': 'centro universitário do maranhão' 27 | }).json() 28 | self.assertTrue(items['number_of_results'] > 0) 29 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 30 | 31 | items = requests.get(BASE_URL, { 32 | 'query': 'julius-maximilians-universität würzburg' 33 | }).json() 34 | self.assertTrue(items['number_of_results'] > 0) 35 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 36 | 37 | def test_accents_stripped(self): 38 | items = requests.get(BASE_URL, { 39 | 'query': 'centro universitario do maranhao' 40 | }).json() 41 | self.assertTrue(items['number_of_results'] > 0) 42 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 43 | 44 | items = requests.get(BASE_URL, { 45 | 'query': 'julius-maximilians-universitat wurzburg' 46 | }).json() 47 | self.assertTrue(items['number_of_results'] > 0) 48 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 49 | 50 | def test_extra_word(self): 51 | items = requests.get(BASE_URL, { 52 | 'query': 'Centro Universitário do Maranhão School' 53 | }).json() 54 | self.assertTrue(items['number_of_results'] > 0) 55 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 56 | 57 | items = requests.get( 58 | BASE_URL, { 59 | 'query': 'Julius-Maximilians-Universität Würzburg School' 60 | }).json() 61 | self.assertTrue(items['number_of_results'] > 0) 62 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 63 | 64 | 65 | class QueryFuzzyTestCase(SimpleTestCase): 66 | def test_exact(self): 67 | items = requests.get(BASE_URL, { 68 | 'query': 'Centro~ Universitário~ do~ Maranhão~' 69 | }).json() 70 | self.assertTrue(items['number_of_results'] > 0) 71 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 72 | 73 | items = requests.get( 
74 | BASE_URL, { 75 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~' 76 | }).json() 77 | self.assertTrue(items['number_of_results'] > 0) 78 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 79 | 80 | def test_lowercase(self): 81 | items = requests.get(BASE_URL, { 82 | 'query': 'centro~ universitário~ do~ maranhão~' 83 | }).json() 84 | self.assertTrue(items['number_of_results'] > 0) 85 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 86 | 87 | items = requests.get( 88 | BASE_URL, { 89 | 'query': 'julius~ maximilians~ universität~ würzburg~' 90 | }).json() 91 | self.assertTrue(items['number_of_results'] > 0) 92 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 93 | 94 | def test_accents_stripped(self): 95 | items = requests.get(BASE_URL, { 96 | 'query': 'centro~ universitario~ do~ maranhao~' 97 | }).json() 98 | self.assertTrue(items['number_of_results'] > 0) 99 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 100 | 101 | items = requests.get( 102 | BASE_URL, { 103 | 'query': 'julius~ maximilians~ universitat~ wurzburg~' 104 | }).json() 105 | self.assertTrue(items['number_of_results'] > 0) 106 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 107 | 108 | def test_typos(self): 109 | items = requests.get(BASE_URL, { 110 | 'query': 'centre~ universitario~ do~ marahao~' 111 | }).json() 112 | self.assertTrue(items['number_of_results'] > 0) 113 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 114 | 115 | items = requests.get( 116 | BASE_URL, { 117 | 'query': 'julius~ maximilian~ universitat~ wuerzburg~' 118 | }).json() 119 | self.assertTrue(items['number_of_results'] > 0) 120 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') 121 | 122 | def test_extra_word(self): 123 | items = requests.get( 124 | BASE_URL, { 125 | 'query': 'Centro~ Universitário~ do~ Maranhão~ School~' 126 | }).json() 127 | 
self.assertTrue(items['number_of_results'] > 0) 128 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 129 | 130 | items = requests.get( 131 | BASE_URL, { 132 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~ School~' 133 | }).json() 134 | self.assertTrue(items['number_of_results'] > 0) 135 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 136 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_search_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | from django.test import SimpleTestCase 5 | 6 | BASE_URL = '{}/v2/organizations'.format( 7 | os.environ.get('ROR_BASE_URL', 'http://localhost')) 8 | 9 | 10 | class QueryTestCase(SimpleTestCase): 11 | def test_exact(self): 12 | items = requests.get(BASE_URL, { 13 | 'query': 'Centro Universitário do Maranhão' 14 | }).json() 15 | self.assertTrue(items['number_of_results'] > 0) 16 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 17 | 18 | items = requests.get(BASE_URL, { 19 | 'query': 'Julius-Maximilians-Universität Würzburg' 20 | }).json() 21 | self.assertTrue(items['number_of_results'] > 0) 22 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 23 | 24 | def test_lowercase(self): 25 | items = requests.get(BASE_URL, { 26 | 'query': 'centro universitário do maranhão' 27 | }).json() 28 | self.assertTrue(items['number_of_results'] > 0) 29 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 30 | 31 | items = requests.get(BASE_URL, { 32 | 'query': 'julius-maximilians-universität würzburg' 33 | }).json() 34 | self.assertTrue(items['number_of_results'] > 0) 35 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 36 | 37 | def test_accents_stripped(self): 38 | items = requests.get(BASE_URL, { 39 | 'query': 'centro universitario do maranhao' 40 
| }).json() 41 | self.assertTrue(items['number_of_results'] > 0) 42 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 43 | 44 | items = requests.get(BASE_URL, { 45 | 'query': 'julius-maximilians-universitat wurzburg' 46 | }).json() 47 | self.assertTrue(items['number_of_results'] > 0) 48 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 49 | 50 | def test_extra_word(self): 51 | items = requests.get(BASE_URL, { 52 | 'query': 'Centro Universitário do Maranhão School' 53 | }).json() 54 | self.assertTrue(items['number_of_results'] > 0) 55 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 56 | 57 | items = requests.get( 58 | BASE_URL, { 59 | 'query': 'Julius-Maximilians-Universität Würzburg School' 60 | }).json() 61 | self.assertTrue(items['number_of_results'] > 0) 62 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 63 | 64 | 65 | class QueryFuzzyTestCase(SimpleTestCase): 66 | def test_exact(self): 67 | items = requests.get(BASE_URL, { 68 | 'query': 'Centro~ Universitário~ do~ Maranhão~' 69 | }).json() 70 | self.assertTrue(items['number_of_results'] > 0) 71 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 72 | 73 | items = requests.get( 74 | BASE_URL, { 75 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~' 76 | }).json() 77 | self.assertTrue(items['number_of_results'] > 0) 78 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 79 | 80 | def test_lowercase(self): 81 | items = requests.get(BASE_URL, { 82 | 'query': 'centro~ universitário~ do~ maranhão~' 83 | }).json() 84 | self.assertTrue(items['number_of_results'] > 0) 85 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 86 | 87 | items = requests.get( 88 | BASE_URL, { 89 | 'query': 'julius~ maximilians~ universität~ würzburg~' 90 | }).json() 91 | self.assertTrue(items['number_of_results'] > 0) 92 | self.assertEquals(items['items'][0]['id'], 
'https://ror.org/00fbnyb24') 93 | 94 | def test_accents_stripped(self): 95 | items = requests.get(BASE_URL, { 96 | 'query': 'centro~ universitario~ do~ maranhao~' 97 | }).json() 98 | self.assertTrue(items['number_of_results'] > 0) 99 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 100 | 101 | items = requests.get( 102 | BASE_URL, { 103 | 'query': 'julius~ maximilians~ universitat~ wurzburg~' 104 | }).json() 105 | self.assertTrue(items['number_of_results'] > 0) 106 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 107 | 108 | def test_typos(self): 109 | items = requests.get(BASE_URL, { 110 | 'query': 'centre~ universitario~ do~ marahao~' 111 | }).json() 112 | self.assertTrue(items['number_of_results'] > 0) 113 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 114 | 115 | items = requests.get( 116 | BASE_URL, { 117 | 'query': 'julius~ maximilian~ universitat~ wuerzburg~' 118 | }).json() 119 | self.assertTrue(items['number_of_results'] > 0) 120 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') 121 | 122 | def test_extra_word(self): 123 | items = requests.get( 124 | BASE_URL, { 125 | 'query': 'Centro~ Universitário~ do~ Maranhão~ School~' 126 | }).json() 127 | self.assertTrue(items['number_of_results'] > 0) 128 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 129 | 130 | items = requests.get( 131 | BASE_URL, { 132 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~ School~' 133 | }).json() 134 | self.assertTrue(items['number_of_results'] > 0) 135 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 136 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/tests_unit/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_address.json: -------------------------------------------------------------------------------- 1 | { 2 | "address": { 3 | "lat": 37.80437, 4 | "lng": -122.2708, 5 | "state": null, 6 | "state_code": null, 7 | "country_geonames_id": 6252001, 8 | "city": "Oakland", 9 | "geonames_city": { 10 | "id": "5378538", 11 | "city": "Oakland", 12 | "geonames_admin1": { 13 | "name": "California", 14 | "ascii_name": "California", 15 | "id": 5332921, 16 | "code": "US.CA" 17 | }, 18 | "geonames_admin2": { 19 | "name": "Alameda", 20 | "id": 5322745, 21 | "ascii_name": "Alameda", 22 | "code": "US.CA.001" 23 | }, 24 | "nuts_level1": { 25 | "name": null, 26 | "code": null 27 | }, 28 | "nuts_level2": { 29 | "name": null, 30 | "code": null 31 | }, 32 | "nuts_level3": { 33 | "name": null, 34 | "code": null 35 | } 36 | } 37 | }, 38 | "country": { 39 | "country_code": "US", 40 | "country_name": "United States" 41 | } 42 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_address_empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "msg": "Expecting value", 3 | "doc": "", 4 | "pos": 0, 5 | "lineno": 1, 6 | "colno": 1 7 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_create_valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 
| "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "established": null, 19 | "external_ids": [ 20 | { 21 | "type": "grid", 22 | "all": [ 23 | "grid.426225.5" 24 | ], 25 | "preferred": "grid.426225.5" 26 | } 27 | ], 28 | "id": "https://ror.org/00wz65j53", 29 | "domains": ["wisc.edu"], 30 | "links": [ 31 | { 32 | "type": "website", 33 | "value": "https://www.jdsu.com" 34 | } 35 | ], 36 | "names": [ 37 | { 38 | "value": "JDSU (Switzerland)", 39 | "types": [ 40 | "ror_display", 41 | "label" 42 | ], 43 | "lang": null 44 | } 45 | ], 46 | "relationships": [ 47 | { 48 | "label": "JDSU (United States)", 49 | "type": "parent", 50 | "id": "https://ror.org/01a5v8x09" 51 | }, 52 | { 53 | "label": "Viavi Solutions (United States)", 54 | "type": "successor", 55 | "id": "https://ror.org/059a9e323" 56 | } 57 | ], 58 | "status": "inactive", 59 | "types": [ 60 | "company" 61 | ], 62 | "admin": { 63 | "created": { 64 | "date": "2023-07-28", 65 | "schema_version": "1.0" 66 | }, 67 | "last_modified": { 68 | "date": "2023-07-28", 69 | "schema_version": "2.0" 70 | } 71 | } 72 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_empty_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "took" : 32, 3 | "timed_out" : false, 4 | "_shards" : { 5 | "total" : 1, 6 | "successful" : 1, 7 | "skipped" : 0, 8 | "failed" : 0 9 | }, 10 | "hits" : { 11 | "total" : { 12 | "value" : 0, 13 | "relation" : "eq" 14 | }, 15 | "max_score" : null, 16 | "hits" : [ ] 17 | }, 18 | "aggregations" : { 19 | "types" : { 20 | "doc_count_error_upper_bound" : 0, 21 | "sum_other_doc_count" : 0, 22 | "buckets" : [ ] 23 | }, 24 | "statuses" : { 25 | "doc_count_error_upper_bound" : 0, 26 | "sum_other_doc_count" : 0, 27 | "buckets" : [ ] 28 | }, 29 | "countries" : { 30 | "doc_count_error_upper_bound" : 0, 31 | "sum_other_doc_count" : 0, 32 | "buckets" : [ ] 33 | } 34 | } 
35 | } 36 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 | "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "names": [ 19 | { 20 | "value": "JDSU (Switzerland)", 21 | "types": [ 22 | "ror_display", 23 | "label" 24 | ], 25 | "lang": null 26 | } 27 | ], 28 | "status": "active", 29 | "types": [ 30 | "company" 31 | ], 32 | "foo": "bar" 33 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 | "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "names": [ 19 | { 20 | "value": "JDSU (Switzerland)", 21 | "types": [ 22 | "ror_display", 23 | "label" 24 | ], 25 | "lang": null 26 | } 27 | ], 28 | "status": "active", 29 | "types": [ 30 | "company" 31 | ] 32 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_retrieve_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "took" : 7, 3 | "timed_out" : false, 4 | "_shards" : { 5 | "total" : 1, 6 | "successful" : 1, 7 | 
"skipped" : 0, 8 | "failed" : 0 9 | }, 10 | "hits" : { 11 | "total" : { 12 | "value" : 1, 13 | "relation" : "eq" 14 | }, 15 | "max_score" : 11.155889, 16 | "hits" : [ 17 | { 18 | "_index" : "organizations", 19 | "_type" : "_doc", 20 | "_id" : "https://ror.org/02atag894", 21 | "_score" : 11.155889, 22 | "_source" : { 23 | "id" : "https://ror.org/02atag894", 24 | "name" : "Office of Nuclear Physics", 25 | "types" : [ 26 | "Government" 27 | ], 28 | "links" : [ 29 | "https://science.osti.gov/np" 30 | ], 31 | "aliases" : [ ], 32 | "acronyms" : [ 33 | "NP" 34 | ], 35 | "status" : "active", 36 | "wikipedia_url" : "", 37 | "labels" : [ ], 38 | "email_address" : null, 39 | "ip_addresses" : [ ], 40 | "established" : 1996, 41 | "country" : { 42 | "country_code" : "US", 43 | "country_name" : "United States" 44 | }, 45 | "relationships" : [ 46 | { 47 | "type" : "Parent", 48 | "label" : "Office of Science", 49 | "id" : "https://ror.org/00mmn6b08" 50 | } 51 | ], 52 | "addresses" : [ 53 | { 54 | "line" : null, 55 | "lat" : 38.88758, 56 | "lng" : -77.025728, 57 | "postcode" : null, 58 | "primary" : false, 59 | "city" : "Washington D.C.", 60 | "state" : "District of Columbia", 61 | "state_code" : "US-DC", 62 | "country_geonames_id" : 6252001, 63 | "geonames_city" : { 64 | "id" : 4140963, 65 | "city" : "Washington, D.C.", 66 | "nuts_level1" : { 67 | "code" : null, 68 | "name" : null 69 | }, 70 | "nuts_level2" : { 71 | "code" : null, 72 | "name" : null 73 | }, 74 | "nuts_level3" : { 75 | "code" : null, 76 | "name" : null 77 | }, 78 | "geonames_admin1" : { 79 | "id" : 4138106, 80 | "name" : "Washington, D.C.", 81 | "ascii_name" : "Washington, D.C.", 82 | "code" : "US.DC" 83 | }, 84 | "geonames_admin2" : { 85 | "id" : 4140987, 86 | "name" : "Washington County", 87 | "ascii_name" : "Washington County", 88 | "code" : "US.DC.001" 89 | }, 90 | "license" : { 91 | "attribution" : "Data from geonames.org under a CC-BY 3.0 license", 92 | "license" : "http://creativecommons.org/licenses/by/3.0/" 
93 | } 94 | } 95 | } 96 | ], 97 | "external_ids" : { 98 | "ISNI" : { 99 | "preferred" : null, 100 | "all" : [ 101 | "0000 0004 5897 7463" 102 | ] 103 | }, 104 | "FundRef" : { 105 | "preferred" : null, 106 | "all" : [ 107 | "100006209" 108 | ] 109 | }, 110 | "Wikidata" : { 111 | "preferred" : null, 112 | "all" : [ 113 | "Q30296535" 114 | ] 115 | }, 116 | "GRID" : { 117 | "preferred" : "grid.453025.5", 118 | "all" : "grid.453025.5" 119 | } 120 | } 121 | } 122 | } 123 | ] 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "took": 85, 3 | "timed_out": false, 4 | "_shards": { 5 | "total": 1, 6 | "successful": 1, 7 | "skipped": 0, 8 | "failed": 0 9 | }, 10 | "hits": { 11 | "total": { 12 | "value": 1, 13 | "relation": "eq" 14 | }, 15 | "max_score": 11.164284, 16 | "hits": [ 17 | { 18 | "_index": "organizations-v2", 19 | "_type": "_doc", 20 | "_id": "https://ror.org/02atag894", 21 | "_score": 11.164284, 22 | "_source": { 23 | "admin": { 24 | "created": { 25 | "date": "2023-07-28", 26 | "schema_version": "1.0" 27 | }, 28 | "last_modified": { 29 | "date": "2023-07-28", 30 | "schema_version": "2.0" 31 | } 32 | }, 33 | "domains": [], 34 | "established": 1996, 35 | "external_ids": [ 36 | { 37 | "all": [ 38 | "0000 0004 5897 7463" 39 | ], 40 | "preferred": null, 41 | "type": "isni" 42 | }, 43 | { 44 | "all": [ 45 | "100006209" 46 | ], 47 | "preferred": null, 48 | "type": "fundref" 49 | }, 50 | { 51 | "all": [ 52 | "Q30296535" 53 | ], 54 | "preferred": null, 55 | "type": "wikidata" 56 | }, 57 | { 58 | "all": [ 59 | "grid.453025.5" 60 | ], 61 | "preferred": "grid.453025.5", 62 | "type": "grid" 63 | } 64 | ], 65 | "id": "https://ror.org/02atag894", 66 | "links": [ 67 | { 68 | "type": "website", 69 | "value": "https://science.osti.gov/np" 70 | } 71 | ], 72 | "locations": [ 73 | { 
74 | "geonames_details": { 75 | "continent_code": "NA", 76 | "continent_name": "North America", 77 | "country_code": "US", 78 | "country_name": "United States", 79 | "country_subdivision_code": "DC", 80 | "country_subdivision_name": "District of Columbia", 81 | "lat": 38.88758, 82 | "lng": -77.025728, 83 | "name": "Washington, D.C." 84 | }, 85 | "geonames_id": 4140963 86 | } 87 | ], 88 | "names": [ 89 | { 90 | "lang": null, 91 | "types": [ 92 | "ror_display", 93 | "label" 94 | ], 95 | "value": "Office of Nuclear Physics" 96 | }, 97 | { 98 | "lang": null, 99 | "types": [ 100 | "acronym" 101 | ], 102 | "value": "NP" 103 | } 104 | ], 105 | "relationships": [ 106 | { 107 | "id": "https://ror.org/00mmn6b08", 108 | "label": "Office of Science", 109 | "type": "parent" 110 | } 111 | ], 112 | "status": "active", 113 | "types": [ 114 | "government" 115 | ] 116 | } 117 | } 118 | ] 119 | } 120 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_update_valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 | "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "established": null, 19 | "external_ids": [ 20 | { 21 | "type": "grid", 22 | "all": [ 23 | "grid.426225.5" 24 | ], 25 | "preferred": "grid.426225.5" 26 | } 27 | ], 28 | "id": "https://ror.org/00wz65j53", 29 | "domains": ["wisc.edu"], 30 | "links": [ 31 | { 32 | "type": "website", 33 | "value": "https://www.jdsu.com" 34 | } 35 | ], 36 | "names": [ 37 | { 38 | "value": "JDSU (Switzerland)", 39 | "types": [ 40 | "ror_display", 41 | "label" 42 | ], 43 | "lang": null 44 | } 45 | ], 46 | "relationships": [ 47 
| { 48 | "label": "JDSU (United States)", 49 | "type": "parent", 50 | "id": "https://ror.org/01a5v8x09" 51 | }, 52 | { 53 | "label": "Viavi Solutions (United States)", 54 | "type": "successor", 55 | "id": "https://ror.org/059a9e323" 56 | } 57 | ], 58 | "status": "inactive", 59 | "types": [ 60 | "company" 61 | ], 62 | "admin": { 63 | "created": { 64 | "date": "2023-07-28", 65 | "schema_version": "1.0" 66 | }, 67 | "last_modified": { 68 | "date": "2023-07-28", 69 | "schema_version": "2.0" 70 | } 71 | } 72 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_upload_csv.csv: -------------------------------------------------------------------------------- 1 | html_url,id,names.types.ror_display,status,types,names.types.alias,names.types.label,names.types.acronym,links.type.website,links.type.wikipedia,domains,established,external_ids.type.fundref.all,external_ids.type.fundref.preferred,external_ids.type.grid.all,external_ids.type.grid.preferred,external_ids.type.isni.all,external_ids.type.isni.preferred,external_ids.type.wikidata.all,external_ids.type.wikidata.preferred,city,country,locations.geonames_id 2 | https://github.com/ror-community/ror-updates/issues/9185,,Jizzakh branch of the National University of Uzbekistan named after Mirzo Ulugbek,active,Education,Jizzakh branch of the National University of Uzbekistan; Mirzo Ulug`bek nomidagi O`zbekiston milliy universiteti Jizzax filiali; Джизакский филиал Национального университета Узбекистана имени Мирзо Улугбека,Mirzo Ulug`bek nomidagi O`zbekiston milliy universiteti Jizzax filiali*Uzbek,,https://jbnuu.uz,https://uz.wikipedia.org/wiki/O%CA%BBzbekiston_milliy_universitetining_Jizzax_filiali,,2019,,,,,,,Q72342707,Q72342707,Jizzakh,Uzbekistan,1513886 3 | https://github.com/ror-community/ror-updates/issues/9389,,Znanstveno-raziskovalno središče Koper,active,Facility; Government,SRC Koper; ZRS Koper;,Science and Research Centre of Koper*English; Centro di 
ricerche scientifiche Capodistria*Italian,,https://www.zrs-kp.si;,,,,,,,,0000 0004 0398 0403,0000 0004 0398 0403,Q49569044,Q49569044,Koper,Slovenia,3197753 -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_client.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | from rorapi.v2.models import Client 3 | 4 | class ClientTests(TestCase): 5 | def test_client_registration(self): 6 | client = Client.objects.create(email='test@example.com') 7 | self.assertIsNotNone(client.client_id) -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_es_utils_v1.py: -------------------------------------------------------------------------------- 1 | from django.test import SimpleTestCase 2 | from rorapi.common.es_utils import ESQueryBuilder 3 | 4 | class QueryBuilderTestCaseV1(SimpleTestCase): 5 | V1_VERSION = 'v1' 6 | def test_id_query(self): 7 | qb = ESQueryBuilder(self.V1_VERSION) 8 | qb.add_id_query('ror-id') 9 | 10 | self.assertEqual(qb.get_query().to_dict(), { 11 | 'query': { 12 | 'match': { 13 | 'id': { 14 | 'query': 'ror-id', 15 | 'operator': 'and' 16 | } 17 | } 18 | }, 19 | 'track_total_hits': True 20 | }) 21 | 22 | def test_match_all_query(self): 23 | qb = ESQueryBuilder(self.V1_VERSION) 24 | qb.add_match_all_query() 25 | 26 | self.assertEqual(qb.get_query().to_dict(), 27 | {'query': { 28 | 'match_all': {} 29 | }, 30 | 'track_total_hits': True 31 | }) 32 | 33 | def test_string_query(self): 34 | qb = ESQueryBuilder(self.V1_VERSION) 35 | qb.add_string_query('query terms') 36 | 37 | self.assertEqual( 38 | qb.get_query().to_dict(), { 39 | 'query': { 40 | 'nested': { 41 | 'path': 'names_ids', 42 | 'score_mode': 'max', 43 | 'query': { 44 | 'query_string': { 45 | 'query': 'query terms', 46 | 'fuzzy_max_expansions': 1 47 | } 48 | } 49 | } 50 | }, 51 | 'track_total_hits': True 52 | }) 53 | def 
test_string_query_advanced(self): 54 | qb = ESQueryBuilder(self.V1_VERSION) 55 | qb.add_string_query_advanced('query terms') 56 | 57 | self.assertEqual( 58 | qb.get_query().to_dict(), { 59 | 'query': { 60 | 'bool': { 61 | 'must': [{ 62 | 'query_string': { 63 | 'query': 'query terms', 64 | 'default_field': '*', 65 | 'default_operator':'and', 66 | 'fuzzy_max_expansions': 1 67 | } 68 | }] 69 | } 70 | }, 71 | 'track_total_hits': True 72 | }) 73 | 74 | def test_phrase_query(self): 75 | qb = ESQueryBuilder(self.V1_VERSION) 76 | qb.add_phrase_query(['f1', 'f2'], 'query terms') 77 | 78 | self.assertEqual( 79 | qb.get_query().to_dict(), { 80 | 'query': { 81 | 'dis_max': { 82 | 'queries': [{ 83 | 'match_phrase': { 84 | 'f1': 'query terms' 85 | } 86 | }, { 87 | 'match_phrase': { 88 | 'f2': 'query terms' 89 | } 90 | }] 91 | } 92 | }, 93 | 'track_total_hits': True 94 | }) 95 | 96 | def test_common_query(self): 97 | qb = ESQueryBuilder(self.V1_VERSION) 98 | qb.add_common_query(['f1', 'f2'], 'query terms') 99 | 100 | self.assertEqual( 101 | qb.get_query().to_dict(), { 102 | 'query': { 103 | 'dis_max': { 104 | 'queries': [{ 105 | 'common': { 106 | 'f1': { 107 | 'query': 'query terms', 108 | 'cutoff_frequency': 0.001 109 | } 110 | } 111 | }, { 112 | 'common': { 113 | 'f2': { 114 | 'query': 'query terms', 115 | 'cutoff_frequency': 0.001 116 | } 117 | } 118 | }] 119 | } 120 | }, 121 | 'track_total_hits': True 122 | }) 123 | 124 | def test_match_query(self): 125 | qb = ESQueryBuilder(self.V1_VERSION) 126 | qb.add_match_query('query terms') 127 | 128 | self.assertEqual(qb.get_query().to_dict(), 129 | {'query': { 130 | 'match': { 131 | 'acronyms': 'query terms' 132 | } 133 | }, 134 | 'track_total_hits': True 135 | }) 136 | 137 | def test_fuzzy_query(self): 138 | qb = ESQueryBuilder(self.V1_VERSION) 139 | qb.add_fuzzy_query(['f1', 'f2'], 'query terms') 140 | 141 | self.assertEqual( 142 | qb.get_query().to_dict(), { 143 | 'query': { 144 | 'dis_max': { 145 | 'queries': [{ 146 | 'match': { 
147 | 'f1': { 148 | 'query': 'query terms', 149 | 'fuzziness': 'AUTO' 150 | } 151 | } 152 | }, { 153 | 'match': { 154 | 'f2': { 155 | 'query': 'query terms', 156 | 'fuzziness': 'AUTO' 157 | } 158 | } 159 | }] 160 | } 161 | }, 162 | 'track_total_hits': True 163 | }) 164 | 165 | def test_add_filters(self): 166 | qb = ESQueryBuilder(self.V1_VERSION) 167 | qb.add_match_all_query() 168 | qb.add_filters({'key1': ['val1'], 'k2': ['value2']}) 169 | 170 | self.assertEqual( 171 | qb.get_query().to_dict(), { 172 | 'query': { 173 | 'bool': { 174 | 'filter': [{ 175 | 'terms': { 176 | 'key1': ['val1'] 177 | } 178 | }, { 179 | 'terms': { 180 | 'k2': ['value2'] 181 | } 182 | }] 183 | } 184 | }, 185 | 'track_total_hits': True 186 | }) 187 | 188 | def test_add_aggregations(self): 189 | qb = ESQueryBuilder(self.V1_VERSION) 190 | qb.add_match_all_query() 191 | qb.add_aggregations([('countries', 'code'), ('types', 'type')]) 192 | 193 | self.assertEqual( 194 | qb.get_query().to_dict(), { 195 | 'query': { 196 | 'match_all': {} 197 | }, 198 | 'track_total_hits': True, 199 | 'aggs': { 200 | 'countries': { 201 | 'terms': { 202 | 'field': 'code', 203 | 'min_doc_count': 1, 204 | 'size': 10 205 | } 206 | }, 207 | 'types': { 208 | 'terms': { 209 | 'field': 'type', 210 | 'min_doc_count': 1, 211 | 'size': 10 212 | } 213 | } 214 | } 215 | }) 216 | 217 | def test_paginate(self): 218 | qb = ESQueryBuilder(self.V1_VERSION) 219 | qb.add_match_all_query() 220 | qb.paginate(10) 221 | 222 | self.assertEqual(qb.get_query().to_dict(), { 223 | 'query': { 224 | 'match_all': {} 225 | }, 226 | 'from': 180, 227 | 'size': 20, 228 | 'track_total_hits': True 229 | }) 230 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_es_utils_v2.py: -------------------------------------------------------------------------------- 1 | from django.test import SimpleTestCase 2 | from rorapi.common.es_utils import ESQueryBuilder 3 | 4 | class 
QueryBuilderTestCaseV2(SimpleTestCase): 5 | V2_VERSION = 'v2' 6 | def test_id_query(self): 7 | qb = ESQueryBuilder(self.V2_VERSION) 8 | qb.add_id_query('ror-id') 9 | 10 | self.assertEqual(qb.get_query().to_dict(), { 11 | 'query': { 12 | 'match': { 13 | 'id': { 14 | 'query': 'ror-id', 15 | 'operator': 'and' 16 | } 17 | } 18 | }, 19 | 'track_total_hits': True 20 | }) 21 | 22 | def test_match_all_query(self): 23 | qb = ESQueryBuilder(self.V2_VERSION) 24 | qb.add_match_all_query() 25 | 26 | self.assertEqual(qb.get_query().to_dict(), 27 | {'query': { 28 | 'match_all': {} 29 | }, 30 | 'track_total_hits': True 31 | }) 32 | 33 | def test_string_query(self): 34 | qb = ESQueryBuilder(self.V2_VERSION) 35 | qb.add_string_query('query terms') 36 | 37 | self.assertEqual( 38 | qb.get_query().to_dict(), { 39 | 'query': { 40 | 'nested': { 41 | 'path': 'names_ids', 42 | 'score_mode': 'max', 43 | 'query': { 44 | 'query_string': { 45 | 'query': 'query terms', 46 | 'fuzzy_max_expansions': 1 47 | } 48 | } 49 | } 50 | }, 51 | 'track_total_hits': True 52 | }) 53 | def test_string_query_advanced(self): 54 | qb = ESQueryBuilder(self.V2_VERSION) 55 | qb.add_string_query_advanced('query terms') 56 | 57 | self.assertEqual( 58 | qb.get_query().to_dict(), { 59 | 'query': { 60 | 'bool': { 61 | 'must': [{ 62 | 'query_string': { 63 | 'query': 'query terms', 64 | 'default_field': '*', 65 | 'default_operator':'and', 66 | 'fuzzy_max_expansions': 1 67 | } 68 | }] 69 | } 70 | }, 71 | 'track_total_hits': True 72 | }) 73 | 74 | def test_phrase_query(self): 75 | qb = ESQueryBuilder(self.V2_VERSION) 76 | qb.add_phrase_query(['f1', 'f2'], 'query terms') 77 | 78 | self.assertEqual( 79 | qb.get_query().to_dict(), { 80 | 'query': { 81 | 'dis_max': { 82 | 'queries': [{ 83 | 'match_phrase': { 84 | 'f1': 'query terms' 85 | } 86 | }, { 87 | 'match_phrase': { 88 | 'f2': 'query terms' 89 | } 90 | }] 91 | } 92 | }, 93 | 'track_total_hits': True 94 | }) 95 | 96 | def test_common_query(self): 97 | qb = 
ESQueryBuilder(self.V2_VERSION) 98 | qb.add_common_query(['f1', 'f2'], 'query terms') 99 | 100 | self.assertEqual( 101 | qb.get_query().to_dict(), { 102 | 'query': { 103 | 'dis_max': { 104 | 'queries': [{ 105 | 'common': { 106 | 'f1': { 107 | 'query': 'query terms', 108 | 'cutoff_frequency': 0.001 109 | } 110 | } 111 | }, { 112 | 'common': { 113 | 'f2': { 114 | 'query': 'query terms', 115 | 'cutoff_frequency': 0.001 116 | } 117 | } 118 | }] 119 | } 120 | }, 121 | 'track_total_hits': True 122 | }) 123 | 124 | def test_match_query(self): 125 | qb = ESQueryBuilder(self.V2_VERSION) 126 | qb.add_match_query('query terms') 127 | 128 | self.assertEqual(qb.get_query().to_dict(), 129 | {'query': { 130 | 'match': { 131 | 'acronyms': 'query terms' 132 | } 133 | }, 134 | 'track_total_hits': True 135 | }) 136 | 137 | def test_fuzzy_query(self): 138 | qb = ESQueryBuilder(self.V2_VERSION) 139 | qb.add_fuzzy_query(['f1', 'f2'], 'query terms') 140 | 141 | self.assertEqual( 142 | qb.get_query().to_dict(), { 143 | 'query': { 144 | 'dis_max': { 145 | 'queries': [{ 146 | 'match': { 147 | 'f1': { 148 | 'query': 'query terms', 149 | 'fuzziness': 'AUTO' 150 | } 151 | } 152 | }, { 153 | 'match': { 154 | 'f2': { 155 | 'query': 'query terms', 156 | 'fuzziness': 'AUTO' 157 | } 158 | } 159 | }] 160 | } 161 | }, 162 | 'track_total_hits': True 163 | }) 164 | 165 | def test_add_filters(self): 166 | qb = ESQueryBuilder(self.V2_VERSION) 167 | qb.add_match_all_query() 168 | qb.add_filters({'key1': ['val1'], 'k2': ['value2']}) 169 | 170 | self.assertEqual( 171 | qb.get_query().to_dict(), { 172 | 'query': { 173 | 'bool': { 174 | 'filter': [{ 175 | 'terms': { 176 | 'key1': ['val1'] 177 | } 178 | }, { 179 | 'terms': { 180 | 'k2': ['value2'] 181 | } 182 | }] 183 | } 184 | }, 185 | 'track_total_hits': True 186 | }) 187 | 188 | def test_add_aggregations(self): 189 | qb = ESQueryBuilder(self.V2_VERSION) 190 | qb.add_match_all_query() 191 | qb.add_aggregations([('countries', 'code'), ('types', 'type')]) 192 
| 193 | self.assertEqual( 194 | qb.get_query().to_dict(), { 195 | 'query': { 196 | 'match_all': {} 197 | }, 198 | 'track_total_hits': True, 199 | 'aggs': { 200 | 'countries': { 201 | 'terms': { 202 | 'field': 'code', 203 | 'min_doc_count': 1, 204 | 'size': 10 205 | } 206 | }, 207 | 'types': { 208 | 'terms': { 209 | 'field': 'type', 210 | 'min_doc_count': 1, 211 | 'size': 10 212 | } 213 | } 214 | } 215 | }) 216 | 217 | def test_paginate(self): 218 | qb = ESQueryBuilder(self.V2_VERSION) 219 | qb.add_match_all_query() 220 | qb.paginate(10) 221 | 222 | self.assertEqual(qb.get_query().to_dict(), { 223 | 'query': { 224 | 'match_all': {} 225 | }, 226 | 'from': 180, 227 | 'size': 20, 228 | 'track_total_hits': True 229 | }) 230 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_models_common.py: -------------------------------------------------------------------------------- 1 | from django.test import SimpleTestCase 2 | 3 | from rorapi.common.models import CountryBucket, Entity, Errors, TypeBucket 4 | from .utils import AttrDict 5 | 6 | 7 | class EntityTestCase(SimpleTestCase): 8 | def test_attributes_exist(self): 9 | data = {"a": 0, "b": 123, "third": "a thousand"} 10 | entity = Entity(AttrDict(data), ["a", "third", "b"]) 11 | self.assertEqual(entity.a, data["a"]) 12 | self.assertEqual(entity.b, data["b"]) 13 | self.assertEqual(entity.third, data["third"]) 14 | 15 | def test_omits_attributes(self): 16 | data = {"a": 0, "b": 123, "third": "a thousand"} 17 | entity = Entity(AttrDict(data), ["a"]) 18 | self.assertEqual(entity.a, data["a"]) 19 | msg = "'Entity' object has no attribute '{}'" 20 | with self.assertRaisesMessage(AttributeError, msg.format("b")): 21 | entity.b 22 | with self.assertRaisesMessage(AttributeError, msg.format("third")): 23 | entity.third 24 | 25 | 26 | class TypeBucketTestCase(SimpleTestCase): 27 | def test_attributes_exist(self): 28 | bucket = TypeBucket(AttrDict({"key": "Type", 
"doc_count": 482})) 29 | self.assertEqual(bucket.id, "type") 30 | self.assertEqual(bucket.title, "Type") 31 | self.assertEqual(bucket.count, 482) 32 | 33 | 34 | class CountryBucketTestCase(SimpleTestCase): 35 | def test_attributes_exist(self): 36 | bucket = CountryBucket(AttrDict({"key": "IE", "doc_count": 4821})) 37 | self.assertEqual(bucket.id, "ie") 38 | self.assertEqual(bucket.title, "Ireland") 39 | self.assertEqual(bucket.count, 4821) 40 | 41 | 42 | class ErrorsTestCase(SimpleTestCase): 43 | def test_attributes_exist(self): 44 | data = ["err1", "e2", "terrible error 3"] 45 | error = Errors(data) 46 | self.assertEqual(error.errors, data) 47 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/utils.py: -------------------------------------------------------------------------------- 1 | class AttrDict(dict): 2 | def __init__(self, nested_dict): 3 | for k, v in nested_dict.items(): 4 | if isinstance(v, dict): 5 | self[k] = AttrDict(v) 6 | elif isinstance(v, list): 7 | self[k] = [ 8 | AttrDict(e) if isinstance(e, dict) else e for e in v 9 | ] 10 | else: 11 | self[k] = v 12 | 13 | def __getattr__(self, attr): 14 | if attr not in self: 15 | raise AttributeError( 16 | '\'AttrDict\' object has no attribute \'{}\''.format(attr)) 17 | return self[attr] 18 | 19 | 20 | class IterableAttrDict(): 21 | def __init__(self, nested_dict, iter_list): 22 | self.attr_dict = AttrDict(nested_dict) 23 | self.iter_list = [AttrDict(i) for i in iter_list] 24 | 25 | def __iter__(self): 26 | return iter(self.iter_list) 27 | 28 | def __getitem__(self, key): 29 | return self.iter_list[key] 30 | 31 | def __getattr__(self, attr): 32 | return self.attr_dict.__getattr__(attr) 33 | -------------------------------------------------------------------------------- /rorapi/v1/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/v1/__init__.py -------------------------------------------------------------------------------- /rorapi/v1/index_template_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_patterns": [ 3 | "organizations" 4 | ], 5 | "settings": { 6 | "number_of_shards": 1, 7 | "analysis": { 8 | "analyzer": { 9 | "string_lowercase": { 10 | "tokenizer": "standard", 11 | "filter": [ 12 | "lowercase", 13 | "ascii_folding" 14 | ] 15 | } 16 | }, 17 | "filter": { 18 | "ascii_folding": { 19 | "type": "asciifolding", 20 | "preserve_original": true 21 | } 22 | } 23 | } 24 | }, 25 | "mappings": { 26 | "properties": { 27 | "id": { 28 | "type": "keyword" 29 | }, 30 | "name": { 31 | "type": "text", 32 | "fields": { 33 | "keyword": { 34 | "type": "keyword" 35 | }, 36 | "norm": { 37 | "type": "text", 38 | "analyzer": "string_lowercase", 39 | "fielddata": true 40 | } 41 | } 42 | }, 43 | "links": { 44 | "type": "text", 45 | "analyzer": "simple" 46 | }, 47 | "wikipedia_url": { 48 | "type": "text", 49 | "analyzer": "simple" 50 | }, 51 | "aliases": { 52 | "type": "text", 53 | "fields": { 54 | "keyword": { 55 | "type": "keyword" 56 | }, 57 | "norm": { 58 | "type": "text", 59 | "analyzer": "string_lowercase", 60 | "fielddata": true 61 | } 62 | } 63 | }, 64 | "acronyms": { 65 | "type": "text", 66 | "fields": { 67 | "keyword": { 68 | "type": "keyword" 69 | }, 70 | "norm": { 71 | "type": "text", 72 | "analyzer": "string_lowercase", 73 | "fielddata": true 74 | } 75 | } 76 | }, 77 | "status": { 78 | "type": "keyword" 79 | }, 80 | "labels": { 81 | "properties": { 82 | "label": { 83 | "type": "text", 84 | "fields": { 85 | "keyword": { 86 | "type": "keyword" 87 | }, 88 | "norm": { 89 | "type": "text", 90 | "analyzer": "string_lowercase", 91 | "fielddata": true 92 | } 93 | } 94 | }, 95 | "iso639": { 96 | "type": "keyword" 97 | } 98 | } 99 | }, 100 | 
"country": { 101 | "properties": { 102 | "country_code": { 103 | "type": "keyword" 104 | }, 105 | "country_name": { 106 | "type": "keyword" 107 | } 108 | } 109 | }, 110 | "types": { 111 | "type": "keyword" 112 | }, 113 | "email_address": { 114 | "type": "text" 115 | }, 116 | "established": { 117 | "type": "date" 118 | }, 119 | "ip_addresses": { 120 | "type": "text" 121 | }, 122 | "addresses": { 123 | "properties": { 124 | "line": { 125 | "type": "text" 126 | }, 127 | "lat": { 128 | "type": "float" 129 | }, 130 | "lng": { 131 | "type": "float" 132 | }, 133 | "postcode": { 134 | "type": "keyword" 135 | }, 136 | "primary": { 137 | "type": "boolean" 138 | }, 139 | "city": { 140 | "type": "keyword" 141 | }, 142 | "state": { 143 | "type": "keyword" 144 | }, 145 | "state_code": { 146 | "type": "keyword" 147 | }, 148 | "geonames_city": { 149 | "properties": { 150 | "id": { 151 | "type": "integer" 152 | }, 153 | "city": { 154 | "type": "keyword" 155 | }, 156 | "nuts_level1": { 157 | "properties": { 158 | "name": { 159 | "type": "text" 160 | }, 161 | "code": { 162 | "type": "keyword" 163 | } 164 | } 165 | }, 166 | "nuts_level2": { 167 | "properties": { 168 | "name": { 169 | "type": "text" 170 | }, 171 | "code": { 172 | "type": "keyword" 173 | } 174 | } 175 | }, 176 | "nuts_level3": { 177 | "properties": { 178 | "name": { 179 | "type": "text" 180 | }, 181 | "code": { 182 | "type": "keyword" 183 | } 184 | } 185 | }, 186 | "geonames_admin1": { 187 | "properties": { 188 | "ascii_name": { 189 | "type": "keyword" 190 | }, 191 | "name": { 192 | "type": "keyword" 193 | }, 194 | "code": { 195 | "type": "keyword" 196 | } 197 | } 198 | }, 199 | "geonames_admin2": { 200 | "properties": { 201 | "ascii_name": { 202 | "type": "keyword" 203 | }, 204 | "name": { 205 | "type": "keyword" 206 | }, 207 | "code": { 208 | "type": "keyword" 209 | } 210 | } 211 | }, 212 | "license": { 213 | "properties": { 214 | "attribution": { 215 | "type": "text" 216 | }, 217 | "license": { 218 | "type": "text" 
219 | } 220 | } 221 | } 222 | } 223 | } 224 | } 225 | }, 226 | "relationships": { 227 | "properties": { 228 | "type": { 229 | "type": "keyword" 230 | }, 231 | "label": { 232 | "type": "text", 233 | "fields": { 234 | "keyword": { 235 | "type": "keyword" 236 | }, 237 | "norm": { 238 | "type": "text", 239 | "analyzer": "string_lowercase", 240 | "fielddata": true 241 | } 242 | } 243 | }, 244 | "id": { 245 | "type": "keyword" 246 | } 247 | } 248 | }, 249 | "external_ids": { 250 | "properties": { 251 | "GRID": { 252 | "properties": { 253 | "preferred": { 254 | "type": "keyword" 255 | }, 256 | "all": { 257 | "type": "keyword" 258 | } 259 | } 260 | }, 261 | "ISNI": { 262 | "properties": { 263 | "preferred": { 264 | "type": "keyword" 265 | }, 266 | "all": { 267 | "type": "keyword" 268 | } 269 | } 270 | }, 271 | "FundRef": { 272 | "properties": { 273 | "preferred": { 274 | "type": "keyword" 275 | }, 276 | "all": { 277 | "type": "keyword" 278 | } 279 | } 280 | }, 281 | "Wikidata": { 282 | "properties": { 283 | "preferred": { 284 | "type": "keyword" 285 | }, 286 | "all": { 287 | "type": "keyword" 288 | } 289 | } 290 | } 291 | } 292 | }, 293 | "names_ids": { 294 | "type": "nested", 295 | "properties": { 296 | "id": { 297 | "type": "keyword" 298 | }, 299 | "name": { 300 | "type": "text", 301 | "analyzer": "string_lowercase" 302 | } 303 | } 304 | } 305 | } 306 | } 307 | } -------------------------------------------------------------------------------- /rorapi/v1/models.py: -------------------------------------------------------------------------------- 1 | from geonamescache.mappers import country 2 | from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity 3 | 4 | class Aggregations: 5 | """Aggregations model class""" 6 | 7 | def __init__(self, data): 8 | self.types = [TypeBucket(b) for b in data.types.buckets] 9 | self.countries = [CountryBucket(b) for b in data.countries.buckets] 10 | self.statuses = [StatusBucket(b) for b in data.statuses.buckets] 11 | 
12 | class GeoAdmin: 13 | def __init__(self, data): 14 | if hasattr(data, 'id'): 15 | self.id = data.id 16 | else: 17 | self.id = None 18 | if hasattr(data, 'code'): 19 | self.code = data.code 20 | else: 21 | self.code = None 22 | if hasattr(data, 'name'): 23 | self.name = data.name 24 | else: 25 | self.name = None 26 | if hasattr(data, 'ascii_name'): 27 | self.ascii_name = data.ascii_name 28 | else: 29 | self.ascii_name = None 30 | 31 | 32 | class Nuts: 33 | """A model class for storing the NUTS metadata""" 34 | def __init__(self, data): 35 | self.code = getattr(data, 'code', None) 36 | self.name = getattr(data, 'name', None) 37 | 38 | 39 | class License: 40 | """A model class for storing license metadata""" 41 | def __init__(self, data): 42 | self.attribution = getattr(data, 'attribution', None) 43 | self.license = getattr(data, 'license', None) 44 | 45 | 46 | class GeoNamesCity: 47 | """A model class for storing geonames city hash""" 48 | def __init__(self, data): 49 | self.id = getattr(data, 'id', None) 50 | self.city = getattr(data, 'city', None) 51 | if hasattr(data, 'license'): 52 | self.license = License(data.license) 53 | else: 54 | self.license = None 55 | if hasattr(data, 'geonames_admin1'): 56 | self.geonames_admin1 = GeoAdmin(data.geonames_admin1) 57 | else: 58 | self.geonames_admin1 = None 59 | if hasattr(data, 'geonames_admin2'): 60 | self.geonames_admin2 = GeoAdmin(data.geonames_admin2) 61 | else: 62 | self.geonames_admin2 = None 63 | if hasattr(data, 'nuts_level1'): 64 | self.nuts_level1 = GeoAdmin(data.nuts_level1) 65 | else: 66 | self.nuts_level1 = None 67 | if hasattr(data, 'nuts_level2'): 68 | self.nuts_level2 = GeoAdmin(data.nuts_level2) 69 | else: 70 | self.nuts_level2 = None 71 | if hasattr(data, 'nuts_level3'): 72 | self.nuts_level3 = GeoAdmin(data.nuts_level3) 73 | else: 74 | self.nuts_level3 = None 75 | 76 | 77 | class Addresses: 78 | """A model class for storing addresses""" 79 | def __init__(self, data): 80 | self.country_geonames_id = 
data.country_geonames_id 81 | self.lat = data.lat 82 | self.lng = data.lng 83 | self.line = data.line 84 | self.state_code = data.state_code 85 | self.state = getattr(data, 'state', None) 86 | self.postcode = data.postcode 87 | self.city = data.city 88 | self.primary = data.primary 89 | self.geonames_city = GeoNamesCity(data.geonames_city) 90 | 91 | 92 | class ExternalIds: 93 | """A model class for storing external identifiers""" 94 | def __init__(self, data): 95 | for a in [ 96 | 'ISNI', 'FundRef', 'HESA', 'UCAS', 'UKPRN', 'CNRS', 'OrgRef', 97 | 'Wikidata', 'GRID' 98 | ]: 99 | try: 100 | setattr(self, a, Entity(getattr(data, a), 101 | ['preferred', 'all'])) 102 | except AttributeError: 103 | pass 104 | 105 | 106 | class Organization(Entity): 107 | """Organization model class""" 108 | def __init__(self, data): 109 | if "_source" in data: 110 | data = data["_source"] 111 | super(Organization, self).__init__(data, [ 112 | 'id', 'name', 'types', 'links', 'aliases', 'acronyms', 'status', 113 | 'wikipedia_url', 'established', 'relationships', 'addresses' 114 | ]) 115 | self.labels = [Entity(l, ['label', 'iso639']) for l in data.labels] 116 | self.country = Entity(data.country, ['country_name', 'country_code']) 117 | self.ip_addresses = data.ip_addresses 118 | self.established = getattr(data, 'established', None) 119 | self.email_address = getattr(data, 'email_address', None) 120 | self.relationships = [ 121 | Entity(r, ['type', 'label', 'id']) for r in data.relationships 122 | ] 123 | self.addresses = [Addresses(a) for a in data.addresses] 124 | self.external_ids = ExternalIds(data.external_ids) 125 | 126 | 127 | class ListResult: 128 | """A model class for the list of organizations returned from the search""" 129 | def __init__(self, data): 130 | self.number_of_results = data.hits.total.value 131 | self.time_taken = data.took 132 | self.items = [Organization(x) for x in data] 133 | self.meta = Aggregations(data.aggregations) 134 | 135 | 136 | class MatchedOrganization: 
137 | """A model class for an organization matched based on an affiliation 138 | string""" 139 | def __init__(self, data): 140 | self.substring = data.substring 141 | self.score = data.score 142 | self.matching_type = data.matching_type 143 | self.chosen = data.chosen 144 | self.organization = Organization(data.organization) 145 | 146 | 147 | class MatchingResult: 148 | """A model class for the result of affiliation matching""" 149 | def __init__(self, data): 150 | self.number_of_results = len(data) 151 | self.items = [MatchedOrganization(x) for x in data] -------------------------------------------------------------------------------- /rorapi/v1/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer 3 | 4 | class AggregationsSerializer(serializers.Serializer): 5 | types = BucketSerializer(many=True) 6 | countries = BucketSerializer(many=True) 7 | statuses = BucketSerializer(many=True) 8 | 9 | 10 | class OrganizationLabelSerializer(serializers.Serializer): 11 | label = serializers.CharField() 12 | iso639 = serializers.CharField() 13 | 14 | 15 | class CountrySerializer(serializers.Serializer): 16 | country_name = serializers.CharField() 17 | country_code = serializers.CharField() 18 | 19 | 20 | class LicenseSerializer(serializers.Serializer): 21 | attribution = serializers.StringRelatedField() 22 | license = serializers.StringRelatedField() 23 | 24 | 25 | class NutsSerializer(serializers.Serializer): 26 | name = serializers.CharField() 27 | code = serializers.CharField() 28 | 29 | 30 | class AddressGeoNamesSerializer(serializers.Serializer): 31 | name = serializers.CharField() 32 | id = serializers.IntegerField() 33 | ascii_name = serializers.CharField() 34 | code = serializers.CharField() 35 | 36 | 37 | class GeoNamesCitySerializer(serializers.Serializer): 38 | id = 
serializers.IntegerField() 39 | city = serializers.StringRelatedField() 40 | geonames_admin1 = AddressGeoNamesSerializer() 41 | geonames_admin2 = AddressGeoNamesSerializer() 42 | license = LicenseSerializer() 43 | nuts_level1 = NutsSerializer() 44 | nuts_level2 = NutsSerializer() 45 | nuts_level3 = NutsSerializer() 46 | 47 | 48 | class OrganizationAddressesSerializer(serializers.Serializer): 49 | lat = serializers.DecimalField(max_digits=None, 50 | decimal_places=10, 51 | coerce_to_string=False) 52 | lng = serializers.DecimalField(max_digits=None, 53 | decimal_places=10, 54 | coerce_to_string=False) 55 | state = serializers.StringRelatedField() 56 | state_code = serializers.CharField() 57 | city = serializers.CharField() 58 | geonames_city = GeoNamesCitySerializer() 59 | postcode = serializers.CharField() 60 | primary = serializers.BooleanField() 61 | line = serializers.CharField() 62 | country_geonames_id = serializers.IntegerField() 63 | 64 | 65 | class ExternalIdSerializer(serializers.Serializer): 66 | preferred = serializers.CharField() 67 | all = serializers.StringRelatedField(many=True) 68 | 69 | 70 | class GridExternalIdSerializer(serializers.Serializer): 71 | preferred = serializers.CharField() 72 | all = serializers.StringRelatedField() 73 | 74 | 75 | class ExternalIdsSerializer(serializers.Serializer): 76 | ISNI = ExternalIdSerializer(required=False) 77 | FundRef = ExternalIdSerializer(required=False) 78 | HESA = ExternalIdSerializer(required=False) 79 | UCAS = ExternalIdSerializer(required=False) 80 | UKPRN = ExternalIdSerializer(required=False) 81 | CNRS = ExternalIdSerializer(required=False) 82 | OrgRef = ExternalIdSerializer(required=False) 83 | Wikidata = ExternalIdSerializer(required=False) 84 | GRID = GridExternalIdSerializer(required=False) 85 | 86 | 87 | class OrganizationSerializer(serializers.Serializer): 88 | id = serializers.CharField() 89 | name = serializers.CharField() 90 | email_address = serializers.StringRelatedField() 91 | ip_addresses 
= serializers.StringRelatedField(many=True) 92 | established = serializers.IntegerField() 93 | types = serializers.StringRelatedField(many=True) 94 | relationships = OrganizationRelationshipsSerializer(many=True) 95 | addresses = OrganizationAddressesSerializer(many=True) 96 | links = serializers.StringRelatedField(many=True) 97 | aliases = serializers.StringRelatedField(many=True) 98 | acronyms = serializers.StringRelatedField(many=True) 99 | status = serializers.CharField() 100 | wikipedia_url = serializers.CharField() 101 | labels = OrganizationLabelSerializer(many=True) 102 | country = CountrySerializer() 103 | external_ids = ExternalIdsSerializer() 104 | 105 | 106 | class ListResultSerializer(serializers.Serializer): 107 | number_of_results = serializers.IntegerField() 108 | time_taken = serializers.IntegerField() 109 | items = OrganizationSerializer(many=True) 110 | meta = AggregationsSerializer() 111 | 112 | 113 | class MatchedOrganizationSerializer(serializers.Serializer): 114 | substring = serializers.CharField() 115 | score = serializers.FloatField() 116 | matching_type = serializers.CharField() 117 | chosen = serializers.BooleanField() 118 | organization = OrganizationSerializer() 119 | 120 | 121 | class MatchingResultSerializer(serializers.Serializer): 122 | number_of_results = serializers.IntegerField() 123 | items = MatchedOrganizationSerializer(many=True) 124 | -------------------------------------------------------------------------------- /rorapi/v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/v2/__init__.py -------------------------------------------------------------------------------- /rorapi/v2/index_template_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_patterns": [ 3 | "organizations-v2" 4 | ], 5 | "settings": { 6 | 
"number_of_shards": 1, 7 | "analysis": { 8 | "analyzer": { 9 | "string_lowercase": { 10 | "tokenizer": "standard", 11 | "filter": [ 12 | "lowercase", 13 | "ascii_folding" 14 | ] 15 | } 16 | }, 17 | "filter": { 18 | "ascii_folding": { 19 | "type": "asciifolding", 20 | "preserve_original": true 21 | } 22 | } 23 | } 24 | }, 25 | "mappings": { 26 | "properties": { 27 | "admin": { 28 | "properties": { 29 | "created": { 30 | "properties": { 31 | "date": { 32 | "type": "date" 33 | }, 34 | "schema_version": { 35 | "type": "keyword" 36 | } 37 | } 38 | }, 39 | "last_modified": { 40 | "properties": { 41 | "date": { 42 | "type": "date" 43 | }, 44 | "schema_version": { 45 | "type": "keyword" 46 | } 47 | } 48 | } 49 | } 50 | }, 51 | "domains": { 52 | "type": "text", 53 | "analyzer": "simple" 54 | }, 55 | "established": { 56 | "type": "date" 57 | }, 58 | "external_ids": { 59 | "properties": { 60 | "all": { 61 | "type": "keyword" 62 | }, 63 | "type": { 64 | "type": "keyword" 65 | }, 66 | "preferred": { 67 | "type": "keyword" 68 | } 69 | } 70 | }, 71 | "id": { 72 | "type": "keyword" 73 | }, 74 | "links": { 75 | "properties": { 76 | "value": { 77 | "type": "text", 78 | "analyzer": "simple" 79 | }, 80 | "type": { 81 | "type": "keyword" 82 | } 83 | } 84 | }, 85 | "locations": { 86 | "properties": { 87 | "geonames_id": { 88 | "type": "integer" 89 | }, 90 | "geonames_details": { 91 | "properties": { 92 | "continent_code": { 93 | "type": "keyword" 94 | }, 95 | "continent_name": { 96 | "type": "keyword" 97 | }, 98 | "country_code": { 99 | "type": "keyword" 100 | }, 101 | "country_name": { 102 | "type": "keyword" 103 | }, 104 | "country_subdivision_code": { 105 | "type": "keyword" 106 | }, 107 | "country_subdivision_name": { 108 | "type": "keyword" 109 | }, 110 | "lat": { 111 | "type": "float" 112 | }, 113 | "lng": { 114 | "type": "float" 115 | }, 116 | "name": { 117 | "type": "keyword" 118 | } 119 | } 120 | } 121 | } 122 | }, 123 | "names": { 124 | "properties": { 125 | "value": { 126 | 
"type": "text", 127 | "fields": { 128 | "keyword": { 129 | "type": "keyword" 130 | }, 131 | "norm": { 132 | "type": "text", 133 | "analyzer": "string_lowercase", 134 | "fielddata": true 135 | } 136 | } 137 | }, 138 | "lang": { 139 | "type": "keyword" 140 | }, 141 | "types": { 142 | "type": "keyword" 143 | } 144 | } 145 | }, 146 | "types": { 147 | "type": "keyword" 148 | }, 149 | "relationships": { 150 | "properties": { 151 | "type": { 152 | "type": "keyword" 153 | }, 154 | "label": { 155 | "type": "text", 156 | "fields": { 157 | "keyword": { 158 | "type": "keyword" 159 | }, 160 | "norm": { 161 | "type": "text", 162 | "analyzer": "string_lowercase", 163 | "fielddata": true 164 | } 165 | } 166 | }, 167 | "id": { 168 | "type": "keyword" 169 | } 170 | } 171 | }, 172 | "status": { 173 | "type": "keyword" 174 | }, 175 | "names_ids": { 176 | "type": "nested", 177 | "properties": { 178 | "id": { 179 | "type": "keyword" 180 | }, 181 | "name": { 182 | "type": "text", 183 | "analyzer": "string_lowercase" 184 | } 185 | } 186 | } 187 | } 188 | } 189 | } -------------------------------------------------------------------------------- /rorapi/v2/models.py: -------------------------------------------------------------------------------- 1 | from geonamescache.mappers import country 2 | import random 3 | import string 4 | from django.db import models 5 | from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity 6 | from rorapi.v2.record_constants import continent_code_to_name 7 | 8 | class ContinentBucket: 9 | """A model class for country aggregation bucket""" 10 | 11 | def __init__(self, data): 12 | self.id = data.key.lower() 13 | self.title = continent_code_to_name(data.key) 14 | self.count = data.doc_count 15 | 16 | class CountryBucket: 17 | """A model class for country aggregation bucket""" 18 | 19 | def __init__(self, data): 20 | self.id = data.key.lower() 21 | mapper = country(from_key="iso", to_key="name") 22 | try: 23 | self.title = mapper(data.key) 
class CountryBucket:
    """A model class for a country aggregation bucket."""

    def __init__(self, data):
        self.id = data.key.lower()
        mapper = country(from_key="iso", to_key="name")
        try:
            self.title = mapper(data.key)
        except AttributeError:
            # Some country codes have no name mapping. Previously this branch
            # left self.title unset, so any later attribute access (e.g. during
            # serialization) could still fail; always set the attribute so the
            # bucket is safe to serialize.
            self.title = None
        self.count = data.doc_count


class Aggregations:
    """Aggregations model class: facet buckets of a v2 search response."""

    def __init__(self, data):
        self.types = [TypeBucket(b) for b in data.types.buckets]
        self.countries = [CountryBucket(b) for b in data.countries.buckets]
        self.continents = [ContinentBucket(b) for b in data.continents.buckets]
        self.statuses = [StatusBucket(b) for b in data.statuses.buckets]


class GeoNamesDetails:
    """A model class for storing a GeoNames city/location hash."""

    def __init__(self, data):
        # Continent and subdivision fields may be absent on older records,
        # so they default to None.
        self.continent_code = getattr(data, 'continent_code', None)
        self.continent_name = getattr(data, 'continent_name', None)
        self.country_code = data.country_code
        self.country_name = data.country_name
        self.country_subdivision_code = getattr(data, 'country_subdivision_code', None)
        self.country_subdivision_name = getattr(data, 'country_subdivision_name', None)
        self.name = data.name
        self.lat = data.lat
        self.lng = data.lng


class Location:
    """A model class for storing addresses (GeoNames id + resolved details)."""

    def __init__(self, data):
        self.geonames_id = data.geonames_id
        self.geonames_details = GeoNamesDetails(data.geonames_details)


class ExternalId:
    """A model class for storing one external-id group (type, preferred, all)."""

    def __init__(self, data):
        self.type = data.type
        self.preferred = data.preferred
        self.all = [a for a in data.all]


class Admin:
    """A model class for storing admin (provenance) information."""

    def __init__(self, data):
        # Either stamp may be missing on the source document; absent stamps
        # are deliberately skipped rather than raising.
        for a in ["created", "last_modified"]:
            try:
                setattr(self, a, Entity(getattr(data, a), ["date", "schema_version"]))
            except AttributeError:
                pass
class ListResult:
    """A model class for the list of organizations returned from the search."""

    def __init__(self, data):
        self.number_of_results = data.hits.total.value
        self.time_taken = data.took
        self.items = [Organization(hit) for hit in data]
        self.meta = Aggregations(data.aggregations)


class MatchedOrganization:
    """One candidate organization matched against an affiliation string."""

    def __init__(self, data):
        self.substring = data.substring
        self.score = data.score
        self.matching_type = data.matching_type
        self.chosen = data.chosen
        self.organization = Organization(data.organization)


class MatchingResult:
    """A model class for the result of affiliation matching."""

    def __init__(self, data):
        self.items = [MatchedOrganization(candidate) for candidate in data]
        self.number_of_results = len(self.items)
models.EmailField(max_length=255) 142 | 143 | # Optional fields 144 | name = models.CharField(max_length=255, blank=True, null=True) 145 | institution_name = models.CharField(max_length=255, blank=True, null=True) 146 | institution_ror = models.URLField(max_length=255, blank=True, null=True) 147 | country_code = models.CharField(max_length=2, blank=True, null=True) 148 | ror_use = models.TextField(max_length=500, blank=True, null=True) 149 | 150 | # System fields 151 | client_id = models.CharField( 152 | max_length=32, 153 | unique=True, 154 | editable=False 155 | ) 156 | created_at = models.DateTimeField(auto_now_add=True) 157 | last_request_at = models.DateTimeField(null=True, blank=True) 158 | request_count = models.IntegerField(default=0) 159 | 160 | def __str__(self): 161 | return f"{self.email} - {self.client_id}" 162 | 163 | @staticmethod 164 | def generate_client_id(): 165 | """Generate a unique 32-character client ID""" 166 | return ''.join(random.choices(string.ascii_uppercase + string.digits, k=32)) 167 | 168 | def save(self, *args, **kwargs): 169 | # Ensure client_id is generated before saving 170 | if not self.client_id: # Only generate if it's empty 171 | self.client_id = self.generate_client_id() 172 | super().save(*args, **kwargs) 173 | -------------------------------------------------------------------------------- /rorapi/v2/record_constants.py: -------------------------------------------------------------------------------- 1 | V2_ADMIN = { 2 | "created": { 3 | "date": "", 4 | "schema_version": "2.1" 5 | }, 6 | "last_modified": { 7 | "date": "", 8 | "schema_version": "2.1" 9 | } 10 | } 11 | 12 | V2_LAST_MOD = { 13 | "date": "", 14 | "schema_version": "2.1" 15 | } 16 | 17 | V2_OPTIONAL_FIELD_DEFAULTS = { 18 | "domains": [], 19 | "established": None, 20 | "external_ids": [], 21 | "links": [], 22 | "relationships": [] 23 | } 24 | 25 | V2_TEMPLATE = { 26 | "locations": [], 27 | "established": None, 28 | "external_ids": [], 29 | "id": "", 30 | 
"domains": [], 31 | "links": [], 32 | "names": [], 33 | "relationships": [], 34 | "status": "", 35 | "types": [], 36 | "admin": {} 37 | } 38 | 39 | V2_EXTERNAL_ID_TYPES = { 40 | "FUNDREF" : "fundref", 41 | "GRID" : "grid", 42 | "ISNI" : "isni", 43 | "WIKIDATA" : "wikidata" 44 | } 45 | 46 | V2_LINK_TYPES = { 47 | "WEBSITE" : "website", 48 | "WIKIPEDIA" : "wikipedia" 49 | } 50 | 51 | V2_NAME_TYPES = { 52 | "ACRONYM" : "acronym", 53 | "ALIAS" : "alias", 54 | "LABEL" : "label", 55 | "ROR_DISPLAY" : "ror_display" 56 | } 57 | 58 | V2_SORT_KEYS = { 59 | "domains": None, 60 | "external_ids": "type", 61 | "links": "type", 62 | "names": "value", 63 | "relationships": "type", 64 | "types": None 65 | } 66 | 67 | V2_CONTINENT_CODES_NAMES = { 68 | "AF": "Africa", 69 | "AN": "Antarctica", 70 | "AS": "Asia", 71 | "EU": "Europe", 72 | "NA": "North America", 73 | "OC": "Oceania", 74 | "SA": "South America" 75 | } 76 | 77 | def continent_code_to_name(continent_code): 78 | if continent_code.upper() in V2_CONTINENT_CODES_NAMES.keys(): 79 | return V2_CONTINENT_CODES_NAMES[continent_code.upper()] 80 | return None 81 | -------------------------------------------------------------------------------- /rorapi/v2/record_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [], 3 | "established": null, 4 | "external_ids": [], 5 | "id": "", 6 | "domains": [], 7 | "links": [], 8 | "names": [], 9 | "relationships": [], 10 | "status": "", 11 | "types": [], 12 | "admin": {} 13 | } -------------------------------------------------------------------------------- /rorapi/v2/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | import bleach 3 | import pycountry 4 | import re 5 | from rorapi.v2.models import Client 6 | from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer 7 | 8 | class 
class AdminDetailsSerializer(serializers.Serializer):
    """One provenance stamp: a date plus the schema version it was written under."""

    date = serializers.DateTimeField()
    schema_version = serializers.CharField()


class AdminSerializer(serializers.Serializer):
    """Record provenance: creation and last-modification stamps."""

    created = AdminDetailsSerializer()
    last_modified = AdminDetailsSerializer()


class OrganizationNameSerializer(serializers.Serializer):
    """A single name entry: value, language, and its name types
    (e.g. acronym / alias / label / ror_display, per V2_NAME_TYPES)."""

    lang = serializers.CharField()
    types = serializers.StringRelatedField(many=True)
    value = serializers.CharField()


class ExternalIdSerializer(serializers.Serializer):
    """External identifiers of one type (e.g. fundref, grid, isni, wikidata)
    with a preferred value and the full list of values."""

    all = serializers.StringRelatedField(many=True)
    preferred = serializers.CharField()
    type = serializers.CharField()


class LinkSerializer(serializers.Serializer):
    """A typed URL (type is e.g. website or wikipedia, per V2_LINK_TYPES)."""

    type = serializers.CharField()
    value = serializers.CharField()


class GeoNamesDetailsSerializer(serializers.Serializer):
    """GeoNames-derived details for one organization location."""

    continent_code = serializers.CharField()
    continent_name = serializers.CharField()
    country_code = serializers.CharField()
    country_name = serializers.CharField()
    country_subdivision_code = serializers.CharField()
    country_subdivision_name = serializers.CharField()
    # max_digits=None leaves precision unconstrained; coerce_to_string=False
    # keeps coordinates numeric (not strings) in the JSON output.
    lat = serializers.DecimalField(
        max_digits=None, decimal_places=10, coerce_to_string=False
    )
    lng = serializers.DecimalField(
        max_digits=None, decimal_places=10, coerce_to_string=False
    )
    name = serializers.StringRelatedField()


class OrganizationLocationSerializer(serializers.Serializer):
    """A location: GeoNames id plus its resolved details."""

    geonames_details = GeoNamesDetailsSerializer()
    geonames_id = serializers.IntegerField()
class ListResultSerializer(serializers.Serializer):
    """Envelope for a v2 organization list/search response."""

    number_of_results = serializers.IntegerField()
    # Mirrors ListResult.time_taken (Elasticsearch "took") — presumably
    # milliseconds; confirm against the ES response contract.
    time_taken = serializers.IntegerField()
    items = OrganizationSerializer(many=True)
    meta = AggregationsSerializer()


class MatchedOrganizationSerializer(serializers.Serializer):
    """One affiliation-matching candidate and the organization it resolved to."""

    # The portion of the input affiliation string this match was based on.
    substring = serializers.CharField()
    score = serializers.FloatField()
    matching_type = serializers.CharField()
    # NOTE(review): appears to flag the matcher's selected candidate —
    # confirm semantics in common/matching.py.
    chosen = serializers.BooleanField()
    organization = OrganizationSerializer()


class MatchingResultSerializer(serializers.Serializer):
    """Envelope for the full affiliation-matching response."""

    number_of_results = serializers.IntegerField()
    items = MatchedOrganizationSerializer(many=True)
115 | """Sanitize name and validate length. Reject empty string.""" 116 | if value is not None: 117 | if value == "": 118 | raise serializers.ValidationError("Name cannot be an empty string.") 119 | value = bleach.clean(value) # Sanitize to strip HTML 120 | if len(value) > 255: 121 | raise serializers.ValidationError("Name cannot be longer than 255 characters.") 122 | return value 123 | 124 | def validate_institution_name(self, value): 125 | """Sanitize institution name and validate length. Reject empty string.""" 126 | if value is not None: 127 | if value == "": 128 | raise serializers.ValidationError("Institution name cannot be an empty string.") 129 | value = bleach.clean(value) # Sanitize to strip HTML 130 | if len(value) > 255: 131 | raise serializers.ValidationError("Institution name cannot be longer than 255 characters.") 132 | return value 133 | 134 | def validate_institution_ror(self, value): 135 | """Validate and format institution ROR to match 'https://ror.org/XXXXX'. Reject empty string.""" 136 | if value is not None: 137 | if value == "": 138 | raise serializers.ValidationError("Institution ROR cannot be an empty string.") 139 | value = bleach.clean(value) # Sanitize to strip HTML 140 | ror_regex = r'https://ror\.org/[A-Za-z0-9]+' 141 | if not re.match(ror_regex, value): 142 | raise serializers.ValidationError("Institution ROR must be in the format 'https://ror.org/XXXXX'.") 143 | return value 144 | 145 | def validate_country_code(self, value): 146 | """Validate that the country code is a valid ISO 3166-1 alpha-2 country code. 
from django.test import TestCase
# Bug fix: `models` is a module (rorapi/v2/models.py), not a package, so the
# previous `from .models.client import Client` raised ModuleNotFoundError.
# serializers.py confirms the correct path: `from rorapi.v2.models import Client`.
from .models import Client


class ClientTests(TestCase):
    """Tests for client registration and client-id based rate limiting."""

    def test_client_registration(self):
        # save() must auto-populate client_id via Client.generate_client_id().
        client = Client.objects.create(email='test@example.com')
        self.assertIsNotNone(client.client_id)

    def test_rate_limiting(self):
        # An unrecognized client id should be throttled with HTTP 429.
        # NOTE(review): assumes a '/client-id/' route exists and the
        # throttle middleware rejects unknown IDs — verify URL conf.
        response = self.client.get('/client-id/', HTTP_CLIENT_ID="INVALID_ID")
        self.assertEqual(response.status_code, 429)
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rorapi.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /vendor/docker/00_app_env.conf: -------------------------------------------------------------------------------- 1 | # File will be overwritten if user runs the container with `-e PASSENGER_APP_ENV=...`! 2 | passenger_app_env development; 3 | -------------------------------------------------------------------------------- /vendor/docker/10_ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ "${PUBLIC_KEY}" ]; then 3 | echo "${PUBLIC_KEY}" > /root/.ssh/authorized_keys 4 | fi 5 | -------------------------------------------------------------------------------- /vendor/docker/_ror-api-dev.auto.tfvars.tmpl: -------------------------------------------------------------------------------- 1 | ror-api-dev_tags = { 2 | sha = "{{ .Env.GIT_SHA }}" 3 | version = "{{ .Env.GIT_TAG }}" 4 | } 5 | -------------------------------------------------------------------------------- /vendor/docker/_ror-api.auto.tfvars.tmpl: -------------------------------------------------------------------------------- 1 | ror-api_tags = { 2 | sha = "{{ .Env.GIT_SHA }}" 3 | version = "{{ .Env.GIT_TAG }}" 4 | } 5 | -------------------------------------------------------------------------------- /vendor/docker/ntp.conf: -------------------------------------------------------------------------------- 1 | server 0.amazon.pool.ntp.org iburst 2 | server 1.amazon.pool.ntp.org iburst 3 | server 2.amazon.pool.ntp.org iburst 4 | server 3.amazon.pool.ntp.org iburst 5 | -------------------------------------------------------------------------------- 
/vendor/docker/webapp.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80 default_server; 3 | server_name _; 4 | 5 | root /home/app/webapp/; 6 | passenger_enabled on; 7 | passenger_python /usr/bin/python; 8 | passenger_app_root /home/app/webapp/; 9 | passenger_user app; 10 | passenger_app_type wsgi; 11 | passenger_startup_file rorapi/wsgi.py; 12 | 13 | merge_slashes off; 14 | resolver 8.8.8.8; 15 | 16 | location /static { 17 | alias /home/app/webapp/static; 18 | } 19 | } 20 | --------------------------------------------------------------------------------