├── .dockerignore ├── .env.travis ├── .gitattributes ├── .github └── workflows │ ├── dev.yml │ ├── release.yml │ └── staging.yml ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yml ├── manage.py ├── requirements.txt ├── rorapi ├── __init__.py ├── common │ ├── __init__.py │ ├── countries.txt │ ├── create_update.py │ ├── csv_bulk.py │ ├── csv_create.py │ ├── csv_update.py │ ├── csv_utils.py │ ├── es_utils.py │ ├── features.py │ ├── matching.py │ ├── models.py │ ├── queries.py │ ├── record_utils.py │ ├── serializers.py │ ├── urls.py │ └── views.py ├── management │ └── commands │ │ ├── __init__.py │ │ ├── createindex.py │ │ ├── deleteindex.py │ │ ├── generaterorid.py │ │ ├── getrordump.py │ │ ├── indexror.py │ │ ├── indexrordump.py │ │ ├── legacyconvertgrid.py │ │ ├── legacydownloadgrid.py │ │ ├── legacyindexgrid.py │ │ ├── legacyseeschema.py │ │ ├── legacyupgrade.py │ │ └── setup.py ├── migrations │ ├── 0001_create_client_model.py │ ├── 0002_auto_20250326_1054.py │ ├── 0003_auto_20250415_1207.py │ └── __init__.py ├── settings.py ├── tests │ ├── __init__.py │ ├── tests_functional │ │ ├── __init__.py │ │ ├── data │ │ │ ├── dataset_affiliations.json │ │ │ └── dataset_names.json │ │ ├── evaluation.py │ │ ├── tests_matching_v1.py │ │ ├── tests_matching_v2.py │ │ ├── tests_search_v1.py │ │ └── tests_search_v2.py │ ├── tests_integration │ │ ├── __init__.py │ │ ├── tests.py │ │ ├── tests_matching_v1.py │ │ ├── tests_matching_v2.py │ │ ├── tests_search_v1.py │ │ ├── tests_search_v2.py │ │ ├── tests_v1.py │ │ └── tests_v2.py │ └── tests_unit │ │ ├── __init__.py │ │ ├── data │ │ ├── test_data_address.json │ │ ├── test_data_address_empty.json │ │ ├── test_data_create_valid.json │ │ ├── test_data_empty_es7.json │ │ ├── test_data_new_record_invalid_v2.json │ │ ├── test_data_new_record_valid_v2.json │ │ ├── test_data_retrieve_es7.json │ │ ├── test_data_retrieve_es7_v2.json │ │ ├── test_data_search_es7.json │ │ ├── 
test_data_search_es7_v2.json │ │ ├── test_update_valid.json │ │ └── test_upload_csv.csv │ │ ├── tests_client.py │ │ ├── tests_es_utils_v1.py │ │ ├── tests_es_utils_v2.py │ │ ├── tests_matching_v1.py │ │ ├── tests_matching_v2.py │ │ ├── tests_models_common.py │ │ ├── tests_models_v1.py │ │ ├── tests_models_v2.py │ │ ├── tests_queries_v1.py │ │ ├── tests_queries_v2.py │ │ ├── tests_views_v1.py │ │ ├── tests_views_v2.py │ │ └── utils.py ├── v1 │ ├── __init__.py │ ├── index_template_es7.json │ ├── models.py │ └── serializers.py ├── v2 │ ├── __init__.py │ ├── index_template_es7.json │ ├── models.py │ ├── record_constants.py │ ├── record_template.json │ ├── serializers.py │ └── tests.py └── wsgi.py └── vendor └── docker ├── 00_app_env.conf ├── 10_ssh.sh ├── _ror-api-dev.auto.tfvars.tmpl ├── _ror-api.auto.tfvars.tmpl ├── ntp.conf └── webapp.conf /.dockerignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.* 3 | -------------------------------------------------------------------------------- /.env.travis: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/.env.travis -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Override core.autocrlf and set line endings to *nix 2 | * text eol=lf -------------------------------------------------------------------------------- /.github/workflows/dev.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Dev 2 | on: 3 | push: 4 | branches: 5 | - "dev" 6 | paths-ignore: 7 | - '**docker-compose.yml' 8 | - ./github/* 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | env: 13 | ELASTIC7_HOST: "localhost" 14 | ELASTIC7_PORT: "9200" 15 | ELASTIC_PASSWORD: "changeme" 16 | 
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 17 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 18 | AWS_REGION: ${{ secrets.AWS_REGION }} 19 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 20 | LAUNCH_DARKLY_KEY: ${{ secrets.LAUNCH_DARKLY_KEY_DEV }} 21 | DB_HOST: 127.0.0.1 # Will not work with 'localhost', since that will try a Unix socket connection (!) 22 | services: 23 | elasticsearch7: 24 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 25 | env: 26 | discovery.type: single-node 27 | ES_JAVA_OPTS: -Xms512m -Xmx512m 28 | ELASTIC_PASSWORD: changeme 29 | xpack.security.enabled: "false" 30 | http.cors.enabled: "true" 31 | http.cors.allow-origin: "*" 32 | ports: 33 | - 9200:9200 34 | db: 35 | image: mysql:8.0 36 | env: 37 | MYSQL_DATABASE: "rorapi" 38 | MYSQL_USER: "ror_user" 39 | MYSQL_PASSWORD: "password" 40 | MYSQL_ROOT_PASSWORD: "password" 41 | ports: 42 | - 3306:3306 43 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 44 | steps: 45 | - name: Checkout ror-api code 46 | uses: actions/checkout@v2 47 | with: 48 | path: ror-api 49 | - name: Checkout ror-data-test 50 | uses: actions/checkout@v2 51 | with: 52 | repository: ror-community/ror-data-test 53 | token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 54 | path: ror-data-test 55 | - name: Get last data dump name 56 | working-directory: ./ror-data-test 57 | run: | 58 | FILE="$(ls -Art *.zip | tail -n 1)" 59 | echo ${FILE%.*} 60 | echo "LATEST_DUMP_FILE=${FILE%.*}" >> $GITHUB_ENV 61 | - name: Cache dependency 62 | uses: actions/cache@v4 63 | with: 64 | path: ~/.cache/pip 65 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 66 | restore-keys: | 67 | ${{ runner.os }}-pip- 68 | - name: Set up Python environment 69 | uses: actions/setup-python@v2 70 | with: 71 | python-version: "3.8" 72 | - name: Install requirements 73 | working-directory: ./ror-api 74 | run: | 75 | python -m pip install --upgrade pip 76 | pip 
install -r requirements.txt 77 | pip install yapf 78 | 79 | python manage.py collectstatic --noinput 80 | - name: Setup 81 | working-directory: ./ror-api 82 | run: | 83 | python manage.py setup v1.55-2024-10-31-ror-data -t 84 | # Dump file temp hard coded for v2.1 release 85 | # Pulled from ror-data-test per settings.py config 86 | - name: Test 87 | working-directory: ./ror-api 88 | run: | 89 | python manage.py test rorapi.tests.tests_unit 90 | # TODO fix these tests running in GitHub Action 91 | # python manage.py test rorapi.tests_integration 92 | # python manage.py test rorapi.tests_functional 93 | 94 | build: 95 | needs: test 96 | runs-on: ubuntu-latest 97 | steps: 98 | - name: Checkout 99 | uses: actions/checkout@v2 100 | - name: Set up Docker Buildx 101 | uses: docker/setup-buildx-action@v1 102 | - name: Cache Docker layers 103 | uses: actions/cache@v4 104 | with: 105 | path: /tmp/.buildx-cache 106 | key: ${{ runner.os }}-buildx-${{ github.sha }} 107 | restore-keys: | 108 | ${{ runner.os }}-buildx- 109 | - name: Login to DockerHub 110 | uses: docker/login-action@v1 111 | with: 112 | username: ${{ secrets.DOCKERHUB_RORAPI_USERNAME }} 113 | password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} 114 | - name: Build and push 115 | uses: docker/build-push-action@v2 116 | with: 117 | context: . 
118 | file: ./Dockerfile 119 | push: true 120 | tags: rorcommunity/ror-api:dev 121 | cache-from: type=local,src=/tmp/.buildx-cache 122 | cache-to: type=local,dest=/tmp/.buildx-cache 123 | 124 | deploy: 125 | needs: [test, build] 126 | runs-on: ubuntu-latest 127 | env: 128 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 129 | steps: 130 | - name: Checkout 131 | uses: actions/checkout@v2 132 | with: 133 | ref: ${{ github.event.pull_request.head.sha }} 134 | - name: Extract variables 135 | shell: bash 136 | run: | 137 | echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" 138 | echo "::set-output name=TAG::$(git tag --points-at HEAD)" 139 | echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" 140 | echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" 141 | echo "::set-output name=MESSAGE::$(git log --format=%B -n 1 ${{ github.event.after }})" 142 | id: extract_variables 143 | 144 | - name: Checkout terraform config repo 145 | uses: actions/checkout@v2 146 | with: 147 | # public repo with terraform configuration 148 | repository: "ror-community/new-deployment" 149 | persist-credentials: false 150 | - name: Commit changes to terraform config repository 151 | # use go template in terraform config repository to update git sha and tag 152 | # commit and push changes to trigger terraform workflow 153 | run: | 154 | export GIT_SHA=${{ steps.extract_variables.outputs.GIT_SHA_SHORT }} 155 | export GIT_TAG=${{ steps.extract_variables.outputs.TAG }} 156 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz 157 | tar -xzvf dockerize-linux-amd64-v0.6.0.tar.gz 158 | rm dockerize-linux-amd64-v0.6.0.tar.gz 159 | ./dockerize -template ror/services/api/_ror-api-dev.auto.tfvars.tmpl:ror/services/api/_ror-api-dev.auto.tfvars 160 | 161 | git config --local user.email "action@github.com" 162 | git config --local user.name "GitHub Action" 163 | git add 
ror/services/api/_ror-api-dev.auto.tfvars 164 | git commit -m "Adding ror-api git variables for commit ${{ steps.extract_variables.outputs.GIT_SHA }}" 165 | - name: Push changes 166 | uses: ad-m/github-push-action@v0.6.0 167 | with: 168 | github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 169 | repository: "ror-community/new-deployment" 170 | branch: "refs/heads/master" 171 | tags: false 172 | 173 | - name: Notify Slack 174 | uses: edge/simple-slack-notify@master 175 | with: 176 | channel: "#deployment-updates" 177 | color: "good" 178 | text: "A new version of the is deployed to dev." 179 | failure_text: "${env.GITHUB_WORKFLOW} (${env.GITHUB_RUN_NUMBER}) build failed" 180 | fields: | 181 | [{ "title": "Commit message", "value": "${{ steps.extract_variables.outputs.MESSAGE }}" }, 182 | { "title": "Committed by", "value": "", "short": true }, 183 | { "title": "Commit SHA", "value": "", "short": true }, 184 | { "title": "Repository", "value": "", "short": true }, 185 | { "title": "Branch", "value": "", "short": true }] -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | env: 9 | ELASTIC_PASSWORD: "changeme" 10 | ELASTIC7_HOST: "localhost" 11 | ELASTIC7_PORT: "9200" 12 | DB_HOST: 127.0.0.1 13 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 14 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 15 | AWS_REGION: ${{ secrets.AWS_REGION }} 16 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 17 | LAUNCH_DARKLY_KEY: ${{ secrets.LAUNCH_DARKLY_KEY_PROD}} 18 | services: 19 | elasticsearch7: 20 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 21 | env: 22 | discovery.type: single-node 23 | ES_JAVA_OPTS: -Xms512m -Xmx512m 24 | ELASTIC_PASSWORD: changeme 25 | xpack.security.enabled: "false" 26 | 
http.cors.enabled: "true" 27 | http.cors.allow-origin: "*" 28 | ports: 29 | - 9200:9200 30 | db: 31 | image: mysql:8.0 32 | env: 33 | MYSQL_DATABASE: "rorapi" 34 | MYSQL_USER: "ror_user" 35 | MYSQL_PASSWORD: "password" 36 | MYSQL_ROOT_PASSWORD: "password" 37 | ports: 38 | - 3306:3306 39 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 40 | steps: 41 | - name: Checkout ror-api code 42 | uses: actions/checkout@v2 43 | with: 44 | path: ror-api 45 | - name: Cache dependency 46 | uses: actions/cache@v4 47 | with: 48 | path: ~/.cache/pip 49 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 50 | restore-keys: | 51 | ${{ runner.os }}-pip- 52 | - name: Set up Python environment 53 | uses: actions/setup-python@v2 54 | with: 55 | python-version: "3.8" 56 | - name: Install requirements 57 | working-directory: ./ror-api 58 | run: | 59 | python -m pip install --upgrade pip 60 | pip install -r requirements.txt 61 | pip install yapf 62 | 63 | python manage.py collectstatic --noinput 64 | - name: Setup 65 | working-directory: ./ror-api 66 | run: | 67 | python manage.py setup v1.55-2024-10-31-ror-data -t 68 | # temp hard-coded dump file for v2.1 release 69 | - name: Test 70 | working-directory: ./ror-api 71 | run: | 72 | python manage.py test rorapi.tests.tests_unit 73 | # TODO fix these tests running in GitHub Action 74 | # python manage.py test rorapi.tests_integration 75 | # python manage.py test rorapi.tests_functional 76 | 77 | build: 78 | needs: test 79 | runs-on: ubuntu-latest 80 | steps: 81 | - name: Checkout 82 | uses: actions/checkout@v2 83 | - name: Set up Docker Buildx 84 | uses: docker/setup-buildx-action@v1 85 | - name: Cache Docker layers 86 | uses: actions/cache@v4 87 | with: 88 | path: /tmp/.buildx-cache 89 | key: ${{ runner.os }}-buildx-${{ github.sha }} 90 | restore-keys: | 91 | ${{ runner.os }}-buildx- 92 | - name: Login to DockerHub 93 | uses: docker/login-action@v1 94 | with: 95 | username: 
${{ secrets.DOCKERHUB_RORAPI_USERNAME }} 96 | password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} 97 | - name: Get git tag 98 | run: | 99 | echo "::set-output name=GIT_TAG::$(git tag --points-at HEAD)" 100 | id: set_git_vars 101 | - name: Build and push 102 | uses: docker/build-push-action@v2 103 | with: 104 | context: . 105 | file: ./Dockerfile 106 | push: true 107 | tags: rorcommunity/ror-api:${{ steps.set_git_vars.outputs.GIT_TAG }} 108 | cache-from: type=local,src=/tmp/.buildx-cache 109 | cache-to: type=local,dest=/tmp/.buildx-cache 110 | 111 | deploy: 112 | needs: [test, build] 113 | runs-on: ubuntu-latest 114 | env: 115 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 116 | steps: 117 | - name: Checkout 118 | uses: actions/checkout@v2 119 | 120 | - name: Extract variables 121 | shell: bash 122 | run: | 123 | echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" 124 | echo "::set-output name=TAG::$(git tag --points-at HEAD)" 125 | echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" 126 | echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" 127 | id: extract_variables 128 | 129 | - name: Checkout terraform config repo 130 | uses: actions/checkout@v2 131 | with: 132 | # public repo with terraform configuration 133 | repository: 'ror-community/new-deployment' 134 | persist-credentials: false 135 | - name: Commit changes to terraform config repository 136 | # use go template in terraform config repository to update git sha and tag 137 | # commit and push changes to trigger terraform workflow 138 | run: | 139 | export GIT_SHA=${{ steps.extract_variables.outputs.GIT_SHA_SHORT }} 140 | export GIT_TAG=${{ steps.extract_variables.outputs.TAG }} 141 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz 142 | tar -xzvf dockerize-linux-amd64-v0.6.0.tar.gz 143 | rm dockerize-linux-amd64-v0.6.0.tar.gz 144 | ./dockerize -template 
ror/services/api/_ror-api.auto.tfvars.tmpl:ror/services/api/_ror-api.auto.tfvars 145 | 146 | git config --local user.email "action@github.com" 147 | git config --local user.name "GitHub Action" 148 | git add ror/services/api/_ror-api.auto.tfvars 149 | git commit -m "Adding ror-api git variables for commit ${{ steps.extract_variables.outputs.GIT_SHA }}" 150 | - name: Push changes 151 | uses: ad-m/github-push-action@v0.6.0 152 | with: 153 | github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 154 | repository: 'ror-community/new-deployment' 155 | branch: 'refs/heads/master' 156 | tags: false 157 | 158 | - name: Notify Slack 159 | uses: edge/simple-slack-notify@master 160 | with: 161 | channel: '#deployment-updates' 162 | color: 'good' 163 | text: 'A new version of the is deployed to production.' 164 | failure_text: '${env.GITHUB_WORKFLOW} (${env.GITHUB_RUN_NUMBER}) build failed' 165 | fields: | 166 | [{ "title": "Committed by", "value": "", "short": true }, 167 | { "title": "Commit SHA", "value": "", "short": true }, 168 | { "title": "Repository", "value": "", "short": true }, 169 | { "title": "Release", "value": "", "short": true }] 170 | -------------------------------------------------------------------------------- /.github/workflows/staging.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Staging 2 | on: 3 | push: 4 | branches: 5 | - "staging" 6 | jobs: 7 | test: 8 | runs-on: ubuntu-latest 9 | env: 10 | ELASTIC_PASSWORD: "changeme" 11 | ELASTIC7_HOST: "localhost" 12 | ELASTIC7_PORT: "9200" 13 | DB_HOST: 127.0.0.1 14 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 15 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 16 | AWS_REGION: ${{ secrets.AWS_REGION }} 17 | GITHUB_TOKEN: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 18 | LAUNCH_DARKLY_KEY: ${{ secrets.LAUNCH_DARKLY_KEY_STAGING }} 19 | services: 20 | elasticsearch7: 21 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 22 | env: 23 | 
discovery.type: single-node 24 | ES_JAVA_OPTS: -Xms512m -Xmx512m 25 | ELASTIC_PASSWORD: changeme 26 | xpack.security.enabled: "false" 27 | http.cors.enabled: "true" 28 | http.cors.allow-origin: "*" 29 | ports: 30 | - 9200:9200 31 | db: 32 | image: mysql:8.0 33 | env: 34 | MYSQL_DATABASE: "rorapi" 35 | MYSQL_USER: "ror_user" 36 | MYSQL_PASSWORD: "password" 37 | MYSQL_ROOT_PASSWORD: "password" 38 | ports: 39 | - 3306:3306 40 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 41 | steps: 42 | - name: Checkout ror-api code 43 | uses: actions/checkout@v2 44 | with: 45 | path: ror-api 46 | - name: Cache dependency 47 | uses: actions/cache@v4 48 | with: 49 | path: ~/.cache/pip 50 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} 51 | restore-keys: | 52 | ${{ runner.os }}-pip- 53 | - name: Set up Python environment 54 | uses: actions/setup-python@v2 55 | with: 56 | python-version: "3.8" 57 | - name: Install requirements 58 | working-directory: ./ror-api 59 | run: | 60 | python -m pip install --upgrade pip 61 | pip install -r requirements.txt 62 | pip install yapf 63 | 64 | python manage.py collectstatic --noinput 65 | - name: Setup 66 | working-directory: ./ror-api 67 | run: | 68 | python manage.py setup v1.55-2024-10-31-ror-data -t 69 | # temp hard-coded dump file for v2.1 release 70 | - name: Test 71 | working-directory: ./ror-api 72 | run: | 73 | python manage.py test rorapi.tests.tests_unit 74 | # TODO fix these tests running in GitHub Action 75 | # python manage.py test rorapi.tests_integration 76 | # python manage.py test rorapi.tests_functional 77 | 78 | build: 79 | needs: test 80 | runs-on: ubuntu-latest 81 | steps: 82 | - name: Checkout 83 | uses: actions/checkout@v2 84 | - name: Set up Docker Buildx 85 | uses: docker/setup-buildx-action@v1 86 | - name: Cache Docker layers 87 | uses: actions/cache@v4 88 | with: 89 | path: /tmp/.buildx-cache 90 | key: ${{ runner.os }}-buildx-${{ github.sha }} 91 
| restore-keys: | 92 | ${{ runner.os }}-buildx- 93 | - name: Login to DockerHub 94 | uses: docker/login-action@v1 95 | with: 96 | username: ${{ secrets.DOCKERHUB_RORAPI_USERNAME }} 97 | password: ${{ secrets.DOCKERHUB_RORAPI_TOKEN }} 98 | - name: Build and push 99 | uses: docker/build-push-action@v2 100 | with: 101 | context: . 102 | file: ./Dockerfile 103 | push: true 104 | tags: rorcommunity/ror-api:staging 105 | cache-from: type=local,src=/tmp/.buildx-cache 106 | cache-to: type=local,dest=/tmp/.buildx-cache 107 | 108 | deploy: 109 | needs: [test, build] 110 | runs-on: ubuntu-latest 111 | env: 112 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 113 | steps: 114 | - name: Checkout 115 | uses: actions/checkout@v2 116 | with: 117 | ref: ${{ github.event.pull_request.head.sha }} 118 | - name: Extract variables 119 | shell: bash 120 | run: | 121 | echo "::set-output name=BRANCH::$(echo ${GITHUB_REF#refs/heads/} | sed 's/\//_/g')" 122 | echo "::set-output name=TAG::$(git tag --points-at HEAD)" 123 | echo "::set-output name=GIT_SHA::$(git rev-parse HEAD)" 124 | echo "::set-output name=GIT_SHA_SHORT::$(git rev-parse --short HEAD)" 125 | echo "::set-output name=MESSAGE::$(git log --format=%B -n 1 ${{ github.event.after }})" 126 | id: extract_variables 127 | 128 | - name: Checkout terraform config repo 129 | uses: actions/checkout@v2 130 | with: 131 | # public repo with terraform configuration 132 | repository: 'ror-community/new-deployment' 133 | persist-credentials: false 134 | - name: Commit changes to terraform config repository 135 | # use go template in terraform config repository to update git sha and tag 136 | # commit and push changes to trigger terraform workflow 137 | run: | 138 | export GIT_SHA=${{ steps.extract_variables.outputs.GIT_SHA_SHORT }} 139 | export GIT_TAG=${{ steps.extract_variables.outputs.GIT_TAG }} 140 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz 141 | tar -xzvf 
dockerize-linux-amd64-v0.6.0.tar.gz 142 | rm dockerize-linux-amd64-v0.6.0.tar.gz 143 | ./dockerize -template ror/services/api/_ror-api-staging.auto.tfvars.tmpl:ror/services/api/_ror-api-staging.auto.tfvars 144 | 145 | git config --local user.email "action@github.com" 146 | git config --local user.name "GitHub Action" 147 | git add ror/services/api/_ror-api-staging.auto.tfvars 148 | git commit -m "Adding ror-api git variables for commit ${{ steps.extract_variables.outputs.GIT_SHA }}" 149 | - name: Push changes 150 | uses: ad-m/github-push-action@v0.6.0 151 | with: 152 | github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} 153 | repository: 'ror-community/new-deployment' 154 | branch: 'refs/heads/master' 155 | tags: false 156 | 157 | - name: Notify Slack 158 | uses: edge/simple-slack-notify@master 159 | with: 160 | channel: '#deployment-updates' 161 | color: 'good' 162 | text: 'A new version of the is deployed to staging.' 163 | failure_text: '${env.GITHUB_WORKFLOW} (${env.GITHUB_RUN_NUMBER}) build failed' 164 | fields: | 165 | [{ "title": "Commit message", "value": "${{ steps.extract_variables.outputs.MESSAGE }}" }, 166 | { "title": "Committed by", "value": "", "short": true }, 167 | { "title": "Commit SHA", "value": "", "short": true }, 168 | { "title": "Repository", "value": "", "short": true }, 169 | { "title": "Branch", "value": "", "short": true }] 170 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.* 3 | !.env.example 4 | !.env.travis 5 | docker-compose.override.yml 6 | __pycache__/ 7 | *.pyc 8 | rorapi/data/** 9 | esdata/** 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | 5 | sudo: required 6 | 7 | services: 8 | - docker 9 | 10 | env: 11 | 
- DOCKER_COMPOSE_VERSION=1.23.2 12 | 13 | before_install: 14 | - pip3 install yapf 15 | - yapf -d -r . 16 | 17 | install: 18 | - sudo rm /usr/local/bin/docker-compose 19 | - curl -L https://github.com/docker/compose/releases/download/${DOCKER_COMPOSE_VERSION}/docker-compose-`uname -s`-`uname -m` > docker-compose 20 | - chmod +x docker-compose 21 | - sudo mv docker-compose /usr/local/bin 22 | - sudo pip install awscli 23 | 24 | before_script: 25 | - cp .env.travis .env 26 | - docker-compose up -d 27 | - sleep 120 28 | - docker-compose exec web python manage.py createindex 29 | - docker-compose exec web python manage.py upgrade 30 | - docker-compose exec web python manage.py setup 31 | 32 | script: 33 | - docker-compose exec web python manage.py test rorapi.tests 34 | - docker-compose exec web python manage.py test rorapi.tests_integration 35 | - docker-compose exec web python manage.py test rorapi.tests_functional 36 | 37 | after_success: 38 | - export AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY 39 | - export AWS_SECRET_ACCESS_KEY=$AWS_SECRET_KEY 40 | - docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD"; 41 | - REPO=rorcommunity/ror-api; 42 | - AUTO_DEPLOY=false; 43 | - if [ "${TRAVIS_TAG?}" ]; then 44 | docker build -f Dockerfile -t $REPO:$TRAVIS_TAG .; 45 | docker push $REPO:$TRAVIS_TAG; 46 | echo "Pushed to" $REPO:$TRAVIS_TAG; 47 | AUTO_DEPLOY=true; 48 | elif [[ "$TRAVIS_BRANCH" == "dev" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then 49 | docker build -f Dockerfile -t $REPO:$TRAVIS_BRANCH .; 50 | docker push $REPO:$TRAVIS_BRANCH; 51 | echo "Pushed to" $REPO:$TRAVIS_BRANCH; 52 | AUTO_DEPLOY=true; 53 | else 54 | docker build -f Dockerfile -t $REPO:$TRAVIS_BRANCH .; 55 | docker push $REPO:$TRAVIS_BRANCH; 56 | echo "Pushed to" $REPO:$TRAVIS_BRANCH; 57 | fi 58 | 59 | - if [ "$AUTO_DEPLOY" == "true" ]; then 60 | wget https://github.com/jwilder/dockerize/releases/download/v0.6.0/dockerize-linux-amd64-v0.6.0.tar.gz; 61 | tar -xzvf dockerize-linux-amd64-v0.6.0.tar.gz; 62 | rm 
dockerize-linux-amd64-v0.6.0.tar.gz; 63 | export GIT_SHA=$(git rev-parse --short HEAD); 64 | export GIT_TAG=$(git describe --tags $(git rev-list --tags --max-count=1)); 65 | git clone "https://${TRAVIS_SECURE_TOKEN}@github.com/ror-community/new-deployment.git"; 66 | 67 | sentry-cli releases new ror-api:${GIT_TAG} --finalize --project ror-api; 68 | 69 | if [ "${TRAVIS_TAG?}" ]; then 70 | ./dockerize -template vendor/docker/_ror-api.auto.tfvars.tmpl:new-deployment/ror/services/api/_ror-api.auto.tfvars; 71 | sentry-cli releases deploys ror-ap:${GIT_TAG} new -e production; 72 | else 73 | ./dockerize -template vendor/docker/_ror-api-dev.auto.tfvars.tmpl:new-deployment/ror/services/api/_ror-api-dev.auto.tfvars; 74 | sentry-cli releases deploys ror-api:${GIT_TAG} new -e dev; 75 | fi 76 | 77 | sentry-cli releases set-commits --auto ror-api:${GIT_TAG}; 78 | 79 | cd new-deployment; 80 | git remote; 81 | git config user.email ${DOCKER_EMAIL}; 82 | git config user.name ${DOCKER_USERNAME}; 83 | 84 | if [ "${TRAVIS_TAG?}" ]; then 85 | git add ror/services/api/_ror-api.auto.tfvars; 86 | git commit -m "Adding ror-api git variables for commit tagged ${TRAVIS_TAG?}"; 87 | else 88 | git add ror/services/api/_ror-api-dev.auto.tfvars; 89 | git commit -m "Adding ror-api git variables for latest commit on branch $TRAVIS_BRANCH"; 90 | fi 91 | 92 | git push "https://${TRAVIS_SECURE_TOKEN}@github.com/ror-community/new-deployment.git" master; 93 | fi 94 | 95 | notifications: 96 | email: false 97 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM phusion/passenger-full:1.0.12 2 | MAINTAINER Martin Fenner "mfenner@datacite.org" 3 | 4 | # Set correct environment variables 5 | ENV HOME /home/app 6 | 7 | # Allow app user to read /etc/container_environment 8 | RUN usermod -a -G docker_env app 9 | 10 | # Use baseimage-docker's init process 11 | CMD ["/sbin/my_init"] 
12 | 13 | # Update installed APT packages, clean up when done 14 | RUN mv /etc/apt/sources.list.d /etc/apt/sources.list.d.bak && \ 15 | apt update && apt install -y ca-certificates && \ 16 | mv /etc/apt/sources.list.d.bak /etc/apt/sources.list.d && \ 17 | apt-get upgrade -y -o Dpkg::Options::="--force-confold" && \ 18 | apt-get clean && \ 19 | apt-get install ntp wget unzip tzdata python3-pip libmagic1 default-libmysqlclient-dev libcairo2-dev pkg-config -y && \ 20 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 21 | 22 | # Enable Passenger and Nginx and remove the default site 23 | # Preserve env variables for nginx 24 | RUN rm -f /etc/service/nginx/down && \ 25 | rm /etc/nginx/sites-enabled/default 26 | COPY vendor/docker/webapp.conf /etc/nginx/sites-enabled/webapp.conf 27 | COPY vendor/docker/00_app_env.conf /etc/nginx/conf.d/00_app_env.conf 28 | 29 | # Use Amazon NTP servers 30 | COPY vendor/docker/ntp.conf /etc/ntp.conf 31 | 32 | # Copy webapp folder 33 | COPY . /home/app/webapp/ 34 | RUN chown -R app:app /home/app/webapp && \ 35 | chmod -R 755 /home/app/webapp 36 | 37 | # enable SSH 38 | RUN rm -f /etc/service/sshd/down && \ 39 | /etc/my_init.d/00_regen_ssh_host_keys.sh 40 | 41 | # install custom ssh key during startup 42 | RUN mkdir -p /etc/my_init.d 43 | COPY vendor/docker/10_ssh.sh /etc/my_init.d/10_ssh.sh 44 | 45 | # workdir 46 | WORKDIR /home/app/webapp 47 | 48 | # point /usr/bin/python to Python3 49 | RUN ln -s -f /usr/bin/python3 /usr/bin/python 50 | 51 | # install Python packages 52 | RUN pip3 install --no-cache-dir --upgrade pip 53 | RUN pip3 install --no-cache-dir -r requirements.txt 54 | RUN pip3 install yapf 55 | 56 | # collect static files for Django 57 | ENV DJANGO_SKIP_DB_CHECK=True 58 | RUN python manage.py collectstatic --noinput 59 | 60 | # Expose web 61 | EXPOSE 80 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 
| The MIT License (MIT) 2 | Copyright (c) 2017-2019 Crossref 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
9 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | elasticsearch7: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:7.10.1 6 | ports: 7 | - "9200:9200" 8 | - "9300:9300" 9 | environment: 10 | discovery.type: single-node 11 | ES_JAVA_OPTS: -Xmx256m -Xms256m 12 | ELASTIC_PASSWORD: changeme 13 | xpack.security.enabled: "false" 14 | http.cors.enabled: "true" 15 | http.cors.allow-origin: "*" 16 | healthcheck: 17 | test: curl -f http://elastic:changeme@elasticsearch7:9200 18 | interval: 10s 19 | timeout: 1s 20 | volumes: 21 | - ./esdata:/usr/share/elasticsearch/data 22 | db: 23 | image: mysql:8.0 24 | volumes: 25 | - mysql_data:/var/lib/mysql 26 | env_file: 27 | - .env 28 | ports: 29 | - "3306:3306" 30 | healthcheck: 31 | test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] 32 | timeout: 20s 33 | retries: 10 34 | web: 35 | container_name: rorapiweb 36 | env_file: .env 37 | build: . 38 | #image: rorcommunity/ror-api 39 | ports: 40 | - "9292:80" 41 | - "2222:22" 42 | volumes: 43 | - ./rorapi:/home/app/webapp/rorapi 44 | depends_on: 45 | - elasticsearch7 46 | - db 47 | volumes: 48 | mysql_data: 49 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rorapi.settings') 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? 
Did you " 15 | "forget to activate a virtual environment?") from exc 16 | execute_from_command_line(sys.argv) 17 | 18 | 19 | if __name__ == '__main__': 20 | main() 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Django==2.2.28 2 | elasticsearch_dsl==7.4.1 3 | geonamescache==1.3.0 4 | requests==2.22.0 5 | requests-aws4auth==0.9 6 | mock==3.0.5 7 | base32_crockford==0.3.0 8 | elasticsearch==7.10.1 9 | djangorestframework==3.11.2 10 | coreapi==2.3.3 11 | django-prometheus==1.0.15 12 | sentry-sdk==0.12.2 13 | python-dotenv==0.10.3 14 | django-cors-headers==3.1.0 15 | unidecode==1.1.1 16 | fuzzywuzzy==0.17.0 17 | python-Levenshtein==0.12.1 18 | statsmodels==0.10.2 19 | boto3 20 | pandas==1.4.1 21 | numpy==1.22 22 | titlecase==2.3 23 | update_address @ git+https://github.com/ror-community/update_address.git 24 | launchdarkly-server-sdk==7.6.1 25 | jsonschema==3.2.0 26 | python-magic 27 | iso639-lang 28 | mysqlclient==2.2.7 29 | bleach==6.0.0 30 | pycountry==22.3.5 31 | django-ses==3.5.0 -------------------------------------------------------------------------------- /rorapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/__init__.py -------------------------------------------------------------------------------- /rorapi/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/common/__init__.py -------------------------------------------------------------------------------- /rorapi/common/create_update.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from datetime import datetime 3 | 
# JSON schema used to validate every v2 record (fetched once at import time)
V2_SCHEMA = get_file_from_url("https://raw.githubusercontent.com/ror-community/ror-schema/refs/heads/master/ror_schema_v2_1.json")


def update_record(json_input, existing_record):
    """Overlay the fields in json_input onto a copy of existing_record.

    Returns the merged record with admin.last_modified refreshed.
    """
    record = copy.deepcopy(existing_record)
    for k, v in json_input.items():
        record[k] = copy.deepcopy(v)
    return update_last_mod(record)


def update_last_mod(record):
    """Set record['admin']['last_modified'] to today's date and return the record."""
    record['admin']['last_modified'] = copy.deepcopy(V2_LAST_MOD)
    record['admin']['last_modified']['date'] = datetime.now().strftime("%Y-%m-%d")
    return record


def check_optional_fields(record):
    """Return True if any optional v2 field is missing from record."""
    return any(k not in record for k in V2_OPTIONAL_FIELD_DEFAULTS)


def add_missing_optional_fields(record):
    """Fill any missing optional v2 fields with their default values."""
    for k, v in V2_OPTIONAL_FIELD_DEFAULTS.items():
        if k not in record:
            record[k] = v
    return record


def add_created_last_mod(record):
    """Stamp a brand-new record's admin.created and admin.last_modified with today."""
    today = datetime.now().strftime("%Y-%m-%d")
    record['admin'] = copy.deepcopy(V2_ADMIN)
    record['admin']['created']['date'] = today
    record['admin']['last_modified']['date'] = today
    return record


def update_locations(locations):
    """Re-fetch Geonames details for every location carrying a geonames_id.

    Returns (error, updated_locations); error is a message for the first
    failed lookup, otherwise None.
    """
    error = None
    updated_locations = []
    for location in locations:
        if 'geonames_id' in location:
            try:
                updated_location = ua.new_geonames_v2(str(location['geonames_id']))
                updated_locations.append(updated_location['location'])
            # FIX: was a bare `except:`, which also swallowed SystemExit /
            # KeyboardInterrupt; catch Exception only.
            except Exception:
                error = "Error retrieving Geonames data for ID {}. Please check that this is a valid Geonames ID".format(location['geonames_id'])
    return error, updated_locations


def sort_list_fields(v2_record):
    """Sort each list-valued field named in V2_SORT_KEYS (by its configured key, if any)."""
    for field in v2_record:
        if field in V2_SORT_KEYS:
            sort_key = V2_SORT_KEYS[field]
            if sort_key is not None:
                v2_record[field] = sorted(v2_record[field], key=lambda x: x[sort_key])
            else:
                v2_record[field] = sorted(v2_record[field])
    return v2_record


def new_record_from_json(json_input, version):
    """Build, enrich and schema-validate a new v2 record from client JSON.

    Fills optional-field defaults, resolves Geonames locations, stamps
    created/last_modified, mints a fresh ROR ID, then validates against
    V2_SCHEMA. Returns (error, valid_data); exactly one is None.
    """
    error = None
    valid_data = None
    new_record = copy.deepcopy(json_input)
    if check_optional_fields(new_record):
        new_record = add_missing_optional_fields(new_record)
    error, updated_locations = update_locations(new_record['locations'])
    if not error:
        new_record['locations'] = updated_locations
        new_record = add_created_last_mod(new_record)
        # mint an ID that is guaranteed not to collide with an indexed record
        new_record['id'] = check_ror_id(version)
        error, valid_data = validate_record(sort_list_fields(new_record), V2_SCHEMA)
    return error, valid_data


def update_record_from_json(new_json, existing_org):
    """Merge new_json into an existing organization and schema-validate the result.

    Returns (error, valid_data); exactly one is None.
    """
    error = None
    valid_data = None
    existing_record = OrganizationSerializerV2(existing_org).data
    updated_record = update_record(new_json, existing_record)
    error, updated_locations = update_locations(updated_record['locations'])
    if not error:
        updated_record['locations'] = updated_locations
        error, valid_data = validate_record(sort_list_fields(updated_record), V2_SCHEMA)
    return error, valid_data
def save_record_file(ror_id, updated, json_obj, dir_name):
    """Write a record's JSON to DATA['DIR']/<dir_name>/(updates|new)/<short-id>.json."""
    dir_path = os.path.join(DATA['DIR'], dir_name)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    subdir = 'updates' if updated else 'new'
    subdir_path = os.path.join(dir_path, subdir)
    if not os.path.exists(subdir_path):
        os.mkdir(subdir_path)
    full_path = os.path.join(subdir_path, ror_id.split('https://ror.org/')[1] + '.json')
    # FIX: open explicitly as UTF-8 — ensure_ascii=False emits non-ASCII
    # characters, which crashed under non-UTF-8 default locales.
    with open(full_path, "w", encoding="utf-8") as outfile:
        json.dump(json_obj, outfile, ensure_ascii=False, indent=2)


def save_report_file(report, report_fields, csv_file, dir_name, validate_only):
    """Write the processing report CSV and, unless validate_only, a copy of the input file."""
    dir_path = os.path.join(DATA['DIR'], dir_name)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    filepath = os.path.join(dir_path, 'report.csv')
    # FIX: csv.writer requires newline='' to avoid spurious blank rows on Windows
    with open(filepath, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=report_fields)
        writer.writeheader()
        writer.writerows(report)
    if not validate_only:
        # save copy of input file
        filepath = os.path.join(dir_path, 'input.csv')
        csv_file.seek(0)
        with open(filepath, 'wb+') as f:
            for chunk in csv_file.chunks():
                f.write(chunk)


def process_csv(csv_file, version, validate_only):
    """Process an uploaded CSV of new and updated ROR records.

    Each row either updates an existing record (when 'id' is set) or creates
    a new one. Valid records are written to disk; a per-row report is always
    produced. Unless validate_only, the whole batch is zipped and uploaded
    to S3. Returns (error, success_msg).
    """
    dir_name = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + "-ror-records"
    success_msg = None
    error = None
    report = []
    report_fields = ['row', 'html_url', 'ror_id', 'action', 'errors']
    skipped_count = 0
    updated_count = 0
    new_count = 0
    read_file = csv_file.read().decode('utf-8')
    reader = csv.DictReader(io.StringIO(read_file))
    row_num = 2  # row 1 is the header
    for row in reader:
        html_url = row['html_url'] if row['html_url'] else None
        ror_id = None
        updated = False
        if row['id']:
            ror_id = row['id']
            updated = True
            row_errors, v2_record = update_record_from_csv(row, version)
        else:
            row_errors, v2_record = new_record_from_csv(row, version)
        if not row_errors:
            if updated:
                action = 'updated'
                updated_count += 1
            else:
                action = 'created'
                new_count += 1
                ror_id = v2_record['id']
            serializer = OrganizationSerializerV2(v2_record)
            json_obj = json.loads(JSONRenderer().render(serializer.data))
            if not validate_only:
                # FIX: save_record_file returns None; dropped the dead
                # `file = ...` assignment
                save_record_file(ror_id, updated, json_obj, dir_name)
        else:
            action = 'skipped'
            skipped_count += 1
        if validate_only and action == 'created':
            # a validation-only run mints no real ID for created records
            ror_id = None
        report.append({"row": row_num,
                       "html_url": html_url,
                       "ror_id": ror_id if ror_id else '',
                       "action": action,
                       "errors": "; ".join(row_errors) if row_errors else ''})
        row_num += 1
    if new_count > 0 or updated_count > 0 or skipped_count > 0:
        try:
            if validate_only:
                try:
                    save_report_file(report, report_fields, csv_file, dir_name, validate_only)
                    success_msg = os.path.join(DATA['DIR'], dir_name, 'report.csv')
                except Exception as e:
                    error = f"Error creating validation report: {e}"
            else:
                # create report file
                save_report_file(report, report_fields, csv_file, dir_name, validate_only)
                # create zip file
                zipfile = shutil.make_archive(os.path.join(DATA['DIR'], dir_name), 'zip', DATA['DIR'], dir_name)
                # upload to S3
                try:
                    DATA['CLIENT'].upload_file(zipfile, DATA['PUBLIC_STORE'], dir_name + '.zip')
                    zipfile = f"https://s3.eu-west-1.amazonaws.com/{DATA['PUBLIC_STORE']}/{urllib.parse.quote(dir_name)}.zip"
                    success_msg = {"file": zipfile,
                                   "rows processed": new_count + updated_count + skipped_count,
                                   "created": new_count,
                                   "updated": updated_count,
                                   "skipped": skipped_count}
                except Exception as e:
                    error = f"Error uploading zipfile to S3: {e}"
        except Exception as e:
            error = f"Unexpected error generating records: {e}"

    return error, success_msg
def new_record_from_csv(csv_data, version):
    """Build and validate a new v2 record from one CSV row.

    Returns (errors, new_record): a list of error messages (empty on
    success) and the validated record (None on validation failure).
    """
    v2_data = copy.deepcopy(V2_TEMPLATE)
    errors = []

    # domains
    if csv_data['domains']:
        v2_data['domains'] = [d.strip() for d in csv_data['domains'].strip(';').split(';')]

    # established
    if csv_data['established']:
        try:
            v2_data['established'] = int(csv_data['established'].strip())
        except ValueError:
            # FIX: a non-numeric year previously raised and aborted the whole
            # upload; report it like every other row error instead
            errors.append("Value {} in established field cannot be converted to an integer".format(csv_data['established']))

    # external ids
    for k, v in V2_EXTERNAL_ID_TYPES.items():
        if csv_data['external_ids.type.' + v + '.all']:
            all_ids = [i.strip() for i in csv_data['external_ids.type.' + v + '.all'].strip(';').split(';')]
            preferred = csv_data['external_ids.type.' + v + '.preferred']
            v2_data['external_ids'].append({
                "type": v,
                "all": all_ids,
                # default preferred ID to the first listed one
                "preferred": preferred.strip() if preferred else all_ids[0]
            })

    # links
    for k, v in V2_LINK_TYPES.items():
        if csv_data['links.type.' + v]:
            for l in csv_data['links.type.' + v].strip(';').split(';'):
                v2_data['links'].append({"type": v, "value": l.strip()})

    # locations
    if csv_data['locations.geonames_id']:
        geonames_ids = [i.strip() for i in csv_data['locations.geonames_id'].strip(';').split(';')]
        for geonames_id in geonames_ids:
            # geonames_details are filled in later by update_locations()
            v2_data['locations'].append({"geonames_id": geonames_id, "geonames_details": {}})

    # names: values may carry a "*lang" suffix, e.g. "Universität Wien*de"
    temp_names = []
    for k, v in V2_NAME_TYPES.items():
        if csv_data['names.types.' + v]:
            for n in csv_data['names.types.' + v].strip(';').split(';'):
                # FIX: lang_code is now always initialised; previously a name
                # like "Foo*" (empty lang) left lang_code unbound or carrying
                # the previous iteration's value
                name_val = None
                lang_code = None
                if LANG_DELIMITER in n:
                    if n.count(LANG_DELIMITER) == 1:
                        name_val, lang = n.split("*")
                        if lang:
                            lang_errors, lang_code = get_lang_code(lang.strip())
                            if lang_errors:
                                errors.append("Could not convert language value to ISO code: {}".format(lang))
                    else:
                        errors.append("Could not parse name value {} in names.types.{} because it contains multiple {} lang delimiter chars.".format(n, v, LANG_DELIMITER))
                else:
                    name_val = n
                if name_val:
                    temp_names.append({
                        "types": [v],
                        "value": name_val.strip(),
                        "lang": lang_code
                    })

    # merge duplicate (value, lang) pairs into one entry with combined types
    name_vals = [n['value'] for n in temp_names]
    dup_names = []
    for n in name_vals:
        if name_vals.count(n) > 1 and n not in dup_names:
            dup_names.append(n)
    for d in dup_names:
        dup_names_objs = [t for t in temp_names if t['value'] == d]
        lang_codes = [dno['lang'] for dno in dup_names_objs]
        for lang_code in lang_codes:
            if lang_codes.count(lang_code) > 1:
                name_lang_dups = [dno for dno in dup_names_objs if dno['lang'] == lang_code]
                types = []
                for n in name_lang_dups:
                    types.extend(n['types'])
                name_obj = {"types": types, "value": d, "lang": lang_code}
                if name_obj not in temp_names:
                    temp_names = [t for t in temp_names if t not in name_lang_dups]
                    temp_names.append(name_obj)
    v2_data['names'] = temp_names

    # status
    if csv_data['status']:
        v2_data['status'] = csv_data['status'].strip().lower()

    # types
    if csv_data['types']:
        v2_data['types'] = [t.strip().lower() for t in csv_data['types'].strip(';').split(';')]

    validation_error, new_record = new_record_from_json(v2_data, version)
    if validation_error:
        errors.append(validation_error)
    return errors, new_record
"links.type.website": UPDATE_ACTIONS_MULTI, 30 | "links.type.wikipedia": UPDATE_ACTIONS_MULTI, 31 | "locations.geonames_id": UPDATE_ACTIONS_MULTI, 32 | "names.types.acronym": UPDATE_ACTIONS_MULTI, 33 | "names.types.alias": UPDATE_ACTIONS_MULTI, 34 | "names.types.label": UPDATE_ACTIONS_MULTI, 35 | "names.types.ror_display": [UPDATE_ACTIONS["REPLACE"]], 36 | "status": [UPDATE_ACTIONS["REPLACE"]], 37 | "types": UPDATE_ACTIONS_MULTI 38 | } 39 | 40 | LANG_DELIMITER = "*" 41 | 42 | UPDATE_DELIMITER = "==" 43 | 44 | 45 | def get_actions_values(csv_field): 46 | print("getting actions values:") 47 | actions_values = {} 48 | if csv_field.lower() == UPDATE_ACTIONS["DELETE"]: 49 | actions_values[UPDATE_ACTIONS["DELETE"]] = None 50 | elif UPDATE_DELIMITER in csv_field: 51 | for ua in list(UPDATE_ACTIONS.values()): 52 | print(ua) 53 | if ua + UPDATE_DELIMITER in csv_field: 54 | print("doing regex:") 55 | regex = r"(" + re.escape( 56 | ua + UPDATE_DELIMITER) + r")(.*?)(?=$|(add|delete|replace)==)" 57 | result = re.search(regex, csv_field) 58 | print(result[0]) 59 | temp_val = result[0].replace(ua + UPDATE_DELIMITER, '') 60 | print("temp val:") 61 | print(temp_val) 62 | actions_values[ua] = [v.strip() for v in temp_val.split(';') if v] 63 | 64 | else: 65 | actions_values[UPDATE_ACTIONS["REPLACE"]] = [v.strip() for v in csv_field.split(';') if v] 66 | print(actions_values) 67 | return actions_values 68 | 69 | def validate_csv(csv_file): 70 | errors = [] 71 | try: 72 | read_file = csv_file.read().decode('utf-8') 73 | reader = csv.DictReader(io.StringIO(read_file)) 74 | rowcount = 0 75 | for row in reader: 76 | rowcount += 1 77 | if rowcount > 0: 78 | csv_fields = reader.fieldnames 79 | missing_fields = [] 80 | for field in CSV_REQUIRED_FIELDS_ACTIONS.keys(): 81 | if field not in csv_fields: 82 | missing_fields.append(field) 83 | print(missing_fields) 84 | if missing_fields: 85 | errors.append(f'CSV file is missing columns: {", ".join(missing_fields)}') 86 | else: 87 | 
errors.append("CSV file contains no data rows") 88 | except IOError as e: 89 | errors.append(f"Error parsing CSV file: {e}") 90 | print(errors) 91 | return errors 92 | 93 | def validate_csv_row_update_syntax(csv_data): 94 | print("validating row") 95 | errors = [] 96 | for k, v in csv_data.items(): 97 | if UPDATE_DELIMITER in v: 98 | print("field:") 99 | print(k) 100 | print("value:") 101 | print(v) 102 | actions_values = get_actions_values(v) 103 | print("actions values:") 104 | print(actions_values) 105 | update_actions = list(actions_values.keys()) 106 | if not update_actions: 107 | errors.append("Update delimiter '{}' found in '{}' field but no valid update action found in value {}".format(UPDATE_DELIMITER, k, v)) 108 | if len(update_actions) > 2: 109 | errors.append("{} update actions '{}' found in '{}' field but only 2 are allowed".format(str(len(update_actions)), ", ".join(update_actions), k)) 110 | if len(update_actions) == 2: 111 | if not (UPDATE_ACTIONS['ADD'] and UPDATE_ACTIONS['DELETE']) in update_actions: 112 | errors.append("Invalid combination of update actions '{}' found in '{}' field.".format(", ".join(update_actions), k)) 113 | disallowed_actions = [ua for ua in update_actions if ua not in CSV_REQUIRED_FIELDS_ACTIONS[k]] 114 | print("allowed actions:") 115 | print(CSV_REQUIRED_FIELDS_ACTIONS[k]) 116 | print("disallowed actions:") 117 | print(disallowed_actions) 118 | if disallowed_actions: 119 | errors.append("Invalid update action(s) '{}' found in {} field. Allowed actions for this field are '{}'".format(", ".join(disallowed_actions), k, ", ".join(CSV_REQUIRED_FIELDS_ACTIONS[k]))) 120 | if v.strip() == UPDATE_ACTIONS['DELETE'].lower() and k in NO_DELETE_FIELDS: 121 | errors.append("Invalid update action '{}' in {} field. 
class ESQueryBuilder:
    """Elasticsearch query builder class

    Wraps an elasticsearch_dsl Search over the v1 or v2 index and exposes
    helpers that attach queries, filters, aggregations, sorting and
    pagination. Call get_query() to obtain the built Search object.
    """

    def __init__(self, version):
        # v1 and v2 records live in separate indexes
        if version == "v2":
            self.search = Search(using=ES7, index=ES_VARS["INDEX_V2"])
        else:
            self.search = Search(using=ES7, index=ES_VARS["INDEX_V1"])
        # report exact hit totals instead of ES's default capped count
        self.search = self.search.extra(track_total_hits=True)
        # dfs_query_then_fetch scores with index-wide term statistics
        self.search = self.search.params(search_type="dfs_query_then_fetch")

    def add_id_query(self, id):
        # operator "and": every token of the ID must match
        self.search = self.search.query("match", id={"query": id, "operator": "and"})

    def add_match_all_query(self):
        self.search = self.search.query("match_all")

    def add_string_query(self, terms):
        # query_string search inside the nested names_ids documents,
        # scored by the best-matching nested doc
        self.search = self.search.query(
            "nested",
            path="names_ids",
            score_mode="max",
            query=Q("query_string", query=terms, fuzzy_max_expansions=1),
        )

    def add_string_query_advanced(self, terms):
        # advanced query syntax across all fields; all terms required
        self.search = self.search.query(
            "bool",
            must=Q(
                "query_string",
                query=terms,
                default_field="*",
                default_operator="and",
                fuzzy_max_expansions=1,
            ),
        )

    def add_phrase_query(self, fields, terms):
        # NOTE: assigning to .query goes through elasticsearch_dsl's query
        # proxy and REPLACES any previously attached query
        self.search.query = Q(
            "dis_max", queries=[Q("match_phrase", **{f: terms}) for f in fields]
        )

    def add_common_query(self, fields, terms):
        # "common terms" query: cutoff_frequency demotes very frequent terms
        self.search.query = Q(
            "dis_max",
            queries=[
                Q("common", **{f: {"query": terms, "cutoff_frequency": 0.001}})
                for f in fields
            ],
        )

    def add_match_query(self, terms):
        # match against the acronyms field only
        self.search = self.search.query("match", acronyms=terms)

    def add_fuzzy_query(self, fields, terms):
        # fuzzy match over the given fields, best field wins (dis_max)
        self.search.query = Q(
            "dis_max",
            queries=[
                Q("match", **{f: {"query": terms, "fuzziness": "AUTO"}}) for f in fields
            ],
        )

    def add_filters(self, filters):
        # filters: mapping of field -> list of accepted values
        for f, v in filters.items():
            self.search = self.search.filter("terms", **{f: v})

    def add_aggregations(self, names):
        # names: iterable of (bucket_name, field) pairs; top 10 terms each
        for name in names:
            self.search.aggs.bucket(
                name[0], "terms", field=name[1], size=10, min_doc_count=1
            )

    def paginate(self, page):
        # 1-based page number -> slice of BATCH_SIZE hits
        self.search = self.search[
            ((page - 1) * ES_VARS["BATCH_SIZE"]) : (page * ES_VARS["BATCH_SIZE"])
        ]

    def get_query(self):
        # return the built Search object for execution by the caller
        return self.search

    def add_sort(self, field, order="asc"):
        # sort results on a single field; order is "asc" or "desc"
        self.search = self.search.sort({field: {"order": order}})
mapper = country(from_key="iso", to_key="name") 26 | try: 27 | self.title = mapper(data.key) 28 | except AttributeError: 29 | # if we have a country code with no name mapping, skip it to prevent 500 30 | pass 31 | self.count = data.doc_count 32 | 33 | 34 | class StatusBucket: 35 | """A model class for status aggregation bucket""" 36 | 37 | def __init__(self, data): 38 | self.id = data.key.lower() 39 | self.title = data.key 40 | self.count = data.doc_count 41 | 42 | 43 | class Errors: 44 | """Errors model class""" 45 | 46 | def __init__(self, errors): 47 | self.errors = errors 48 | -------------------------------------------------------------------------------- /rorapi/common/record_utils.py: -------------------------------------------------------------------------------- 1 | import jsonschema 2 | import requests 3 | from iso639 import Lang 4 | 5 | 6 | def get_lang_code(lang_string): 7 | lang_code = None 8 | error = None 9 | if len(lang_string) == 2: 10 | lang_string = lang_string.lower() 11 | else: 12 | lang_string = lang_string.title() 13 | try: 14 | lg = Lang(lang_string) 15 | lang_code = lg.pt1 16 | except Exception as e: 17 | error = e.msg 18 | return error, lang_code 19 | 20 | def get_file_from_url(url): 21 | rsp = requests.get(url) 22 | rsp.raise_for_status() 23 | return rsp.json() 24 | 25 | def validate_record(data, schema): 26 | try: 27 | print("validating data:") 28 | print(data) 29 | jsonschema.validate(data, schema) 30 | except jsonschema.ValidationError as e: 31 | return "Validation error: " + e.message, None 32 | else: 33 | return None, data 34 | 35 | -------------------------------------------------------------------------------- /rorapi/common/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | 3 | 4 | class OrganizationRelationshipsSerializer(serializers.Serializer): 5 | label = serializers.CharField() 6 | type = serializers.CharField() 7 | id = 
serializers.CharField() 8 | 9 | 10 | class BucketSerializer(serializers.Serializer): 11 | id = serializers.CharField() 12 | title = serializers.CharField() 13 | count = serializers.IntegerField() 14 | 15 | 16 | class ErrorsSerializer(serializers.Serializer): 17 | errors = serializers.StringRelatedField(many=True) 18 | -------------------------------------------------------------------------------- /rorapi/common/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls import url, include 2 | from django.urls import path, re_path 3 | from rest_framework.documentation import include_docs_urls 4 | from . import views 5 | from rorapi.common.views import ( 6 | HeartbeatView,GenerateAddress,GenerateId,IndexData,IndexDataDump,BulkUpdate,ClientRegistrationView,ValidateClientView) 7 | 8 | urlpatterns = [ 9 | # Health check 10 | url(r"^(?P(v1|v2))\/heartbeat$", HeartbeatView.as_view()), 11 | url(r"^heartbeat$", HeartbeatView.as_view()), 12 | # Using REST API 13 | url(r"^(?P(v1|v2))\/generateaddress\/(?P[0-9]+)", GenerateAddress.as_view()), 14 | path('generateaddress/', GenerateAddress.as_view()), 15 | url(r"^generateid$", GenerateId.as_view()), 16 | re_path(r"^(?P(v1|v2))\/bulkupdate$", BulkUpdate.as_view()), 17 | re_path(r"^(?P(v1|v2))\/register$", ClientRegistrationView.as_view()), 18 | path('validate-client-id//', ValidateClientView.as_view()), 19 | url(r"^(?P(v1|v2))\/indexdata/(?P.*)", IndexData.as_view()), 20 | url(r"^(?P(v1|v2))\/indexdatadump\/(?Pv(\d+\.)?(\d+\.)?(\*|\d+)-\d{4}-\d{2}-\d{2}-ror-data)\/(?P(test|prod))$", IndexDataDump.as_view()), 21 | url(r"^(?P(v1|v2))\/", include(views.organizations_router.urls)), 22 | url(r"^", include(views.organizations_router.urls)), 23 | url(r"^docs/", include_docs_urls(title="Research Organization Registry")), 24 | # Prometheus 25 | url("", include("django_prometheus.urls")), 26 | 27 | ] 28 | 
-------------------------------------------------------------------------------- /rorapi/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/management/commands/__init__.py -------------------------------------------------------------------------------- /rorapi/management/commands/createindex.py: -------------------------------------------------------------------------------- 1 | import json 2 | from rorapi.settings import ES7, ES_VARS 3 | 4 | from django.core.management.base import BaseCommand 5 | 6 | 7 | def create_index(self, index, template_file): 8 | with open(template_file, 'r') as it: 9 | template = json.load(it) 10 | ES7.indices.put_template(index, template) 11 | self.stdout.write('Updated index template for {}'.format(index)) 12 | ES7.indices.create(index=index) 13 | self.stdout.write('Created index {}'.format(index)) 14 | 15 | class Command(BaseCommand): 16 | help = 'Create ROR API index' 17 | 18 | def handle(self, *args, **options): 19 | if(options['schema']==1 or options['schema'] is None): 20 | print("creating v1 index") 21 | create_index(self, ES_VARS['INDEX_V1'], ES_VARS['INDEX_TEMPLATE_ES7_V1']) 22 | if(options['schema']==2 or options['schema'] is None): 23 | print("creating v2 index") 24 | create_index(self, ES_VARS['INDEX_V2'], ES_VARS['INDEX_TEMPLATE_ES7_V2']) -------------------------------------------------------------------------------- /rorapi/management/commands/deleteindex.py: -------------------------------------------------------------------------------- 1 | from rorapi.settings import ES7, ES_VARS 2 | 3 | from django.core.management.base import BaseCommand 4 | 5 | def delete_index(self, index): 6 | if ES7.indices.exists(index): 7 | ES7.indices.delete(index=index) 8 | self.stdout.write('Deleted index {}'.format(index)) 9 | else: 10 | self.stdout.write('Index {} does not 
exist'.format(index)) 11 | 12 | class Command(BaseCommand): 13 | help = 'Deletes ROR API index' 14 | 15 | def handle(self, *args, **options): 16 | if(options['schema']==1 or options['schema'] is None): 17 | print("deleting v1 index") 18 | delete_index(self, ES_VARS['INDEX_V1']) 19 | if(options['schema']==2 or options['schema'] is None): 20 | print("deleting v2 index") 21 | delete_index(self, ES_VARS['INDEX_V2']) 22 | 23 | -------------------------------------------------------------------------------- /rorapi/management/commands/generaterorid.py: -------------------------------------------------------------------------------- 1 | import base32_crockford 2 | import random 3 | from rorapi.common.queries import retrieve_organization, get_ror_id 4 | from rorapi.settings import ROR_API 5 | 6 | def generate_ror_id(): 7 | """Generates random ROR ID. 8 | 9 | The checksum calculation is copied from 10 | https://github.com/datacite/base32-url/blob/master/lib/base32/url.rb 11 | to maintain the compatibility with previously generated ROR IDs. 12 | """ 13 | 14 | n = random.randint(0, 200000000) 15 | n_encoded = base32_crockford.encode(n).lower().zfill(6) 16 | checksum = str(98 - ((n * 100) % 97)).zfill(2) 17 | return '{}0{}{}'.format(ROR_API['ID_PREFIX'], n_encoded, checksum) 18 | 19 | 20 | def check_ror_id(version): 21 | """Checks if generated ror id exists in the index. If so, it generates a new id, otherwise it returns the generated ror id 22 | """ 23 | ror_id = get_ror_id(generate_ror_id()) 24 | errors, organization = retrieve_organization(ror_id, version) 25 | if errors is None: 26 | check_ror_id(version) 27 | return ror_id 28 | 29 | 30 | def generate_ror_client_id(): 31 | """Generates a random ROR client ID. 
32 | """ 33 | 34 | n = random.randint(0, 2**160 - 1) 35 | return base32_crockford.encode(n).lower().zfill(32) 36 | -------------------------------------------------------------------------------- /rorapi/management/commands/getrordump.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import requests 5 | import zipfile 6 | import base64 7 | from io import BytesIO 8 | from rorapi.settings import ES7, ES_VARS, ROR_DUMP, DATA 9 | from django.core.management.base import BaseCommand 10 | 11 | HEADERS = {'Accept': 'application/vnd.github.v3+json'} 12 | AUTH_HEADERS = {'Authorization': 'token {}'.format(ROR_DUMP['GITHUB_TOKEN']), 'Accept': 'application/vnd.github.v3+json'} 13 | 14 | def get_ror_dump_sha(filename, use_test_data, github_headers): 15 | sha = '' 16 | if use_test_data: 17 | contents_url = ROR_DUMP['TEST_REPO_URL'] + '/contents' 18 | else: 19 | contents_url = ROR_DUMP['PROD_REPO_URL'] + '/contents' 20 | try: 21 | response = requests.get(contents_url, headers=github_headers) 22 | except requests.exceptions.RequestException as e: 23 | raise SystemExit(f"{contents_url}: is Not reachable \nErr: {e}") 24 | try: 25 | repo_contents = response.json() 26 | for file in repo_contents: 27 | if filename in file['name']: 28 | sha = file['sha'] 29 | return sha 30 | except: 31 | return None 32 | 33 | def get_ror_dump_zip(self, filename, use_test_data, github_headers): 34 | sha = get_ror_dump_sha(filename, use_test_data, github_headers) 35 | if sha: 36 | if use_test_data: 37 | blob_url = ROR_DUMP['TEST_REPO_URL'] + '/git/blobs/' + sha 38 | else: 39 | blob_url = ROR_DUMP['PROD_REPO_URL'] + '/git/blobs/' + sha 40 | try: 41 | response = requests.get(blob_url, headers=github_headers) 42 | except requests.exceptions.RequestException as e: 43 | raise SystemExit(f"Github blob is Not reachable \nErr: {e}") 44 | try: 45 | response_json = response.json() 46 | file_decoded = 
base64.b64decode(response_json['content']) 47 | with open(filename + '.zip', 'wb') as zip_file: 48 | zip_file.write(file_decoded) 49 | with zipfile.ZipFile(zip_file.name, 'r') as ror_zip: 50 | filenames = ror_zip.namelist() 51 | dir_names = [f for f in filenames if ('json' not in f and 'csv' not in f)] 52 | if dir_names: 53 | raise SystemExit(f"Dump zip has extra directory and cannot be indexed") 54 | return zip_file.name 55 | except: 56 | raise SystemExit(f"Something went wrong saving zip file") 57 | 58 | class Command(BaseCommand): 59 | help = 'Downloads a specified ROR data dump from Github' 60 | 61 | def handle(self, *args, **options): 62 | filename = options['filename'] 63 | use_test_data = options['testdata'] 64 | self.stdout.write('Getting ROR dump') 65 | if ROR_DUMP['GITHUB_TOKEN']: 66 | github_headers = AUTH_HEADERS 67 | else: 68 | github_headers = HEADERS 69 | ror_dump_zip = get_ror_dump_zip(self, filename, use_test_data, github_headers) 70 | 71 | -------------------------------------------------------------------------------- /rorapi/management/commands/indexror.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | from functools import wraps 4 | from threading import local 5 | import zipfile 6 | import os 7 | import glob 8 | from os.path import exists 9 | import pathlib 10 | import shutil 11 | from rorapi.settings import ES7, ES_VARS, DATA 12 | 13 | from django.core.management.base import BaseCommand 14 | from elasticsearch import TransportError 15 | 16 | def get_nested_names_v1(org): 17 | yield org['name'] 18 | for label in org['labels']: 19 | yield label['label'] 20 | for alias in org['aliases']: 21 | yield alias 22 | for acronym in org['acronyms']: 23 | yield acronym 24 | 25 | def get_nested_names_v2(org): 26 | for name in org['names']: 27 | yield name['value'] 28 | 29 | def get_nested_ids_v1(org): 30 | yield org['id'] 31 | yield re.sub('https://', '', org['id']) 32 | yield 
re.sub('https://ror.org/', '', org['id']) 33 | for ext_name, ext_id in org['external_ids'].items(): 34 | if ext_name == 'GRID': 35 | yield ext_id['all'] 36 | else: 37 | for eid in ext_id['all']: 38 | yield eid 39 | 40 | def get_nested_ids_v2(org): 41 | yield org['id'] 42 | yield re.sub('https://', '', org['id']) 43 | yield re.sub('https://ror.org/', '', org['id']) 44 | for ext_id in org['external_ids']: 45 | for eid in ext_id['all']: 46 | yield eid 47 | 48 | def prepare_files(path, local_file): 49 | data = [] 50 | err = {} 51 | try: 52 | if exists(local_file): 53 | with zipfile.ZipFile(local_file, 'r') as zip_ref: 54 | zip_ref.extractall(path) 55 | except Exception as e: 56 | err[prepare_files.__name__] = f"ERROR: {e}" 57 | 58 | json_files = os.path.join(path, "*.json") 59 | file_list = glob.glob(json_files) 60 | for file in file_list: 61 | try: 62 | with open(file) as f: 63 | data.append(json.load(f)) 64 | except Exception as e: 65 | key = f"In {prepare_files.__name__}_{file}" 66 | err[key] = f"ERROR: {e}" 67 | return data, err 68 | 69 | 70 | def get_rc_data(dir, contents): 71 | err = {} 72 | path = f"{dir}/files.zip" 73 | branch_objects = [i for i in contents if path == i['Key']] 74 | local_file = None 75 | local_path = None 76 | if branch_objects: 77 | s3_file = branch_objects[0]['Key'] 78 | local_path = os.path.join(DATA['DIR'], dir) 79 | os.makedirs(local_path) 80 | local_file = local_path + "/files.zip" 81 | try: 82 | DATA['CLIENT'].download_file(DATA['DATA_STORE'],s3_file, local_file) 83 | except Exception as e: 84 | key = f"In {get_rc_data.__name__}_downloading files" 85 | err[key] = f"ERROR: {e}" 86 | else: 87 | err[get_rc_data.__name__] = f"ERROR: {dir} not found in S3 bucket" 88 | return local_path, local_file, err 89 | 90 | def get_data(): 91 | err = {} 92 | # return contents or None 93 | contents = None 94 | try: 95 | objects = DATA['CLIENT'].list_objects_v2(Bucket = DATA['DATA_STORE']) 96 | contents = objects['Contents'] 97 | except Exception as e: 98 
| err[get_data.__name__] = f"ERROR: Could not get objects from {DATA['DATA_STORE']}: {e}" 99 | return contents, err 100 | 101 | 102 | def process_files(dir, version): 103 | err = [] 104 | if dir: 105 | path = os.path.join(DATA['WORKING_DIR'], dir) 106 | if os.path.isdir(path): 107 | p = pathlib.Path(path) 108 | shutil.rmtree(p) 109 | objects, e = get_data() 110 | err.append(e) 111 | if objects and not(e): 112 | # check if objects exist, otherwise error 113 | path, file, e = get_rc_data(dir, objects) 114 | err.append(e) 115 | if path and file and not(e): 116 | data, e = prepare_files(path, file) 117 | if not(e): 118 | index_error = index(data, version) 119 | err.append(index_error) 120 | else: 121 | err.append(e) 122 | else: 123 | err.append({process_files.__name__: f"No objects found in {dir}"}) 124 | else: 125 | err.append({process_files.__name__: "Need S3 directory argument"}) 126 | err = [i for i in err if i] 127 | if err: 128 | msg = {"status": "ERROR", "msg": err} 129 | else: 130 | msg = {"status": "OK", "msg": f"{dir} indexed using version {version}"} 131 | 132 | return msg 133 | 134 | 135 | def index(dataset, version): 136 | err = {} 137 | if version == 'v2': 138 | index = ES_VARS['INDEX_V2'] 139 | else: 140 | index = ES_VARS['INDEX_V1'] 141 | backup_index = '{}-tmp'.format(index) 142 | ES7.reindex(body={ 143 | 'source': { 144 | 'index': index 145 | }, 146 | 'dest': { 147 | 'index': backup_index 148 | } 149 | }) 150 | 151 | try: 152 | for i in range(0, len(dataset), ES_VARS['BULK_SIZE']): 153 | body = [] 154 | for org in dataset[i:i + ES_VARS['BULK_SIZE']]: 155 | body.append({ 156 | 'index': { 157 | '_index': index, 158 | '_id': org['id'] 159 | } 160 | }) 161 | if 'v2' in index: 162 | org['names_ids'] = [{ 163 | 'name': n 164 | } for n in get_nested_names_v2(org)] 165 | org['names_ids'] += [{ 166 | 'id': n 167 | } for n in get_nested_ids_v2(org)] 168 | else: 169 | org['names_ids'] = [{ 170 | 'name': n 171 | } for n in get_nested_names_v1(org)] 172 | 
org['names_ids'] += [{ 173 | 'id': n 174 | } for n in get_nested_ids_v1(org)] 175 | body.append(org) 176 | ES7.bulk(body) 177 | except TransportError: 178 | err[index.__name__] = f"Indexing error, reverted index back to previous state" 179 | ES7.reindex(body={ 180 | 'source': { 181 | 'index': backup_index 182 | }, 183 | 'dest': { 184 | 'index': index 185 | } 186 | }) 187 | if ES7.indices.exists(backup_index): 188 | ES7.indices.delete(backup_index) 189 | return err 190 | 191 | class Command(BaseCommand): 192 | help = 'Indexes ROR dataset' 193 | 194 | def add_arguments(self, parser): 195 | parser.add_argument('dir', type=str, help='add directory name for S3 bucket to be processed') 196 | parser.add_argument('version', type=str, help='schema version of files to be processed') 197 | 198 | def handle(self,*args, **options): 199 | dir = options['dir'] 200 | version = options['version'] 201 | process_files(dir, version) 202 | 203 | 204 | -------------------------------------------------------------------------------- /rorapi/management/commands/indexrordump.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import requests 5 | import zipfile 6 | import base64 7 | from io import BytesIO 8 | from rorapi.settings import ES7, ES_VARS, ROR_DUMP, DATA 9 | 10 | from django.core.management.base import BaseCommand 11 | from elasticsearch import TransportError 12 | 13 | HEADERS = {'Accept': 'application/vnd.github.v3+json'} 14 | 15 | def get_nested_names_v1(org): 16 | yield org['name'] 17 | for label in org['labels']: 18 | yield label['label'] 19 | for alias in org['aliases']: 20 | yield alias 21 | for acronym in org['acronyms']: 22 | yield acronym 23 | 24 | def get_nested_names_v2(org): 25 | for name in org['names']: 26 | yield name['value'] 27 | 28 | def get_nested_ids_v1(org): 29 | yield org['id'] 30 | yield re.sub('https://', '', org['id']) 31 | yield re.sub('https://ror.org/', '', org['id']) 32 | 
for ext_name, ext_id in org['external_ids'].items(): 33 | if ext_name == 'GRID': 34 | yield ext_id['all'] 35 | else: 36 | for eid in ext_id['all']: 37 | yield eid 38 | 39 | def get_nested_ids_v2(org): 40 | yield org['id'] 41 | yield re.sub('https://', '', org['id']) 42 | yield re.sub('https://ror.org/', '', org['id']) 43 | for ext_id in org['external_ids']: 44 | for eid in ext_id['all']: 45 | yield eid 46 | 47 | def index_dump(self, filename, index, dataset): 48 | backup_index = '{}-tmp'.format(index) 49 | ES7.reindex(body={ 50 | 'source': { 51 | 'index': index 52 | }, 53 | 'dest': { 54 | 'index': backup_index 55 | } 56 | }) 57 | 58 | try: 59 | for i in range(0, len(dataset), ES_VARS['BULK_SIZE']): 60 | body = [] 61 | for org in dataset[i:i + ES_VARS['BULK_SIZE']]: 62 | body.append({ 63 | 'index': { 64 | '_index': index, 65 | '_id': org['id'] 66 | } 67 | }) 68 | if 'v2' in index: 69 | org['names_ids'] = [{ 70 | 'name': n 71 | } for n in get_nested_names_v2(org)] 72 | org['names_ids'] += [{ 73 | 'id': n 74 | } for n in get_nested_ids_v2(org)] 75 | else: 76 | org['names_ids'] = [{ 77 | 'name': n 78 | } for n in get_nested_names_v1(org)] 79 | org['names_ids'] += [{ 80 | 'id': n 81 | } for n in get_nested_ids_v1(org)] 82 | body.append(org) 83 | ES7.bulk(body) 84 | except TransportError: 85 | self.stdout.write(TransportError) 86 | self.stdout.write('Reverting to backup index') 87 | ES7.reindex(body={ 88 | 'source': { 89 | 'index': backup_index 90 | }, 91 | 'dest': { 92 | 'index': index 93 | } 94 | }) 95 | if ES7.indices.exists(backup_index): 96 | ES7.indices.delete(backup_index) 97 | self.stdout.write('ROR dataset ' + filename + ' indexed') 98 | 99 | 100 | class Command(BaseCommand): 101 | help = 'Indexes ROR dataset from a full dump file in ror-data repo' 102 | 103 | def handle(self, *args, **options): 104 | json_files = [] 105 | filename = options['filename'] 106 | ror_dump_zip = filename + '.zip' 107 | if os.path.exists(ror_dump_zip): 108 | if not 
os.path.exists(DATA['WORKING_DIR']): 109 | os.makedirs(DATA['WORKING_DIR']) 110 | self.stdout.write('Extracting ROR dump') 111 | with zipfile.ZipFile(ror_dump_zip, 'r') as zip_ref: 112 | zip_ref.extractall(DATA['WORKING_DIR'] + filename) 113 | unzipped_files = os.listdir(DATA['WORKING_DIR'] + filename) 114 | for file in unzipped_files: 115 | if file.endswith(".json"): 116 | json_files.append(file) 117 | if json_files: 118 | for json_file in json_files: 119 | index = None 120 | json_path = os.path.join(DATA['WORKING_DIR'], filename, '') + json_file 121 | if 'schema_v2' in json_file and (options['schema']==2 or options['schema'] is None): 122 | self.stdout.write('Loading JSON') 123 | with open(json_path, 'r') as it: 124 | dataset = json.load(it) 125 | self.stdout.write('Indexing ROR dataset ' + json_file) 126 | index = ES_VARS['INDEX_V2'] 127 | index_dump(self, json_file, index, dataset) 128 | if 'schema_v2' not in json_file and (options['schema']==1 or options['schema'] is None): 129 | self.stdout.write('Loading JSON') 130 | with open(json_path, 'r') as it: 131 | dataset = json.load(it) 132 | self.stdout.write('Indexing ROR dataset ' + json_file) 133 | index = ES_VARS['INDEX_V1'] 134 | index_dump(self, json_file, index, dataset) 135 | else: 136 | self.stdout.write("ROR data dump does not contain any JSON files") 137 | 138 | else: 139 | self.stdout.write("ROR data dump zip file does not exist") 140 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacyconvertgrid.py: -------------------------------------------------------------------------------- 1 | import base32_crockford 2 | import json 3 | import os.path 4 | import random 5 | import zipfile 6 | import re 7 | from rorapi.settings import ES, ES_VARS, ROR_API, GRID, ROR_DUMP 8 | 9 | from django.core.management.base import BaseCommand 10 | 11 | # Previously used to convert latest GRID dataset configured in settings.py 12 | # to ROR and assign ROR IDs to 
each GRID org 13 | # As of Mar 2022 ROR is no longer based on GRID 14 | # New records are now created in https://github.com/ror-community/ror-records and pushed to S3 15 | # Individual record files in S3 are indexed with indexror.py 16 | # Entire dataset zip files in https://github.com/ror-community/ror-data 17 | # can be indexed with setup.py, which uses indexrordump.py 18 | 19 | def generate_ror_id(): 20 | """Generates random ROR ID. 21 | 22 | The checksum calculation is copied from 23 | https://github.com/datacite/base32-url/blob/master/lib/base32/url.rb 24 | to maintain the compatibility with previously generated ROR IDs. 25 | """ 26 | 27 | n = random.randint(0, 200000000) 28 | n_encoded = base32_crockford.encode(n).lower().zfill(6) 29 | checksum = str(98 - ((n * 100) % 97)).zfill(2) 30 | return '{}0{}{}'.format(ROR_API['ID_PREFIX'], n_encoded, checksum) 31 | 32 | 33 | def get_ror_id(grid_id, es): 34 | """Maps GRID ID to ROR ID. 35 | 36 | If given GRID ID was indexed previously, corresponding ROR ID is obtained 37 | from the index. Otherwise, new ROR ID is generated. 
38 | """ 39 | 40 | s = ES.search(ES_VARS['INDEX'], 41 | body={'query': { 42 | 'term': { 43 | 'external_ids.GRID.all': grid_id 44 | } 45 | }}) 46 | if s['hits']['total'] == 1: 47 | return s['hits']['hits'][0]['_id'] 48 | return generate_ror_id() 49 | 50 | 51 | def geonames_city(geonames_city): 52 | geonames = ["geonames_admin1", "geonames_admin2"] 53 | geonames_attributes = ["id", "name", "ascii_name", "code"] 54 | nuts = ["nuts_level1", "nuts_level2", "nuts_level3"] 55 | nuts_attributes = ["code", "name"] 56 | geonames_city_hsh = {} 57 | for k, v in geonames_city.items(): 58 | if (k in geonames): 59 | if isinstance(v, dict): 60 | geonames_city_hsh[k] = { 61 | i: v.get(i, None) 62 | for i in geonames_attributes 63 | } 64 | elif v is None: 65 | geonames_city_hsh[k] = {i: None for i in geonames_attributes} 66 | elif (k in nuts): 67 | if isinstance(v, dict): 68 | geonames_city_hsh[k] = { 69 | i: v.get(i, None) 70 | for i in nuts_attributes 71 | } 72 | elif v is None: 73 | geonames_city_hsh[k] = {i: None for i in nuts_attributes} 74 | else: 75 | geonames_city_hsh[k] = v 76 | return geonames_city_hsh 77 | 78 | 79 | def addresses(location): 80 | line = "" 81 | address = ["line_1", "line_2", "line_3"] 82 | combine_lines = address + ["country", "country_code"] 83 | geonames_admin = ["id", "code", "name", "ascii_name"] 84 | nuts = ["code", "name"] 85 | new_addresses = [] 86 | hsh = {} 87 | hsh["line"] = None 88 | for h in location: 89 | for k, v in h.items(): 90 | if not (k in combine_lines) and (k != "geonames_city"): 91 | v = v if v != "" else None 92 | hsh[k] = v 93 | elif k == "geonames_city": 94 | if isinstance(v, dict): 95 | hsh[k] = geonames_city(v) 96 | elif v is None: 97 | hsh[k] = {} 98 | elif (k in combine_lines): 99 | n = [] 100 | for i in address: 101 | if not (h[i] is None): 102 | n.append(h[i]) 103 | line = " ".join(n) 104 | line = re.sub(' +', ' ', line) 105 | if (len(line) == 1 and line == " "): 106 | line = line.strip() 107 | line = line if len(line) > 0 
else None 108 | hsh["line"] = line 109 | new_addresses.append(hsh) 110 | return new_addresses 111 | 112 | 113 | def convert_organization(grid_org, es): 114 | """Converts the organization metadata from GRID schema to ROR schema.""" 115 | return { 116 | 'id': 117 | get_ror_id(grid_org['id'], ES), 118 | 'name': 119 | grid_org['name'], 120 | 'types': 121 | grid_org['types'], 122 | 'links': 123 | grid_org['links'], 124 | 'aliases': 125 | grid_org['aliases'], 126 | 'acronyms': 127 | grid_org['acronyms'], 128 | 'status': 129 | grid_org['status'], 130 | 'wikipedia_url': 131 | grid_org['wikipedia_url'], 132 | 'labels': 133 | grid_org['labels'], 134 | 'email_address': 135 | grid_org['email_address'], 136 | 'ip_addresses': 137 | grid_org['ip_addresses'], 138 | 'established': 139 | grid_org['established'], 140 | 'country': { 141 | 'country_code': grid_org['addresses'][0]['country_code'], 142 | 'country_name': grid_org['addresses'][0]['country'] 143 | }, 144 | 'relationships': 145 | grid_org["relationships"], 146 | 'addresses': 147 | addresses(grid_org["addresses"]), 148 | 'external_ids': 149 | getExternalIds( 150 | dict(grid_org.get('external_ids', {}), 151 | GRID={ 152 | 'preferred': grid_org['id'], 153 | 'all': grid_org['id'] 154 | })) 155 | } 156 | 157 | 158 | def getExternalIds(external_ids): 159 | if 'ROR' in external_ids: del external_ids['ROR'] 160 | return external_ids 161 | 162 | 163 | def get_ids(data): 164 | ids = {} 165 | for d in data: 166 | ids[d['external_ids']['GRID']['all']] = d['id'] 167 | return ids 168 | 169 | 170 | def get_grid(record, ids): 171 | if record['relationships']: 172 | for r in record['relationships']: 173 | r['id'] = ids[r['id']] 174 | 175 | return record 176 | 177 | 178 | class Command(BaseCommand): 179 | help = 'Converts GRID dataset to ROR schema' 180 | 181 | def handle(self, *args, **options): 182 | os.makedirs(ROR_DUMP['DIR'], exist_ok=True) 183 | # make sure we are not overwriting an existing ROR JSON file 184 | # with new ROR 
identifiers 185 | if zipfile.is_zipfile(ROR_DUMP['ROR_ZIP_PATH']): 186 | self.stdout.write('ROR dataset already exists') 187 | return 188 | 189 | if not os.path.isfile(ROR_DUMP['ROR_JSON_PATH']): 190 | with open(GRID['GRID_JSON_PATH'], 'r') as it: 191 | grid_data = json.load(it) 192 | 193 | self.stdout.write('Converting GRID dataset to ROR schema') 194 | intermediate_ror_data = [ 195 | convert_organization(org, ES) 196 | for org in grid_data['institutes'] if org['status'] == 'active' 197 | ] 198 | ids = get_ids(intermediate_ror_data) 199 | ror_data = [get_grid(rec, ids) for rec in intermediate_ror_data] 200 | with open(ROR_DUMP['ROR_JSON_PATH'], 'w') as outfile: 201 | json.dump(ror_data, outfile, indent=4) 202 | self.stdout.write('ROR dataset created') 203 | 204 | # generate zip archive 205 | with zipfile.ZipFile(ROR_DUMP['ROR_ZIP_PATH'], 'w') as zipArchive: 206 | zipArchive.write(ROR_DUMP['ROR_JSON_PATH'], 207 | arcname='ror.json', 208 | compress_type=zipfile.ZIP_DEFLATED) 209 | self.stdout.write('ROR dataset ZIP archive created') 210 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacydownloadgrid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import zipfile 4 | 5 | from django.core.management.base import BaseCommand 6 | from rorapi.settings import GRID 7 | 8 | # Previously used to download latest GRID dataset configured in settings.py 9 | # which was used to generate a new ROR datasets 10 | # As of Mar 2022 ROR is no longer based on GRID 11 | # New records are now created in https://github.com/ror-community/ror-records and pushed to S3 12 | # Individual record files in S3 are indexed with indexror.py 13 | # Entire dataset zip files in https://github.com/ror-community/ror-data 14 | # can be indexed with setup.py, which uses indexrordump.py 15 | 16 | class Command(BaseCommand): 17 | help = 'Downloads GRID dataset' 18 | 19 | 
def handle(self, *args, **options): 20 | os.makedirs(GRID['DIR'], exist_ok=True) 21 | 22 | # make sure we are not overwriting an existing ROR JSON file 23 | # with new ROR identifiers 24 | if zipfile.is_zipfile(GRID['GRID_ZIP_PATH']): 25 | self.stdout.write('Already downloaded GRID version {}'.format( 26 | GRID['VERSION'])) 27 | return 28 | 29 | self.stdout.write('Downloading GRID version {}'.format( 30 | GRID['VERSION'])) 31 | r = requests.get(GRID['URL']) 32 | with open(GRID['GRID_ZIP_PATH'], 'wb') as f: 33 | f.write(r.content) 34 | 35 | with zipfile.ZipFile(GRID['GRID_ZIP_PATH'], 'r') as zip_ref: 36 | zip_ref.extractall(GRID['DIR']) 37 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacyindexgrid.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import zipfile 4 | from rorapi.settings import ES, ES_VARS, LEGACY_ROR_DUMP 5 | 6 | from django.core.management.base import BaseCommand 7 | from elasticsearch import TransportError 8 | 9 | 10 | def get_nested_names(org): 11 | yield org['name'] 12 | for label in org['labels']: 13 | yield label['label'] 14 | for alias in org['aliases']: 15 | yield alias 16 | for acronym in org['acronyms']: 17 | yield acronym 18 | 19 | 20 | def get_nested_ids(org): 21 | yield org['id'] 22 | yield re.sub('https://', '', org['id']) 23 | yield re.sub('https://ror.org/', '', org['id']) 24 | for ext_name, ext_id in org['external_ids'].items(): 25 | if ext_name == 'GRID': 26 | yield ext_id['all'] 27 | else: 28 | for eid in ext_id['all']: 29 | yield eid 30 | 31 | 32 | class Command(BaseCommand): 33 | help = 'Indexes ROR dataset' 34 | 35 | def handle(self, *args, **options): 36 | with zipfile.ZipFile(LEGACY_ROR_DUMP['ROR_ZIP_PATH'], 'r') as zip_ref: 37 | zip_ref.extractall(LEGACY_ROR_DUMP['DIR']) 38 | 39 | with open(LEGACY_ROR_DUMP['ROR_JSON_PATH'], 'r') as it: 40 | dataset = json.load(it) 41 | 42 | 
self.stdout.write('Indexing ROR dataset') 43 | 44 | index = ES_VARS['INDEX'] 45 | backup_index = '{}-tmp'.format(index) 46 | ES.reindex(body={ 47 | 'source': { 48 | 'index': index 49 | }, 50 | 'dest': { 51 | 'index': backup_index 52 | } 53 | }) 54 | 55 | try: 56 | for i in range(0, len(dataset), ES_VARS['BULK_SIZE']): 57 | body = [] 58 | for org in dataset[i:i + ES_VARS['BULK_SIZE']]: 59 | body.append({ 60 | 'index': { 61 | '_index': index, 62 | '_type': 'org', 63 | '_id': org['id'] 64 | } 65 | }) 66 | org['names_ids'] = [{ 67 | 'name': n 68 | } for n in get_nested_names(org)] 69 | org['names_ids'] += [{ 70 | 'id': n 71 | } for n in get_nested_ids(org)] 72 | body.append(org) 73 | ES.bulk(body) 74 | except TransportError: 75 | self.stdout.write(TransportError) 76 | ES.reindex(body={ 77 | 'source': { 78 | 'index': backup_index 79 | }, 80 | 'dest': { 81 | 'index': index 82 | } 83 | }) 84 | 85 | if ES.indices.exists(backup_index): 86 | ES.indices.delete(backup_index) 87 | self.stdout.write('ROR dataset ' + LEGACY_ROR_DUMP['VERSION'] + ' indexed') 88 | -------------------------------------------------------------------------------- /rorapi/management/commands/legacyseeschema.py: -------------------------------------------------------------------------------- 1 | import json 2 | from rorapi.settings import ES, ES_VARS 3 | 4 | from django.core.management.base import BaseCommand 5 | 6 | 7 | class Command(BaseCommand): 8 | help = 'Create ROR API index' 9 | 10 | def handle(self, *args, **options): 11 | index = ES_VARS['INDEX'] 12 | if ES.indices.exists(index): 13 | raw_data = ES.indices.get_mapping( index ) 14 | schema = raw_data[ index ]["mappings"]["org"] 15 | print (json.dumps(schema, indent=4)) 16 | else: 17 | with open(ES_VARS['INDEX_TEMPLATE'], 'r') as it: 18 | template = json.load(it) 19 | ES.indices.create(index=index, body=template) 20 | self.stdout.write('Created index {}'.format(index)) 21 | 
-------------------------------------------------------------------------------- /rorapi/management/commands/legacyupgrade.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | from .downloadgrid import Command as DownloadGridCommand 3 | from .convertgrid import Command as ConvertGridCommand 4 | 5 | # Previously used to generate ROR dataset 6 | # based on the latest GRID dataset configured in settings.py 7 | # As of Mar 2022 ROR is no longer based on GRID 8 | # New records are now created in https://github.com/ror-community/ror-records and pushed to S3 9 | # Individual record files in S3 are indexed with indexror.py 10 | # Entire dataset zip files in https://github.com/ror-community/ror-data 11 | # can be indexed with setup.py, which uses indexrordump.py 12 | 13 | class Command(BaseCommand): 14 | help = 'Generate up-to-date ror.zip from GRID data' 15 | 16 | def handle(self, *args, **options): 17 | DownloadGridCommand().handle(args, options) 18 | ConvertGridCommand().handle(args, options) 19 | -------------------------------------------------------------------------------- /rorapi/management/commands/setup.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import zipfile 3 | import base64 4 | from django.core.management.base import BaseCommand 5 | from rorapi.management.commands.deleteindex import Command as DeleteIndexCommand 6 | from rorapi.management.commands.createindex import Command as CreateIndexCommand 7 | from rorapi.management.commands.indexrordump import Command as IndexRorDumpCommand 8 | from rorapi.management.commands.getrordump import Command as GetRorDumpCommand 9 | from rorapi.settings import ROR_DUMP 10 | 11 | HEADERS = {'Accept': 'application/vnd.github.v3+json'} 12 | 13 | HEADERS = {'Authorization': 'token {}'.format(ROR_DUMP['GITHUB_TOKEN']), 'Accept': 'application/vnd.github.v3+json'} 14 | 15 | def 
get_ror_dump_sha(filename, use_test_data): 16 | sha = '' 17 | if use_test_data: 18 | contents_url = ROR_DUMP['TEST_REPO_URL'] + '/contents' 19 | else: 20 | contents_url = ROR_DUMP['PROD_REPO_URL'] + '/contents' 21 | try: 22 | response = requests.get(contents_url, headers=HEADERS) 23 | except requests.exceptions.RequestException as e: 24 | raise SystemExit(f"{contents_url}: is Not reachable \nErr: {e}") 25 | try: 26 | repo_contents = response.json() 27 | for file in repo_contents: 28 | if filename in file['name']: 29 | sha = file['sha'] 30 | return sha 31 | except: 32 | return None 33 | 34 | class Command(BaseCommand): 35 | help = 'Setup ROR API' 36 | 37 | def add_arguments(self, parser): 38 | parser.add_argument('filename', type=str, help='Name of data dump zip file to index without extension') 39 | parser.add_argument('-s', '--schema', type=int, choices=[1, 2], help='Schema version to index if only indexing 1 version. Only set if not indexing both versions.') 40 | parser.add_argument('-t', '--testdata', action='store_true', help='Set flag to pull data dump from ror-data-test instead of ror-data') 41 | 42 | def handle(self, *args, **options): 43 | msg = None 44 | # make sure ROR dump file exists 45 | filename = options['filename'] 46 | use_test_data = options['testdata'] 47 | if use_test_data: 48 | print("Using ror-data-test repo") 49 | else: 50 | print("Using ror-data repo") 51 | 52 | sha = get_ror_dump_sha(filename, use_test_data) 53 | 54 | if sha: 55 | try: 56 | GetRorDumpCommand().handle(*args, **options) 57 | DeleteIndexCommand().handle(*args, **options) 58 | CreateIndexCommand().handle(*args, **options) 59 | IndexRorDumpCommand().handle(*args, **options) 60 | msg = 'SUCCESS: ROR dataset {} indexed in version {}. Using test repo: {}'.format(filename, str(options['schema']), str(use_test_data)) 61 | except: 62 | msg = 'ERROR: Could not index ROR data dump. Check API logs for details.' 63 | else: 64 | msg = 'ERROR: ROR dataset for file {} not found. 
'.format(filename) \ 65 | +'Please generate the data dump first.' 66 | self.stdout.write(msg) 67 | 68 | return msg 69 | 70 | -------------------------------------------------------------------------------- /rorapi/migrations/0001_create_client_model.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.28 on 2025-03-11 07:13 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | initial = True 9 | 10 | dependencies = [ 11 | ] 12 | 13 | operations = [ 14 | migrations.CreateModel( 15 | name='Client', 16 | fields=[ 17 | ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), 18 | ('email', models.EmailField(max_length=255)), 19 | ('name', models.CharField(blank=True, max_length=255)), 20 | ('institution_name', models.CharField(blank=True, max_length=255)), 21 | ('institution_ror', models.URLField(blank=True, max_length=255)), 22 | ('country_code', models.CharField(blank=True, max_length=2)), 23 | ('ror_use', models.TextField(blank=True, max_length=500)), 24 | ('client_id', models.CharField(editable=False, max_length=32, unique=True)), 25 | ('created_at', models.DateTimeField(auto_now_add=True)), 26 | ('last_request_at', models.DateTimeField(blank=True, null=True)), 27 | ('request_count', models.IntegerField(default=0)), 28 | ], 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /rorapi/migrations/0002_auto_20250326_1054.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.28 on 2025-03-26 10:54 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('rorapi', '0001_create_client_model'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='client', 15 | name='email', 16 | 
field=models.EmailField(max_length=255, unique=True), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /rorapi/migrations/0003_auto_20250415_1207.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 2.2.28 on 2025-04-15 12:07 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('rorapi', '0002_auto_20250326_1054'), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name='client', 15 | name='country_code', 16 | field=models.CharField(blank=True, max_length=2, null=True), 17 | ), 18 | migrations.AlterField( 19 | model_name='client', 20 | name='email', 21 | field=models.EmailField(max_length=255), 22 | ), 23 | migrations.AlterField( 24 | model_name='client', 25 | name='institution_name', 26 | field=models.CharField(blank=True, max_length=255, null=True), 27 | ), 28 | migrations.AlterField( 29 | model_name='client', 30 | name='institution_ror', 31 | field=models.URLField(blank=True, max_length=255, null=True), 32 | ), 33 | migrations.AlterField( 34 | model_name='client', 35 | name='name', 36 | field=models.CharField(blank=True, max_length=255, null=True), 37 | ), 38 | migrations.AlterField( 39 | model_name='client', 40 | name='ror_use', 41 | field=models.TextField(blank=True, max_length=500, null=True), 42 | ), 43 | ] 44 | -------------------------------------------------------------------------------- /rorapi/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/migrations/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/tests_functional/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/evaluation.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | 4 | 5 | def escape_query(query): 6 | return re.sub(r'([\+\-=\&\|>= ACCURACY_MIN) 54 | 55 | correct = sum([ 56 | len(set(r).intersection(set(d.get('ror_ids')))) 57 | for d, r in zip(self.dataset, self.results) 58 | ]) 59 | total = sum([len(r) for r in self.results]) 60 | precision = correct / total 61 | print('Precision: {} {}'.format(precision, 62 | proportion_confint(correct, total))) 63 | self.assertTrue(precision >= PRECISION_MIN) 64 | 65 | correct = sum([ 66 | len(set(r).intersection(set(d.get('ror_ids')))) 67 | for d, r in zip(self.dataset, self.results) 68 | ]) 69 | total = sum([len(d.get('ror_ids')) for d in self.dataset]) 70 | recall = correct / total 71 | print('Recall: {} {}'.format(recall, 72 | proportion_confint(correct, total))) 73 | self.assertTrue(recall >= RECALL_MIN) 74 | -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/tests_matching_v2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import requests 5 | 6 | from django.test import SimpleTestCase 7 | from statsmodels.stats.api import proportion_confint 8 | 9 | ACCURACY_MIN = 0.885741 10 | PRECISION_MIN = 0.915426 11 | RECALL_MIN = 0.920048 12 | 13 | 
API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') 14 | 15 | 16 | class AffiliationMatchingTestCase(SimpleTestCase): 17 | def match(self, affiliation): 18 | affiliation = re.sub(r'([\+\-=\&\|>= ACCURACY_MIN) 53 | 54 | correct = sum([ 55 | len(set(r).intersection(set(d.get('ror_ids')))) 56 | for d, r in zip(self.dataset, self.results) 57 | ]) 58 | total = sum([len(r) for r in self.results]) 59 | precision = correct / total 60 | print('Precision: {} {}'.format(precision, 61 | proportion_confint(correct, total))) 62 | self.assertTrue(precision >= PRECISION_MIN) 63 | 64 | correct = sum([ 65 | len(set(r).intersection(set(d.get('ror_ids')))) 66 | for d, r in zip(self.dataset, self.results) 67 | ]) 68 | total = sum([len(d.get('ror_ids')) for d in self.dataset]) 69 | recall = correct / total 70 | print('Recall: {} {}'.format(recall, 71 | proportion_confint(correct, total))) 72 | self.assertTrue(recall >= RECALL_MIN) 73 | -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/tests_search_v1.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | from .evaluation import search, escape_query 6 | from django.test import SimpleTestCase 7 | from statsmodels.stats.api import DescrStatsW, proportion_confint 8 | 9 | RANK_MAX_QUERY = 2.315534 10 | R1_MIN_QUERY = 0.749118 11 | R5_MIN_QUERY = 0.913082 12 | 13 | RANK_MAX_QUERY_FUZZY = 2.619402 14 | R1_MIN_QUERY_FUZZY = 0.728343 15 | R5_MIN_QUERY_FUZZY = 0.902090 16 | 17 | API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') 18 | API_VERSION = 'v1' 19 | 20 | 21 | def get_rank(ror_id, items): 22 | for i, item in enumerate(items): 23 | if ror_id == item['id']: 24 | return i + 1 25 | return 21 26 | 27 | 28 | def mean_rank(ranks): 29 | return sum(ranks) / len(ranks), DescrStatsW(ranks).tconfint_mean() 30 | 31 | 32 | def recall_at_n(ranks, n): 33 | s = len([r for r in ranks if r <= n]) 
34 | a = len(ranks) 35 | return s / a, proportion_confint(s, a) 36 | 37 | 38 | class SearchTestCase(SimpleTestCase): 39 | def set_up(self, param, rank_max, r1_min, r5_min): 40 | with open( 41 | os.path.join(os.path.dirname(__file__), 42 | 'data/dataset_names.json')) as names_file: 43 | data = json.load(names_file) 44 | data_query = [] 45 | for i, d in enumerate(data): 46 | data_query.append((d, search(API_URL, param, d['affiliation'], API_VERSION))) 47 | if i % 100 == 0: 48 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 49 | self.ranks = [ 50 | get_rank(case['ror-id'], items) for case, items in data_query 51 | ] 52 | self.rank_max = rank_max 53 | self.r1_min = r1_min 54 | self.r5_min = r5_min 55 | 56 | def validate(self, name): 57 | mean, ci = mean_rank(self.ranks) 58 | print('\nMean rank for {}: {} {}'.format(name, mean, ci)) 59 | self.assertTrue(mean <= self.rank_max) 60 | 61 | recall_1, ci = recall_at_n(self.ranks, 1) 62 | print('Recall@1 for {}: {} {}'.format(name, recall_1, ci)) 63 | self.assertTrue(recall_1 >= self.r1_min) 64 | 65 | recall_5, ci = recall_at_n(self.ranks, 5) 66 | print('Recall@5 for {}: {} {}'.format(name, recall_5, ci)) 67 | self.assertTrue(recall_5 >= self.r5_min) 68 | 69 | 70 | class QueryFuzzySearchTestCase(SearchTestCase): 71 | def setUp(self): 72 | self.param = 'query' 73 | with open( 74 | os.path.join(os.path.dirname(__file__), 75 | 'data/dataset_names.json')) as names_file: 76 | data = json.load(names_file) 77 | data_query = [] 78 | for i, d in enumerate(data): 79 | data_query.append((d, 80 | search(API_URL, 81 | 'query', 82 | re.sub('([^ ])(?= |$)', r'\g<1>~', 83 | escape_query(d['affiliation'])), 84 | API_VERSION, 85 | escape=False))) 86 | if i % 100 == 0: 87 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 88 | self.ranks = [ 89 | get_rank(case['ror-id'], items) for case, items in data_query 90 | ] 91 | self.rank_max = RANK_MAX_QUERY_FUZZY 92 | self.r1_min = R1_MIN_QUERY_FUZZY 93 | self.r5_min = 
R5_MIN_QUERY_FUZZY 94 | 95 | def test_search_query(self): 96 | self.validate('query (fuzzy)') 97 | 98 | 99 | class QuerySearchTestCase(SearchTestCase): 100 | def setUp(self): 101 | self.set_up('query', RANK_MAX_QUERY, R1_MIN_QUERY, R5_MIN_QUERY) 102 | 103 | def test_search_query(self): 104 | self.validate('query') 105 | -------------------------------------------------------------------------------- /rorapi/tests/tests_functional/tests_search_v2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | 5 | from .evaluation import search, escape_query 6 | from django.test import SimpleTestCase 7 | from statsmodels.stats.api import DescrStatsW, proportion_confint 8 | 9 | RANK_MAX_QUERY = 2.315534 10 | R1_MIN_QUERY = 0.749118 11 | R5_MIN_QUERY = 0.913082 12 | 13 | RANK_MAX_QUERY_FUZZY = 2.619402 14 | R1_MIN_QUERY_FUZZY = 0.728343 15 | R5_MIN_QUERY_FUZZY = 0.902090 16 | 17 | API_URL = os.environ.get('ROR_BASE_URL', 'http://localhost') 18 | API_VERSION = 'v2' 19 | 20 | 21 | def get_rank(ror_id, items): 22 | for i, item in enumerate(items): 23 | if ror_id == item['id']: 24 | return i + 1 25 | return 21 26 | 27 | 28 | def mean_rank(ranks): 29 | return sum(ranks) / len(ranks), DescrStatsW(ranks).tconfint_mean() 30 | 31 | 32 | def recall_at_n(ranks, n): 33 | s = len([r for r in ranks if r <= n]) 34 | a = len(ranks) 35 | return s / a, proportion_confint(s, a) 36 | 37 | 38 | class SearchTestCase(SimpleTestCase): 39 | def set_up(self, param, rank_max, r1_min, r5_min): 40 | with open( 41 | os.path.join(os.path.dirname(__file__), 42 | 'data/dataset_names.json')) as names_file: 43 | data = json.load(names_file) 44 | data_query = [] 45 | for i, d in enumerate(data): 46 | data_query.append((d, search(API_URL, param, d['affiliation'], API_VERSION))) 47 | if i % 100 == 0: 48 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 49 | self.ranks = [ 50 | get_rank(case['ror-id'], items) for case, items in 
data_query 51 | ] 52 | self.rank_max = rank_max 53 | self.r1_min = r1_min 54 | self.r5_min = r5_min 55 | 56 | def validate(self, name): 57 | mean, ci = mean_rank(self.ranks) 58 | print('\nMean rank for {}: {} {}'.format(name, mean, ci)) 59 | self.assertTrue(mean <= self.rank_max) 60 | 61 | recall_1, ci = recall_at_n(self.ranks, 1) 62 | print('Recall@1 for {}: {} {}'.format(name, recall_1, ci)) 63 | self.assertTrue(recall_1 >= self.r1_min) 64 | 65 | recall_5, ci = recall_at_n(self.ranks, 5) 66 | print('Recall@5 for {}: {} {}'.format(name, recall_5, ci)) 67 | self.assertTrue(recall_5 >= self.r5_min) 68 | 69 | 70 | class QueryFuzzySearchTestCase(SearchTestCase): 71 | def setUp(self): 72 | self.param = 'query' 73 | with open( 74 | os.path.join(os.path.dirname(__file__), 75 | 'data/dataset_names.json')) as names_file: 76 | data = json.load(names_file) 77 | data_query = [] 78 | for i, d in enumerate(data): 79 | data_query.append((d, 80 | search(API_URL, 81 | 'query', 82 | re.sub('([^ ])(?= |$)', r'\g<1>~', 83 | escape_query(d['affiliation'])), 84 | API_VERSION, 85 | escape=False))) 86 | if i % 100 == 0: 87 | print('Progress: {0:.2f}%'.format(100 * i / len(data))) 88 | self.ranks = [ 89 | get_rank(case['ror-id'], items) for case, items in data_query 90 | ] 91 | self.rank_max = RANK_MAX_QUERY_FUZZY 92 | self.r1_min = R1_MIN_QUERY_FUZZY 93 | self.r5_min = R5_MIN_QUERY_FUZZY 94 | 95 | def test_search_query(self): 96 | self.validate('query (fuzzy)') 97 | 98 | 99 | class QuerySearchTestCase(SearchTestCase): 100 | def setUp(self): 101 | self.set_up('query', RANK_MAX_QUERY, R1_MIN_QUERY, R5_MIN_QUERY) 102 | 103 | def test_search_query(self): 104 | self.validate('query') 105 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/tests_integration/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_matching_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import requests 4 | 5 | from django.test import SimpleTestCase 6 | 7 | BASE_URL = '{}/v1/organizations'.format( 8 | os.environ.get('ROR_BASE_URL', 'http://localhost')) 9 | 10 | 11 | class APIMatchingTestCase(SimpleTestCase): 12 | def test_query_organizations(self): 13 | output = requests.get(BASE_URL, { 14 | 'affiliation': 'university of warsaw' 15 | }).json() 16 | 17 | self.assertTrue(output['number_of_results'] > 1) 18 | 19 | for k in ['number_of_results', 'items']: 20 | self.assertTrue(k in output) 21 | 22 | prev = 1 23 | for i in output['items']: 24 | for k in [ 25 | 'substring', 'score', 'matching_type', 'chosen', 26 | 'organization' 27 | ]: 28 | self.assertTrue(k in i) 29 | 30 | for k in ['id', 'name']: 31 | self.assertTrue(k in i.get('organization')) 32 | self.assertIsNotNone( 33 | re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', 34 | i.get('organization').get('id'))) 35 | 36 | self.assertEqual(i.get('substring'), 'university of warsaw') 37 | self.assertTrue(i.get('score') > 0) 38 | self.assertTrue(i.get('score') <= 1) 39 | self.assertTrue(i.get('score') <= prev) 40 | prev = i.get('score') 41 | self.assertTrue( 42 | i.get('matching_type') in 43 | ['PHRASE', 'ACRONYM', 'FUZZY', 'HEURISTICS', 'COMMON TERMS', 'EXACT']) 44 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_matching_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import requests 4 | 5 | from django.test import SimpleTestCase 6 | 7 | BASE_URL = '{}/v2/organizations'.format( 8 | 
os.environ.get('ROR_BASE_URL', 'http://localhost')) 9 | 10 | 11 | class APIMatchingTestCase(SimpleTestCase): 12 | def test_query_organizations(self): 13 | output = requests.get(BASE_URL, { 14 | 'affiliation': 'university of warsaw' 15 | }).json() 16 | 17 | self.assertTrue(output['number_of_results'] > 1) 18 | 19 | for k in ['number_of_results', 'items']: 20 | self.assertTrue(k in output) 21 | 22 | prev = 1 23 | for i in output['items']: 24 | for k in [ 25 | 'substring', 'score', 'matching_type', 'chosen', 26 | 'organization' 27 | ]: 28 | self.assertTrue(k in i) 29 | 30 | for k in ['id', 'names']: 31 | self.assertTrue(k in i.get('organization')) 32 | self.assertIsNotNone( 33 | re.match(r'https:\/\/ror\.org\/0\w{6}\d{2}', 34 | i.get('organization').get('id'))) 35 | 36 | self.assertEqual(i.get('substring'), 'university of warsaw') 37 | self.assertTrue(i.get('score') > 0) 38 | self.assertTrue(i.get('score') <= 1) 39 | self.assertTrue(i.get('score') <= prev) 40 | prev = i.get('score') 41 | self.assertTrue( 42 | i.get('matching_type') in 43 | ['PHRASE', 'ACRONYM', 'FUZZY', 'HEURISTICS', 'COMMON TERMS', 'EXACT']) 44 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_search_v1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | from django.test import SimpleTestCase 5 | 6 | BASE_URL = '{}/v1/organizations'.format( 7 | os.environ.get('ROR_BASE_URL', 'http://localhost')) 8 | 9 | 10 | class QueryTestCase(SimpleTestCase): 11 | def test_exact(self): 12 | items = requests.get(BASE_URL, { 13 | 'query': 'Centro Universitário do Maranhão' 14 | }).json() 15 | self.assertTrue(items['number_of_results'] > 0) 16 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 17 | 18 | items = requests.get(BASE_URL, { 19 | 'query': 'Julius-Maximilians-Universität Würzburg' 20 | }).json() 21 | self.assertTrue(items['number_of_results'] 
> 0) 22 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 23 | 24 | def test_lowercase(self): 25 | items = requests.get(BASE_URL, { 26 | 'query': 'centro universitário do maranhão' 27 | }).json() 28 | self.assertTrue(items['number_of_results'] > 0) 29 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 30 | 31 | items = requests.get(BASE_URL, { 32 | 'query': 'julius-maximilians-universität würzburg' 33 | }).json() 34 | self.assertTrue(items['number_of_results'] > 0) 35 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 36 | 37 | def test_accents_stripped(self): 38 | items = requests.get(BASE_URL, { 39 | 'query': 'centro universitario do maranhao' 40 | }).json() 41 | self.assertTrue(items['number_of_results'] > 0) 42 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 43 | 44 | items = requests.get(BASE_URL, { 45 | 'query': 'julius-maximilians-universitat wurzburg' 46 | }).json() 47 | self.assertTrue(items['number_of_results'] > 0) 48 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 49 | 50 | def test_extra_word(self): 51 | items = requests.get(BASE_URL, { 52 | 'query': 'Centro Universitário do Maranhão School' 53 | }).json() 54 | self.assertTrue(items['number_of_results'] > 0) 55 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 56 | 57 | items = requests.get( 58 | BASE_URL, { 59 | 'query': 'Julius-Maximilians-Universität Würzburg School' 60 | }).json() 61 | self.assertTrue(items['number_of_results'] > 0) 62 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 63 | 64 | 65 | class QueryFuzzyTestCase(SimpleTestCase): 66 | def test_exact(self): 67 | items = requests.get(BASE_URL, { 68 | 'query': 'Centro~ Universitário~ do~ Maranhão~' 69 | }).json() 70 | self.assertTrue(items['number_of_results'] > 0) 71 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 72 | 73 | items = requests.get( 
74 | BASE_URL, { 75 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~' 76 | }).json() 77 | self.assertTrue(items['number_of_results'] > 0) 78 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 79 | 80 | def test_lowercase(self): 81 | items = requests.get(BASE_URL, { 82 | 'query': 'centro~ universitário~ do~ maranhão~' 83 | }).json() 84 | self.assertTrue(items['number_of_results'] > 0) 85 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 86 | 87 | items = requests.get( 88 | BASE_URL, { 89 | 'query': 'julius~ maximilians~ universität~ würzburg~' 90 | }).json() 91 | self.assertTrue(items['number_of_results'] > 0) 92 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 93 | 94 | def test_accents_stripped(self): 95 | items = requests.get(BASE_URL, { 96 | 'query': 'centro~ universitario~ do~ maranhao~' 97 | }).json() 98 | self.assertTrue(items['number_of_results'] > 0) 99 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 100 | 101 | items = requests.get( 102 | BASE_URL, { 103 | 'query': 'julius~ maximilians~ universitat~ wurzburg~' 104 | }).json() 105 | self.assertTrue(items['number_of_results'] > 0) 106 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 107 | 108 | def test_typos(self): 109 | items = requests.get(BASE_URL, { 110 | 'query': 'centre~ universitario~ do~ marahao~' 111 | }).json() 112 | self.assertTrue(items['number_of_results'] > 0) 113 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 114 | 115 | items = requests.get( 116 | BASE_URL, { 117 | 'query': 'julius~ maximilian~ universitat~ wuerzburg~' 118 | }).json() 119 | self.assertTrue(items['number_of_results'] > 0) 120 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') 121 | 122 | def test_extra_word(self): 123 | items = requests.get( 124 | BASE_URL, { 125 | 'query': 'Centro~ Universitário~ do~ Maranhão~ School~' 126 | }).json() 127 | 
self.assertTrue(items['number_of_results'] > 0) 128 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 129 | 130 | items = requests.get( 131 | BASE_URL, { 132 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~ School~' 133 | }).json() 134 | self.assertTrue(items['number_of_results'] > 0) 135 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 136 | -------------------------------------------------------------------------------- /rorapi/tests/tests_integration/tests_search_v2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | from django.test import SimpleTestCase 5 | 6 | BASE_URL = '{}/v2/organizations'.format( 7 | os.environ.get('ROR_BASE_URL', 'http://localhost')) 8 | 9 | 10 | class QueryTestCase(SimpleTestCase): 11 | def test_exact(self): 12 | items = requests.get(BASE_URL, { 13 | 'query': 'Centro Universitário do Maranhão' 14 | }).json() 15 | self.assertTrue(items['number_of_results'] > 0) 16 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 17 | 18 | items = requests.get(BASE_URL, { 19 | 'query': 'Julius-Maximilians-Universität Würzburg' 20 | }).json() 21 | self.assertTrue(items['number_of_results'] > 0) 22 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 23 | 24 | def test_lowercase(self): 25 | items = requests.get(BASE_URL, { 26 | 'query': 'centro universitário do maranhão' 27 | }).json() 28 | self.assertTrue(items['number_of_results'] > 0) 29 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 30 | 31 | items = requests.get(BASE_URL, { 32 | 'query': 'julius-maximilians-universität würzburg' 33 | }).json() 34 | self.assertTrue(items['number_of_results'] > 0) 35 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 36 | 37 | def test_accents_stripped(self): 38 | items = requests.get(BASE_URL, { 39 | 'query': 'centro universitario do maranhao' 40 
| }).json() 41 | self.assertTrue(items['number_of_results'] > 0) 42 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 43 | 44 | items = requests.get(BASE_URL, { 45 | 'query': 'julius-maximilians-universitat wurzburg' 46 | }).json() 47 | self.assertTrue(items['number_of_results'] > 0) 48 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 49 | 50 | def test_extra_word(self): 51 | items = requests.get(BASE_URL, { 52 | 'query': 'Centro Universitário do Maranhão School' 53 | }).json() 54 | self.assertTrue(items['number_of_results'] > 0) 55 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 56 | 57 | items = requests.get( 58 | BASE_URL, { 59 | 'query': 'Julius-Maximilians-Universität Würzburg School' 60 | }).json() 61 | self.assertTrue(items['number_of_results'] > 0) 62 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 63 | 64 | 65 | class QueryFuzzyTestCase(SimpleTestCase): 66 | def test_exact(self): 67 | items = requests.get(BASE_URL, { 68 | 'query': 'Centro~ Universitário~ do~ Maranhão~' 69 | }).json() 70 | self.assertTrue(items['number_of_results'] > 0) 71 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 72 | 73 | items = requests.get( 74 | BASE_URL, { 75 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~' 76 | }).json() 77 | self.assertTrue(items['number_of_results'] > 0) 78 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 79 | 80 | def test_lowercase(self): 81 | items = requests.get(BASE_URL, { 82 | 'query': 'centro~ universitário~ do~ maranhão~' 83 | }).json() 84 | self.assertTrue(items['number_of_results'] > 0) 85 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 86 | 87 | items = requests.get( 88 | BASE_URL, { 89 | 'query': 'julius~ maximilians~ universität~ würzburg~' 90 | }).json() 91 | self.assertTrue(items['number_of_results'] > 0) 92 | self.assertEquals(items['items'][0]['id'], 
'https://ror.org/00fbnyb24') 93 | 94 | def test_accents_stripped(self): 95 | items = requests.get(BASE_URL, { 96 | 'query': 'centro~ universitario~ do~ maranhao~' 97 | }).json() 98 | self.assertTrue(items['number_of_results'] > 0) 99 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 100 | 101 | items = requests.get( 102 | BASE_URL, { 103 | 'query': 'julius~ maximilians~ universitat~ wurzburg~' 104 | }).json() 105 | self.assertTrue(items['number_of_results'] > 0) 106 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 107 | 108 | def test_typos(self): 109 | items = requests.get(BASE_URL, { 110 | 'query': 'centre~ universitario~ do~ marahao~' 111 | }).json() 112 | self.assertTrue(items['number_of_results'] > 0) 113 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 114 | 115 | items = requests.get( 116 | BASE_URL, { 117 | 'query': 'julius~ maximilian~ universitat~ wuerzburg~' 118 | }).json() 119 | self.assertTrue(items['number_of_results'] > 0) 120 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/03pvr2g57') 121 | 122 | def test_extra_word(self): 123 | items = requests.get( 124 | BASE_URL, { 125 | 'query': 'Centro~ Universitário~ do~ Maranhão~ School~' 126 | }).json() 127 | self.assertTrue(items['number_of_results'] > 0) 128 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/044g0p936') 129 | 130 | items = requests.get( 131 | BASE_URL, { 132 | 'query': 'Julius~ Maximilians~ Universität~ Würzburg~ School~' 133 | }).json() 134 | self.assertTrue(items['number_of_results'] > 0) 135 | self.assertEquals(items['items'][0]['id'], 'https://ror.org/00fbnyb24') 136 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/tests/tests_unit/__init__.py -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_address.json: -------------------------------------------------------------------------------- 1 | { 2 | "address": { 3 | "lat": 37.80437, 4 | "lng": -122.2708, 5 | "state": null, 6 | "state_code": null, 7 | "country_geonames_id": 6252001, 8 | "city": "Oakland", 9 | "geonames_city": { 10 | "id": "5378538", 11 | "city": "Oakland", 12 | "geonames_admin1": { 13 | "name": "California", 14 | "ascii_name": "California", 15 | "id": 5332921, 16 | "code": "US.CA" 17 | }, 18 | "geonames_admin2": { 19 | "name": "Alameda", 20 | "id": 5322745, 21 | "ascii_name": "Alameda", 22 | "code": "US.CA.001" 23 | }, 24 | "nuts_level1": { 25 | "name": null, 26 | "code": null 27 | }, 28 | "nuts_level2": { 29 | "name": null, 30 | "code": null 31 | }, 32 | "nuts_level3": { 33 | "name": null, 34 | "code": null 35 | } 36 | } 37 | }, 38 | "country": { 39 | "country_code": "US", 40 | "country_name": "United States" 41 | } 42 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_address_empty.json: -------------------------------------------------------------------------------- 1 | { 2 | "msg": "Expecting value", 3 | "doc": "", 4 | "pos": 0, 5 | "lineno": 1, 6 | "colno": 1 7 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_create_valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 
| "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "established": null, 19 | "external_ids": [ 20 | { 21 | "type": "grid", 22 | "all": [ 23 | "grid.426225.5" 24 | ], 25 | "preferred": "grid.426225.5" 26 | } 27 | ], 28 | "id": "https://ror.org/00wz65j53", 29 | "domains": ["wisc.edu"], 30 | "links": [ 31 | { 32 | "type": "website", 33 | "value": "https://www.jdsu.com" 34 | } 35 | ], 36 | "names": [ 37 | { 38 | "value": "JDSU (Switzerland)", 39 | "types": [ 40 | "ror_display", 41 | "label" 42 | ], 43 | "lang": null 44 | } 45 | ], 46 | "relationships": [ 47 | { 48 | "label": "JDSU (United States)", 49 | "type": "parent", 50 | "id": "https://ror.org/01a5v8x09" 51 | }, 52 | { 53 | "label": "Viavi Solutions (United States)", 54 | "type": "successor", 55 | "id": "https://ror.org/059a9e323" 56 | } 57 | ], 58 | "status": "inactive", 59 | "types": [ 60 | "company" 61 | ], 62 | "admin": { 63 | "created": { 64 | "date": "2023-07-28", 65 | "schema_version": "1.0" 66 | }, 67 | "last_modified": { 68 | "date": "2023-07-28", 69 | "schema_version": "2.0" 70 | } 71 | } 72 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_empty_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "took" : 32, 3 | "timed_out" : false, 4 | "_shards" : { 5 | "total" : 1, 6 | "successful" : 1, 7 | "skipped" : 0, 8 | "failed" : 0 9 | }, 10 | "hits" : { 11 | "total" : { 12 | "value" : 0, 13 | "relation" : "eq" 14 | }, 15 | "max_score" : null, 16 | "hits" : [ ] 17 | }, 18 | "aggregations" : { 19 | "types" : { 20 | "doc_count_error_upper_bound" : 0, 21 | "sum_other_doc_count" : 0, 22 | "buckets" : [ ] 23 | }, 24 | "statuses" : { 25 | "doc_count_error_upper_bound" : 0, 26 | "sum_other_doc_count" : 0, 27 | "buckets" : [ ] 28 | }, 29 | "countries" : { 30 | "doc_count_error_upper_bound" : 0, 31 | "sum_other_doc_count" : 0, 32 | "buckets" : [ ] 33 | } 34 | } 
35 | } 36 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_new_record_invalid_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 | "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "names": [ 19 | { 20 | "value": "JDSU (Switzerland)", 21 | "types": [ 22 | "ror_display", 23 | "label" 24 | ], 25 | "lang": null 26 | } 27 | ], 28 | "status": "active", 29 | "types": [ 30 | "company" 31 | ], 32 | "foo": "bar" 33 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_new_record_valid_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 | "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "names": [ 19 | { 20 | "value": "JDSU (Switzerland)", 21 | "types": [ 22 | "ror_display", 23 | "label" 24 | ], 25 | "lang": null 26 | } 27 | ], 28 | "status": "active", 29 | "types": [ 30 | "company" 31 | ] 32 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_retrieve_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "took" : 7, 3 | "timed_out" : false, 4 | "_shards" : { 5 | "total" : 1, 6 | "successful" : 1, 7 | 
"skipped" : 0, 8 | "failed" : 0 9 | }, 10 | "hits" : { 11 | "total" : { 12 | "value" : 1, 13 | "relation" : "eq" 14 | }, 15 | "max_score" : 11.155889, 16 | "hits" : [ 17 | { 18 | "_index" : "organizations", 19 | "_type" : "_doc", 20 | "_id" : "https://ror.org/02atag894", 21 | "_score" : 11.155889, 22 | "_source" : { 23 | "id" : "https://ror.org/02atag894", 24 | "name" : "Office of Nuclear Physics", 25 | "types" : [ 26 | "Government" 27 | ], 28 | "links" : [ 29 | "https://science.osti.gov/np" 30 | ], 31 | "aliases" : [ ], 32 | "acronyms" : [ 33 | "NP" 34 | ], 35 | "status" : "active", 36 | "wikipedia_url" : "", 37 | "labels" : [ ], 38 | "email_address" : null, 39 | "ip_addresses" : [ ], 40 | "established" : 1996, 41 | "country" : { 42 | "country_code" : "US", 43 | "country_name" : "United States" 44 | }, 45 | "relationships" : [ 46 | { 47 | "type" : "Parent", 48 | "label" : "Office of Science", 49 | "id" : "https://ror.org/00mmn6b08" 50 | } 51 | ], 52 | "addresses" : [ 53 | { 54 | "line" : null, 55 | "lat" : 38.88758, 56 | "lng" : -77.025728, 57 | "postcode" : null, 58 | "primary" : false, 59 | "city" : "Washington D.C.", 60 | "state" : "District of Columbia", 61 | "state_code" : "US-DC", 62 | "country_geonames_id" : 6252001, 63 | "geonames_city" : { 64 | "id" : 4140963, 65 | "city" : "Washington, D.C.", 66 | "nuts_level1" : { 67 | "code" : null, 68 | "name" : null 69 | }, 70 | "nuts_level2" : { 71 | "code" : null, 72 | "name" : null 73 | }, 74 | "nuts_level3" : { 75 | "code" : null, 76 | "name" : null 77 | }, 78 | "geonames_admin1" : { 79 | "id" : 4138106, 80 | "name" : "Washington, D.C.", 81 | "ascii_name" : "Washington, D.C.", 82 | "code" : "US.DC" 83 | }, 84 | "geonames_admin2" : { 85 | "id" : 4140987, 86 | "name" : "Washington County", 87 | "ascii_name" : "Washington County", 88 | "code" : "US.DC.001" 89 | }, 90 | "license" : { 91 | "attribution" : "Data from geonames.org under a CC-BY 3.0 license", 92 | "license" : "http://creativecommons.org/licenses/by/3.0/" 
93 | } 94 | } 95 | } 96 | ], 97 | "external_ids" : { 98 | "ISNI" : { 99 | "preferred" : null, 100 | "all" : [ 101 | "0000 0004 5897 7463" 102 | ] 103 | }, 104 | "FundRef" : { 105 | "preferred" : null, 106 | "all" : [ 107 | "100006209" 108 | ] 109 | }, 110 | "Wikidata" : { 111 | "preferred" : null, 112 | "all" : [ 113 | "Q30296535" 114 | ] 115 | }, 116 | "GRID" : { 117 | "preferred" : "grid.453025.5", 118 | "all" : "grid.453025.5" 119 | } 120 | } 121 | } 122 | } 123 | ] 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_data_retrieve_es7_v2.json: -------------------------------------------------------------------------------- 1 | { 2 | "took": 85, 3 | "timed_out": false, 4 | "_shards": { 5 | "total": 1, 6 | "successful": 1, 7 | "skipped": 0, 8 | "failed": 0 9 | }, 10 | "hits": { 11 | "total": { 12 | "value": 1, 13 | "relation": "eq" 14 | }, 15 | "max_score": 11.164284, 16 | "hits": [ 17 | { 18 | "_index": "organizations-v2", 19 | "_type": "_doc", 20 | "_id": "https://ror.org/02atag894", 21 | "_score": 11.164284, 22 | "_source": { 23 | "admin": { 24 | "created": { 25 | "date": "2023-07-28", 26 | "schema_version": "1.0" 27 | }, 28 | "last_modified": { 29 | "date": "2023-07-28", 30 | "schema_version": "2.0" 31 | } 32 | }, 33 | "domains": [], 34 | "established": 1996, 35 | "external_ids": [ 36 | { 37 | "all": [ 38 | "0000 0004 5897 7463" 39 | ], 40 | "preferred": null, 41 | "type": "isni" 42 | }, 43 | { 44 | "all": [ 45 | "100006209" 46 | ], 47 | "preferred": null, 48 | "type": "fundref" 49 | }, 50 | { 51 | "all": [ 52 | "Q30296535" 53 | ], 54 | "preferred": null, 55 | "type": "wikidata" 56 | }, 57 | { 58 | "all": [ 59 | "grid.453025.5" 60 | ], 61 | "preferred": "grid.453025.5", 62 | "type": "grid" 63 | } 64 | ], 65 | "id": "https://ror.org/02atag894", 66 | "links": [ 67 | { 68 | "type": "website", 69 | "value": "https://science.osti.gov/np" 70 | } 71 | ], 72 | "locations": [ 73 | { 
74 | "geonames_details": { 75 | "continent_code": "NA", 76 | "continent_name": "North America", 77 | "country_code": "US", 78 | "country_name": "United States", 79 | "country_subdivision_code": "DC", 80 | "country_subdivision_name": "District of Columbia", 81 | "lat": 38.88758, 82 | "lng": -77.025728, 83 | "name": "Washington, D.C." 84 | }, 85 | "geonames_id": 4140963 86 | } 87 | ], 88 | "names": [ 89 | { 90 | "lang": null, 91 | "types": [ 92 | "ror_display", 93 | "label" 94 | ], 95 | "value": "Office of Nuclear Physics" 96 | }, 97 | { 98 | "lang": null, 99 | "types": [ 100 | "acronym" 101 | ], 102 | "value": "NP" 103 | } 104 | ], 105 | "relationships": [ 106 | { 107 | "id": "https://ror.org/00mmn6b08", 108 | "label": "Office of Science", 109 | "type": "parent" 110 | } 111 | ], 112 | "status": "active", 113 | "types": [ 114 | "government" 115 | ] 116 | } 117 | } 118 | ] 119 | } 120 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_update_valid.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [ 3 | { 4 | "geonames_id": 2661552, 5 | "geonames_details": { 6 | "continent_code": "EU", 7 | "contient_name": "Europe", 8 | "country_code": "CH", 9 | "country_name": "Switzerland", 10 | "country_subdivision_code": "BE", 11 | "country_subdivision_name": "Bern", 12 | "lat": 46.94809, 13 | "lng": 7.44744, 14 | "name": "Bern" 15 | } 16 | } 17 | ], 18 | "established": null, 19 | "external_ids": [ 20 | { 21 | "type": "grid", 22 | "all": [ 23 | "grid.426225.5" 24 | ], 25 | "preferred": "grid.426225.5" 26 | } 27 | ], 28 | "id": "https://ror.org/00wz65j53", 29 | "domains": ["wisc.edu"], 30 | "links": [ 31 | { 32 | "type": "website", 33 | "value": "https://www.jdsu.com" 34 | } 35 | ], 36 | "names": [ 37 | { 38 | "value": "JDSU (Switzerland)", 39 | "types": [ 40 | "ror_display", 41 | "label" 42 | ], 43 | "lang": null 44 | } 45 | ], 46 | "relationships": [ 47 
| { 48 | "label": "JDSU (United States)", 49 | "type": "parent", 50 | "id": "https://ror.org/01a5v8x09" 51 | }, 52 | { 53 | "label": "Viavi Solutions (United States)", 54 | "type": "successor", 55 | "id": "https://ror.org/059a9e323" 56 | } 57 | ], 58 | "status": "inactive", 59 | "types": [ 60 | "company" 61 | ], 62 | "admin": { 63 | "created": { 64 | "date": "2023-07-28", 65 | "schema_version": "1.0" 66 | }, 67 | "last_modified": { 68 | "date": "2023-07-28", 69 | "schema_version": "2.0" 70 | } 71 | } 72 | } -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/data/test_upload_csv.csv: -------------------------------------------------------------------------------- 1 | html_url,id,names.types.ror_display,status,types,names.types.alias,names.types.label,names.types.acronym,links.type.website,links.type.wikipedia,domains,established,external_ids.type.fundref.all,external_ids.type.fundref.preferred,external_ids.type.grid.all,external_ids.type.grid.preferred,external_ids.type.isni.all,external_ids.type.isni.preferred,external_ids.type.wikidata.all,external_ids.type.wikidata.preferred,city,country,locations.geonames_id 2 | https://github.com/ror-community/ror-updates/issues/9185,,Jizzakh branch of the National University of Uzbekistan named after Mirzo Ulugbek,active,Education,Jizzakh branch of the National University of Uzbekistan; Mirzo Ulug`bek nomidagi O`zbekiston milliy universiteti Jizzax filiali; Джизакский филиал Национального университета Узбекистана имени Мирзо Улугбека,Mirzo Ulug`bek nomidagi O`zbekiston milliy universiteti Jizzax filiali*Uzbek,,https://jbnuu.uz,https://uz.wikipedia.org/wiki/O%CA%BBzbekiston_milliy_universitetining_Jizzax_filiali,,2019,,,,,,,Q72342707,Q72342707,Jizzakh,Uzbekistan,1513886 3 | https://github.com/ror-community/ror-updates/issues/9389,,Znanstveno-raziskovalno središče Koper,active,Facility; Government,SRC Koper; ZRS Koper;,Science and Research Centre of Koper*English; Centro di 
ricerche scientifiche Capodistria*Italian,,https://www.zrs-kp.si;,,,,,,,,0000 0004 0398 0403,0000 0004 0398 0403,Q49569044,Q49569044,Koper,Slovenia,3197753 -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_client.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | from rorapi.v2.models import Client 3 | 4 | class ClientTests(TestCase): 5 | def test_client_registration(self): 6 | client = Client.objects.create(email='test@example.com') 7 | self.assertIsNotNone(client.client_id) -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_es_utils_v1.py: -------------------------------------------------------------------------------- 1 | from django.test import SimpleTestCase 2 | from rorapi.common.es_utils import ESQueryBuilder 3 | 4 | class QueryBuilderTestCaseV1(SimpleTestCase): 5 | V1_VERSION = 'v1' 6 | def test_id_query(self): 7 | qb = ESQueryBuilder(self.V1_VERSION) 8 | qb.add_id_query('ror-id') 9 | 10 | self.assertEqual(qb.get_query().to_dict(), { 11 | 'query': { 12 | 'match': { 13 | 'id': { 14 | 'query': 'ror-id', 15 | 'operator': 'and' 16 | } 17 | } 18 | }, 19 | 'track_total_hits': True 20 | }) 21 | 22 | def test_match_all_query(self): 23 | qb = ESQueryBuilder(self.V1_VERSION) 24 | qb.add_match_all_query() 25 | 26 | self.assertEqual(qb.get_query().to_dict(), 27 | {'query': { 28 | 'match_all': {} 29 | }, 30 | 'track_total_hits': True 31 | }) 32 | 33 | def test_string_query(self): 34 | qb = ESQueryBuilder(self.V1_VERSION) 35 | qb.add_string_query('query terms') 36 | 37 | self.assertEqual( 38 | qb.get_query().to_dict(), { 39 | 'query': { 40 | 'nested': { 41 | 'path': 'names_ids', 42 | 'score_mode': 'max', 43 | 'query': { 44 | 'query_string': { 45 | 'query': 'query terms', 46 | 'fuzzy_max_expansions': 1 47 | } 48 | } 49 | } 50 | }, 51 | 'track_total_hits': True 52 | }) 53 | def 
test_string_query_advanced(self): 54 | qb = ESQueryBuilder(self.V1_VERSION) 55 | qb.add_string_query_advanced('query terms') 56 | 57 | self.assertEqual( 58 | qb.get_query().to_dict(), { 59 | 'query': { 60 | 'bool': { 61 | 'must': [{ 62 | 'query_string': { 63 | 'query': 'query terms', 64 | 'default_field': '*', 65 | 'default_operator':'and', 66 | 'fuzzy_max_expansions': 1 67 | } 68 | }] 69 | } 70 | }, 71 | 'track_total_hits': True 72 | }) 73 | 74 | def test_phrase_query(self): 75 | qb = ESQueryBuilder(self.V1_VERSION) 76 | qb.add_phrase_query(['f1', 'f2'], 'query terms') 77 | 78 | self.assertEqual( 79 | qb.get_query().to_dict(), { 80 | 'query': { 81 | 'dis_max': { 82 | 'queries': [{ 83 | 'match_phrase': { 84 | 'f1': 'query terms' 85 | } 86 | }, { 87 | 'match_phrase': { 88 | 'f2': 'query terms' 89 | } 90 | }] 91 | } 92 | }, 93 | 'track_total_hits': True 94 | }) 95 | 96 | def test_common_query(self): 97 | qb = ESQueryBuilder(self.V1_VERSION) 98 | qb.add_common_query(['f1', 'f2'], 'query terms') 99 | 100 | self.assertEqual( 101 | qb.get_query().to_dict(), { 102 | 'query': { 103 | 'dis_max': { 104 | 'queries': [{ 105 | 'common': { 106 | 'f1': { 107 | 'query': 'query terms', 108 | 'cutoff_frequency': 0.001 109 | } 110 | } 111 | }, { 112 | 'common': { 113 | 'f2': { 114 | 'query': 'query terms', 115 | 'cutoff_frequency': 0.001 116 | } 117 | } 118 | }] 119 | } 120 | }, 121 | 'track_total_hits': True 122 | }) 123 | 124 | def test_match_query(self): 125 | qb = ESQueryBuilder(self.V1_VERSION) 126 | qb.add_match_query('query terms') 127 | 128 | self.assertEqual(qb.get_query().to_dict(), 129 | {'query': { 130 | 'match': { 131 | 'acronyms': 'query terms' 132 | } 133 | }, 134 | 'track_total_hits': True 135 | }) 136 | 137 | def test_fuzzy_query(self): 138 | qb = ESQueryBuilder(self.V1_VERSION) 139 | qb.add_fuzzy_query(['f1', 'f2'], 'query terms') 140 | 141 | self.assertEqual( 142 | qb.get_query().to_dict(), { 143 | 'query': { 144 | 'dis_max': { 145 | 'queries': [{ 146 | 'match': { 
147 | 'f1': { 148 | 'query': 'query terms', 149 | 'fuzziness': 'AUTO' 150 | } 151 | } 152 | }, { 153 | 'match': { 154 | 'f2': { 155 | 'query': 'query terms', 156 | 'fuzziness': 'AUTO' 157 | } 158 | } 159 | }] 160 | } 161 | }, 162 | 'track_total_hits': True 163 | }) 164 | 165 | def test_add_filters(self): 166 | qb = ESQueryBuilder(self.V1_VERSION) 167 | qb.add_match_all_query() 168 | qb.add_filters({'key1': ['val1'], 'k2': ['value2']}) 169 | 170 | self.assertEqual( 171 | qb.get_query().to_dict(), { 172 | 'query': { 173 | 'bool': { 174 | 'filter': [{ 175 | 'terms': { 176 | 'key1': ['val1'] 177 | } 178 | }, { 179 | 'terms': { 180 | 'k2': ['value2'] 181 | } 182 | }] 183 | } 184 | }, 185 | 'track_total_hits': True 186 | }) 187 | 188 | def test_add_aggregations(self): 189 | qb = ESQueryBuilder(self.V1_VERSION) 190 | qb.add_match_all_query() 191 | qb.add_aggregations([('countries', 'code'), ('types', 'type')]) 192 | 193 | self.assertEqual( 194 | qb.get_query().to_dict(), { 195 | 'query': { 196 | 'match_all': {} 197 | }, 198 | 'track_total_hits': True, 199 | 'aggs': { 200 | 'countries': { 201 | 'terms': { 202 | 'field': 'code', 203 | 'min_doc_count': 1, 204 | 'size': 10 205 | } 206 | }, 207 | 'types': { 208 | 'terms': { 209 | 'field': 'type', 210 | 'min_doc_count': 1, 211 | 'size': 10 212 | } 213 | } 214 | } 215 | }) 216 | 217 | def test_paginate(self): 218 | qb = ESQueryBuilder(self.V1_VERSION) 219 | qb.add_match_all_query() 220 | qb.paginate(10) 221 | 222 | self.assertEqual(qb.get_query().to_dict(), { 223 | 'query': { 224 | 'match_all': {} 225 | }, 226 | 'from': 180, 227 | 'size': 20, 228 | 'track_total_hits': True 229 | }) 230 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_es_utils_v2.py: -------------------------------------------------------------------------------- 1 | from django.test import SimpleTestCase 2 | from rorapi.common.es_utils import ESQueryBuilder 3 | 4 | class 
QueryBuilderTestCaseV2(SimpleTestCase): 5 | V2_VERSION = 'v2' 6 | def test_id_query(self): 7 | qb = ESQueryBuilder(self.V2_VERSION) 8 | qb.add_id_query('ror-id') 9 | 10 | self.assertEqual(qb.get_query().to_dict(), { 11 | 'query': { 12 | 'match': { 13 | 'id': { 14 | 'query': 'ror-id', 15 | 'operator': 'and' 16 | } 17 | } 18 | }, 19 | 'track_total_hits': True 20 | }) 21 | 22 | def test_match_all_query(self): 23 | qb = ESQueryBuilder(self.V2_VERSION) 24 | qb.add_match_all_query() 25 | 26 | self.assertEqual(qb.get_query().to_dict(), 27 | {'query': { 28 | 'match_all': {} 29 | }, 30 | 'track_total_hits': True 31 | }) 32 | 33 | def test_string_query(self): 34 | qb = ESQueryBuilder(self.V2_VERSION) 35 | qb.add_string_query('query terms') 36 | 37 | self.assertEqual( 38 | qb.get_query().to_dict(), { 39 | 'query': { 40 | 'nested': { 41 | 'path': 'names_ids', 42 | 'score_mode': 'max', 43 | 'query': { 44 | 'query_string': { 45 | 'query': 'query terms', 46 | 'fuzzy_max_expansions': 1 47 | } 48 | } 49 | } 50 | }, 51 | 'track_total_hits': True 52 | }) 53 | def test_string_query_advanced(self): 54 | qb = ESQueryBuilder(self.V2_VERSION) 55 | qb.add_string_query_advanced('query terms') 56 | 57 | self.assertEqual( 58 | qb.get_query().to_dict(), { 59 | 'query': { 60 | 'bool': { 61 | 'must': [{ 62 | 'query_string': { 63 | 'query': 'query terms', 64 | 'default_field': '*', 65 | 'default_operator':'and', 66 | 'fuzzy_max_expansions': 1 67 | } 68 | }] 69 | } 70 | }, 71 | 'track_total_hits': True 72 | }) 73 | 74 | def test_phrase_query(self): 75 | qb = ESQueryBuilder(self.V2_VERSION) 76 | qb.add_phrase_query(['f1', 'f2'], 'query terms') 77 | 78 | self.assertEqual( 79 | qb.get_query().to_dict(), { 80 | 'query': { 81 | 'dis_max': { 82 | 'queries': [{ 83 | 'match_phrase': { 84 | 'f1': 'query terms' 85 | } 86 | }, { 87 | 'match_phrase': { 88 | 'f2': 'query terms' 89 | } 90 | }] 91 | } 92 | }, 93 | 'track_total_hits': True 94 | }) 95 | 96 | def test_common_query(self): 97 | qb = 
ESQueryBuilder(self.V2_VERSION) 98 | qb.add_common_query(['f1', 'f2'], 'query terms') 99 | 100 | self.assertEqual( 101 | qb.get_query().to_dict(), { 102 | 'query': { 103 | 'dis_max': { 104 | 'queries': [{ 105 | 'common': { 106 | 'f1': { 107 | 'query': 'query terms', 108 | 'cutoff_frequency': 0.001 109 | } 110 | } 111 | }, { 112 | 'common': { 113 | 'f2': { 114 | 'query': 'query terms', 115 | 'cutoff_frequency': 0.001 116 | } 117 | } 118 | }] 119 | } 120 | }, 121 | 'track_total_hits': True 122 | }) 123 | 124 | def test_match_query(self): 125 | qb = ESQueryBuilder(self.V2_VERSION) 126 | qb.add_match_query('query terms') 127 | 128 | self.assertEqual(qb.get_query().to_dict(), 129 | {'query': { 130 | 'match': { 131 | 'acronyms': 'query terms' 132 | } 133 | }, 134 | 'track_total_hits': True 135 | }) 136 | 137 | def test_fuzzy_query(self): 138 | qb = ESQueryBuilder(self.V2_VERSION) 139 | qb.add_fuzzy_query(['f1', 'f2'], 'query terms') 140 | 141 | self.assertEqual( 142 | qb.get_query().to_dict(), { 143 | 'query': { 144 | 'dis_max': { 145 | 'queries': [{ 146 | 'match': { 147 | 'f1': { 148 | 'query': 'query terms', 149 | 'fuzziness': 'AUTO' 150 | } 151 | } 152 | }, { 153 | 'match': { 154 | 'f2': { 155 | 'query': 'query terms', 156 | 'fuzziness': 'AUTO' 157 | } 158 | } 159 | }] 160 | } 161 | }, 162 | 'track_total_hits': True 163 | }) 164 | 165 | def test_add_filters(self): 166 | qb = ESQueryBuilder(self.V2_VERSION) 167 | qb.add_match_all_query() 168 | qb.add_filters({'key1': ['val1'], 'k2': ['value2']}) 169 | 170 | self.assertEqual( 171 | qb.get_query().to_dict(), { 172 | 'query': { 173 | 'bool': { 174 | 'filter': [{ 175 | 'terms': { 176 | 'key1': ['val1'] 177 | } 178 | }, { 179 | 'terms': { 180 | 'k2': ['value2'] 181 | } 182 | }] 183 | } 184 | }, 185 | 'track_total_hits': True 186 | }) 187 | 188 | def test_add_aggregations(self): 189 | qb = ESQueryBuilder(self.V2_VERSION) 190 | qb.add_match_all_query() 191 | qb.add_aggregations([('countries', 'code'), ('types', 'type')]) 192 
| 193 | self.assertEqual( 194 | qb.get_query().to_dict(), { 195 | 'query': { 196 | 'match_all': {} 197 | }, 198 | 'track_total_hits': True, 199 | 'aggs': { 200 | 'countries': { 201 | 'terms': { 202 | 'field': 'code', 203 | 'min_doc_count': 1, 204 | 'size': 10 205 | } 206 | }, 207 | 'types': { 208 | 'terms': { 209 | 'field': 'type', 210 | 'min_doc_count': 1, 211 | 'size': 10 212 | } 213 | } 214 | } 215 | }) 216 | 217 | def test_paginate(self): 218 | qb = ESQueryBuilder(self.V2_VERSION) 219 | qb.add_match_all_query() 220 | qb.paginate(10) 221 | 222 | self.assertEqual(qb.get_query().to_dict(), { 223 | 'query': { 224 | 'match_all': {} 225 | }, 226 | 'from': 180, 227 | 'size': 20, 228 | 'track_total_hits': True 229 | }) 230 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/tests_models_common.py: -------------------------------------------------------------------------------- 1 | from django.test import SimpleTestCase 2 | 3 | from rorapi.common.models import CountryBucket, Entity, Errors, TypeBucket 4 | from .utils import AttrDict 5 | 6 | 7 | class EntityTestCase(SimpleTestCase): 8 | def test_attributes_exist(self): 9 | data = {"a": 0, "b": 123, "third": "a thousand"} 10 | entity = Entity(AttrDict(data), ["a", "third", "b"]) 11 | self.assertEqual(entity.a, data["a"]) 12 | self.assertEqual(entity.b, data["b"]) 13 | self.assertEqual(entity.third, data["third"]) 14 | 15 | def test_omits_attributes(self): 16 | data = {"a": 0, "b": 123, "third": "a thousand"} 17 | entity = Entity(AttrDict(data), ["a"]) 18 | self.assertEqual(entity.a, data["a"]) 19 | msg = "'Entity' object has no attribute '{}'" 20 | with self.assertRaisesMessage(AttributeError, msg.format("b")): 21 | entity.b 22 | with self.assertRaisesMessage(AttributeError, msg.format("third")): 23 | entity.third 24 | 25 | 26 | class TypeBucketTestCase(SimpleTestCase): 27 | def test_attributes_exist(self): 28 | bucket = TypeBucket(AttrDict({"key": "Type", 
"doc_count": 482})) 29 | self.assertEqual(bucket.id, "type") 30 | self.assertEqual(bucket.title, "Type") 31 | self.assertEqual(bucket.count, 482) 32 | 33 | 34 | class CountryBucketTestCase(SimpleTestCase): 35 | def test_attributes_exist(self): 36 | bucket = CountryBucket(AttrDict({"key": "IE", "doc_count": 4821})) 37 | self.assertEqual(bucket.id, "ie") 38 | self.assertEqual(bucket.title, "Ireland") 39 | self.assertEqual(bucket.count, 4821) 40 | 41 | 42 | class ErrorsTestCase(SimpleTestCase): 43 | def test_attributes_exist(self): 44 | data = ["err1", "e2", "terrible error 3"] 45 | error = Errors(data) 46 | self.assertEqual(error.errors, data) 47 | -------------------------------------------------------------------------------- /rorapi/tests/tests_unit/utils.py: -------------------------------------------------------------------------------- 1 | class AttrDict(dict): 2 | def __init__(self, nested_dict): 3 | for k, v in nested_dict.items(): 4 | if isinstance(v, dict): 5 | self[k] = AttrDict(v) 6 | elif isinstance(v, list): 7 | self[k] = [ 8 | AttrDict(e) if isinstance(e, dict) else e for e in v 9 | ] 10 | else: 11 | self[k] = v 12 | 13 | def __getattr__(self, attr): 14 | if attr not in self: 15 | raise AttributeError( 16 | '\'AttrDict\' object has no attribute \'{}\''.format(attr)) 17 | return self[attr] 18 | 19 | 20 | class IterableAttrDict(): 21 | def __init__(self, nested_dict, iter_list): 22 | self.attr_dict = AttrDict(nested_dict) 23 | self.iter_list = [AttrDict(i) for i in iter_list] 24 | 25 | def __iter__(self): 26 | return iter(self.iter_list) 27 | 28 | def __getitem__(self, key): 29 | return self.iter_list[key] 30 | 31 | def __getattr__(self, attr): 32 | return self.attr_dict.__getattr__(attr) 33 | -------------------------------------------------------------------------------- /rorapi/v1/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/v1/__init__.py -------------------------------------------------------------------------------- /rorapi/v1/index_template_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_patterns": [ 3 | "organizations" 4 | ], 5 | "settings": { 6 | "number_of_shards": 1, 7 | "analysis": { 8 | "analyzer": { 9 | "string_lowercase": { 10 | "tokenizer": "standard", 11 | "filter": [ 12 | "lowercase", 13 | "ascii_folding" 14 | ] 15 | } 16 | }, 17 | "filter": { 18 | "ascii_folding": { 19 | "type": "asciifolding", 20 | "preserve_original": true 21 | } 22 | } 23 | } 24 | }, 25 | "mappings": { 26 | "properties": { 27 | "id": { 28 | "type": "keyword" 29 | }, 30 | "name": { 31 | "type": "text", 32 | "fields": { 33 | "keyword": { 34 | "type": "keyword" 35 | }, 36 | "norm": { 37 | "type": "text", 38 | "analyzer": "string_lowercase", 39 | "fielddata": true 40 | } 41 | } 42 | }, 43 | "links": { 44 | "type": "text", 45 | "analyzer": "simple" 46 | }, 47 | "wikipedia_url": { 48 | "type": "text", 49 | "analyzer": "simple" 50 | }, 51 | "aliases": { 52 | "type": "text", 53 | "fields": { 54 | "keyword": { 55 | "type": "keyword" 56 | }, 57 | "norm": { 58 | "type": "text", 59 | "analyzer": "string_lowercase", 60 | "fielddata": true 61 | } 62 | } 63 | }, 64 | "acronyms": { 65 | "type": "text", 66 | "fields": { 67 | "keyword": { 68 | "type": "keyword" 69 | }, 70 | "norm": { 71 | "type": "text", 72 | "analyzer": "string_lowercase", 73 | "fielddata": true 74 | } 75 | } 76 | }, 77 | "status": { 78 | "type": "keyword" 79 | }, 80 | "labels": { 81 | "properties": { 82 | "label": { 83 | "type": "text", 84 | "fields": { 85 | "keyword": { 86 | "type": "keyword" 87 | }, 88 | "norm": { 89 | "type": "text", 90 | "analyzer": "string_lowercase", 91 | "fielddata": true 92 | } 93 | } 94 | }, 95 | "iso639": { 96 | "type": "keyword" 97 | } 98 | } 99 | }, 100 | 
"country": { 101 | "properties": { 102 | "country_code": { 103 | "type": "keyword" 104 | }, 105 | "country_name": { 106 | "type": "keyword" 107 | } 108 | } 109 | }, 110 | "types": { 111 | "type": "keyword" 112 | }, 113 | "email_address": { 114 | "type": "text" 115 | }, 116 | "established": { 117 | "type": "date" 118 | }, 119 | "ip_addresses": { 120 | "type": "text" 121 | }, 122 | "addresses": { 123 | "properties": { 124 | "line": { 125 | "type": "text" 126 | }, 127 | "lat": { 128 | "type": "float" 129 | }, 130 | "lng": { 131 | "type": "float" 132 | }, 133 | "postcode": { 134 | "type": "keyword" 135 | }, 136 | "primary": { 137 | "type": "boolean" 138 | }, 139 | "city": { 140 | "type": "keyword" 141 | }, 142 | "state": { 143 | "type": "keyword" 144 | }, 145 | "state_code": { 146 | "type": "keyword" 147 | }, 148 | "geonames_city": { 149 | "properties": { 150 | "id": { 151 | "type": "integer" 152 | }, 153 | "city": { 154 | "type": "keyword" 155 | }, 156 | "nuts_level1": { 157 | "properties": { 158 | "name": { 159 | "type": "text" 160 | }, 161 | "code": { 162 | "type": "keyword" 163 | } 164 | } 165 | }, 166 | "nuts_level2": { 167 | "properties": { 168 | "name": { 169 | "type": "text" 170 | }, 171 | "code": { 172 | "type": "keyword" 173 | } 174 | } 175 | }, 176 | "nuts_level3": { 177 | "properties": { 178 | "name": { 179 | "type": "text" 180 | }, 181 | "code": { 182 | "type": "keyword" 183 | } 184 | } 185 | }, 186 | "geonames_admin1": { 187 | "properties": { 188 | "ascii_name": { 189 | "type": "keyword" 190 | }, 191 | "name": { 192 | "type": "keyword" 193 | }, 194 | "code": { 195 | "type": "keyword" 196 | } 197 | } 198 | }, 199 | "geonames_admin2": { 200 | "properties": { 201 | "ascii_name": { 202 | "type": "keyword" 203 | }, 204 | "name": { 205 | "type": "keyword" 206 | }, 207 | "code": { 208 | "type": "keyword" 209 | } 210 | } 211 | }, 212 | "license": { 213 | "properties": { 214 | "attribution": { 215 | "type": "text" 216 | }, 217 | "license": { 218 | "type": "text" 
219 | } 220 | } 221 | } 222 | } 223 | } 224 | } 225 | }, 226 | "relationships": { 227 | "properties": { 228 | "type": { 229 | "type": "keyword" 230 | }, 231 | "label": { 232 | "type": "text", 233 | "fields": { 234 | "keyword": { 235 | "type": "keyword" 236 | }, 237 | "norm": { 238 | "type": "text", 239 | "analyzer": "string_lowercase", 240 | "fielddata": true 241 | } 242 | } 243 | }, 244 | "id": { 245 | "type": "keyword" 246 | } 247 | } 248 | }, 249 | "external_ids": { 250 | "properties": { 251 | "GRID": { 252 | "properties": { 253 | "preferred": { 254 | "type": "keyword" 255 | }, 256 | "all": { 257 | "type": "keyword" 258 | } 259 | } 260 | }, 261 | "ISNI": { 262 | "properties": { 263 | "preferred": { 264 | "type": "keyword" 265 | }, 266 | "all": { 267 | "type": "keyword" 268 | } 269 | } 270 | }, 271 | "FundRef": { 272 | "properties": { 273 | "preferred": { 274 | "type": "keyword" 275 | }, 276 | "all": { 277 | "type": "keyword" 278 | } 279 | } 280 | }, 281 | "Wikidata": { 282 | "properties": { 283 | "preferred": { 284 | "type": "keyword" 285 | }, 286 | "all": { 287 | "type": "keyword" 288 | } 289 | } 290 | } 291 | } 292 | }, 293 | "names_ids": { 294 | "type": "nested", 295 | "properties": { 296 | "id": { 297 | "type": "keyword" 298 | }, 299 | "name": { 300 | "type": "text", 301 | "analyzer": "string_lowercase" 302 | } 303 | } 304 | } 305 | } 306 | } 307 | } -------------------------------------------------------------------------------- /rorapi/v1/models.py: -------------------------------------------------------------------------------- 1 | from geonamescache.mappers import country 2 | from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity 3 | 4 | class Aggregations: 5 | """Aggregations model class""" 6 | 7 | def __init__(self, data): 8 | self.types = [TypeBucket(b) for b in data.types.buckets] 9 | self.countries = [CountryBucket(b) for b in data.countries.buckets] 10 | self.statuses = [StatusBucket(b) for b in data.statuses.buckets] 11 | 
12 | class GeoAdmin: 13 | def __init__(self, data): 14 | if hasattr(data, 'id'): 15 | self.id = data.id 16 | else: 17 | self.id = None 18 | if hasattr(data, 'code'): 19 | self.code = data.code 20 | else: 21 | self.code = None 22 | if hasattr(data, 'name'): 23 | self.name = data.name 24 | else: 25 | self.name = None 26 | if hasattr(data, 'ascii_name'): 27 | self.ascii_name = data.ascii_name 28 | else: 29 | self.ascii_name = None 30 | 31 | 32 | class Nuts: 33 | """A model class for storing the NUTS metadata""" 34 | def __init__(self, data): 35 | self.code = getattr(data, 'code', None) 36 | self.name = getattr(data, 'name', None) 37 | 38 | 39 | class License: 40 | """A model class for storing license metadata""" 41 | def __init__(self, data): 42 | self.attribution = getattr(data, 'attribution', None) 43 | self.license = getattr(data, 'license', None) 44 | 45 | 46 | class GeoNamesCity: 47 | """A model class for storing geonames city hash""" 48 | def __init__(self, data): 49 | self.id = getattr(data, 'id', None) 50 | self.city = getattr(data, 'city', None) 51 | if hasattr(data, 'license'): 52 | self.license = License(data.license) 53 | else: 54 | self.license = None 55 | if hasattr(data, 'geonames_admin1'): 56 | self.geonames_admin1 = GeoAdmin(data.geonames_admin1) 57 | else: 58 | self.geonames_admin1 = None 59 | if hasattr(data, 'geonames_admin2'): 60 | self.geonames_admin2 = GeoAdmin(data.geonames_admin2) 61 | else: 62 | self.geonames_admin2 = None 63 | if hasattr(data, 'nuts_level1'): 64 | self.nuts_level1 = GeoAdmin(data.nuts_level1) 65 | else: 66 | self.nuts_level1 = None 67 | if hasattr(data, 'nuts_level2'): 68 | self.nuts_level2 = GeoAdmin(data.nuts_level2) 69 | else: 70 | self.nuts_level2 = None 71 | if hasattr(data, 'nuts_level3'): 72 | self.nuts_level3 = GeoAdmin(data.nuts_level3) 73 | else: 74 | self.nuts_level3 = None 75 | 76 | 77 | class Addresses: 78 | """A model class for storing addresses""" 79 | def __init__(self, data): 80 | self.country_geonames_id = 
data.country_geonames_id 81 | self.lat = data.lat 82 | self.lng = data.lng 83 | self.line = data.line 84 | self.state_code = data.state_code 85 | self.state = getattr(data, 'state', None) 86 | self.postcode = data.postcode 87 | self.city = data.city 88 | self.primary = data.primary 89 | self.geonames_city = GeoNamesCity(data.geonames_city) 90 | 91 | 92 | class ExternalIds: 93 | """A model class for storing external identifiers""" 94 | def __init__(self, data): 95 | for a in [ 96 | 'ISNI', 'FundRef', 'HESA', 'UCAS', 'UKPRN', 'CNRS', 'OrgRef', 97 | 'Wikidata', 'GRID' 98 | ]: 99 | try: 100 | setattr(self, a, Entity(getattr(data, a), 101 | ['preferred', 'all'])) 102 | except AttributeError: 103 | pass 104 | 105 | 106 | class Organization(Entity): 107 | """Organization model class""" 108 | def __init__(self, data): 109 | if "_source" in data: 110 | data = data["_source"] 111 | super(Organization, self).__init__(data, [ 112 | 'id', 'name', 'types', 'links', 'aliases', 'acronyms', 'status', 113 | 'wikipedia_url', 'established', 'relationships', 'addresses' 114 | ]) 115 | self.labels = [Entity(l, ['label', 'iso639']) for l in data.labels] 116 | self.country = Entity(data.country, ['country_name', 'country_code']) 117 | self.ip_addresses = data.ip_addresses 118 | self.established = getattr(data, 'established', None) 119 | self.email_address = getattr(data, 'email_address', None) 120 | self.relationships = [ 121 | Entity(r, ['type', 'label', 'id']) for r in data.relationships 122 | ] 123 | self.addresses = [Addresses(a) for a in data.addresses] 124 | self.external_ids = ExternalIds(data.external_ids) 125 | 126 | 127 | class ListResult: 128 | """A model class for the list of organizations returned from the search""" 129 | def __init__(self, data): 130 | self.number_of_results = data.hits.total.value 131 | self.time_taken = data.took 132 | self.items = [Organization(x) for x in data] 133 | self.meta = Aggregations(data.aggregations) 134 | 135 | 136 | class MatchedOrganization: 
137 | """A model class for an organization matched based on an affiliation 138 | string""" 139 | def __init__(self, data): 140 | self.substring = data.substring 141 | self.score = data.score 142 | self.matching_type = data.matching_type 143 | self.chosen = data.chosen 144 | self.organization = Organization(data.organization) 145 | 146 | 147 | class MatchingResult: 148 | """A model class for the result of affiliation matching""" 149 | def __init__(self, data): 150 | self.number_of_results = len(data) 151 | self.items = [MatchedOrganization(x) for x in data] -------------------------------------------------------------------------------- /rorapi/v1/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer 3 | 4 | class AggregationsSerializer(serializers.Serializer): 5 | types = BucketSerializer(many=True) 6 | countries = BucketSerializer(many=True) 7 | statuses = BucketSerializer(many=True) 8 | 9 | 10 | class OrganizationLabelSerializer(serializers.Serializer): 11 | label = serializers.CharField() 12 | iso639 = serializers.CharField() 13 | 14 | 15 | class CountrySerializer(serializers.Serializer): 16 | country_name = serializers.CharField() 17 | country_code = serializers.CharField() 18 | 19 | 20 | class LicenseSerializer(serializers.Serializer): 21 | attribution = serializers.StringRelatedField() 22 | license = serializers.StringRelatedField() 23 | 24 | 25 | class NutsSerializer(serializers.Serializer): 26 | name = serializers.CharField() 27 | code = serializers.CharField() 28 | 29 | 30 | class AddressGeoNamesSerializer(serializers.Serializer): 31 | name = serializers.CharField() 32 | id = serializers.IntegerField() 33 | ascii_name = serializers.CharField() 34 | code = serializers.CharField() 35 | 36 | 37 | class GeoNamesCitySerializer(serializers.Serializer): 38 | id = 
serializers.IntegerField() 39 | city = serializers.StringRelatedField() 40 | geonames_admin1 = AddressGeoNamesSerializer() 41 | geonames_admin2 = AddressGeoNamesSerializer() 42 | license = LicenseSerializer() 43 | nuts_level1 = NutsSerializer() 44 | nuts_level2 = NutsSerializer() 45 | nuts_level3 = NutsSerializer() 46 | 47 | 48 | class OrganizationAddressesSerializer(serializers.Serializer): 49 | lat = serializers.DecimalField(max_digits=None, 50 | decimal_places=10, 51 | coerce_to_string=False) 52 | lng = serializers.DecimalField(max_digits=None, 53 | decimal_places=10, 54 | coerce_to_string=False) 55 | state = serializers.StringRelatedField() 56 | state_code = serializers.CharField() 57 | city = serializers.CharField() 58 | geonames_city = GeoNamesCitySerializer() 59 | postcode = serializers.CharField() 60 | primary = serializers.BooleanField() 61 | line = serializers.CharField() 62 | country_geonames_id = serializers.IntegerField() 63 | 64 | 65 | class ExternalIdSerializer(serializers.Serializer): 66 | preferred = serializers.CharField() 67 | all = serializers.StringRelatedField(many=True) 68 | 69 | 70 | class GridExternalIdSerializer(serializers.Serializer): 71 | preferred = serializers.CharField() 72 | all = serializers.StringRelatedField() 73 | 74 | 75 | class ExternalIdsSerializer(serializers.Serializer): 76 | ISNI = ExternalIdSerializer(required=False) 77 | FundRef = ExternalIdSerializer(required=False) 78 | HESA = ExternalIdSerializer(required=False) 79 | UCAS = ExternalIdSerializer(required=False) 80 | UKPRN = ExternalIdSerializer(required=False) 81 | CNRS = ExternalIdSerializer(required=False) 82 | OrgRef = ExternalIdSerializer(required=False) 83 | Wikidata = ExternalIdSerializer(required=False) 84 | GRID = GridExternalIdSerializer(required=False) 85 | 86 | 87 | class OrganizationSerializer(serializers.Serializer): 88 | id = serializers.CharField() 89 | name = serializers.CharField() 90 | email_address = serializers.StringRelatedField() 91 | ip_addresses 
= serializers.StringRelatedField(many=True) 92 | established = serializers.IntegerField() 93 | types = serializers.StringRelatedField(many=True) 94 | relationships = OrganizationRelationshipsSerializer(many=True) 95 | addresses = OrganizationAddressesSerializer(many=True) 96 | links = serializers.StringRelatedField(many=True) 97 | aliases = serializers.StringRelatedField(many=True) 98 | acronyms = serializers.StringRelatedField(many=True) 99 | status = serializers.CharField() 100 | wikipedia_url = serializers.CharField() 101 | labels = OrganizationLabelSerializer(many=True) 102 | country = CountrySerializer() 103 | external_ids = ExternalIdsSerializer() 104 | 105 | 106 | class ListResultSerializer(serializers.Serializer): 107 | number_of_results = serializers.IntegerField() 108 | time_taken = serializers.IntegerField() 109 | items = OrganizationSerializer(many=True) 110 | meta = AggregationsSerializer() 111 | 112 | 113 | class MatchedOrganizationSerializer(serializers.Serializer): 114 | substring = serializers.CharField() 115 | score = serializers.FloatField() 116 | matching_type = serializers.CharField() 117 | chosen = serializers.BooleanField() 118 | organization = OrganizationSerializer() 119 | 120 | 121 | class MatchingResultSerializer(serializers.Serializer): 122 | number_of_results = serializers.IntegerField() 123 | items = MatchedOrganizationSerializer(many=True) 124 | -------------------------------------------------------------------------------- /rorapi/v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ror-community/ror-api/1efb824f020ea66e89790730b9a8ad8072cc678c/rorapi/v2/__init__.py -------------------------------------------------------------------------------- /rorapi/v2/index_template_es7.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_patterns": [ 3 | "organizations-v2" 4 | ], 5 | "settings": { 6 | 
"number_of_shards": 1, 7 | "analysis": { 8 | "analyzer": { 9 | "string_lowercase": { 10 | "tokenizer": "standard", 11 | "filter": [ 12 | "lowercase", 13 | "ascii_folding" 14 | ] 15 | } 16 | }, 17 | "filter": { 18 | "ascii_folding": { 19 | "type": "asciifolding", 20 | "preserve_original": true 21 | } 22 | } 23 | } 24 | }, 25 | "mappings": { 26 | "properties": { 27 | "admin": { 28 | "properties": { 29 | "created": { 30 | "properties": { 31 | "date": { 32 | "type": "date" 33 | }, 34 | "schema_version": { 35 | "type": "keyword" 36 | } 37 | } 38 | }, 39 | "last_modified": { 40 | "properties": { 41 | "date": { 42 | "type": "date" 43 | }, 44 | "schema_version": { 45 | "type": "keyword" 46 | } 47 | } 48 | } 49 | } 50 | }, 51 | "domains": { 52 | "type": "text", 53 | "analyzer": "simple" 54 | }, 55 | "established": { 56 | "type": "date" 57 | }, 58 | "external_ids": { 59 | "properties": { 60 | "all": { 61 | "type": "keyword" 62 | }, 63 | "type": { 64 | "type": "keyword" 65 | }, 66 | "preferred": { 67 | "type": "keyword" 68 | } 69 | } 70 | }, 71 | "id": { 72 | "type": "keyword" 73 | }, 74 | "links": { 75 | "properties": { 76 | "value": { 77 | "type": "text", 78 | "analyzer": "simple" 79 | }, 80 | "type": { 81 | "type": "keyword" 82 | } 83 | } 84 | }, 85 | "locations": { 86 | "properties": { 87 | "geonames_id": { 88 | "type": "integer" 89 | }, 90 | "geonames_details": { 91 | "properties": { 92 | "continent_code": { 93 | "type": "keyword" 94 | }, 95 | "continent_name": { 96 | "type": "keyword" 97 | }, 98 | "country_code": { 99 | "type": "keyword" 100 | }, 101 | "country_name": { 102 | "type": "keyword" 103 | }, 104 | "country_subdivision_code": { 105 | "type": "keyword" 106 | }, 107 | "country_subdivision_name": { 108 | "type": "keyword" 109 | }, 110 | "lat": { 111 | "type": "float" 112 | }, 113 | "lng": { 114 | "type": "float" 115 | }, 116 | "name": { 117 | "type": "keyword" 118 | } 119 | } 120 | } 121 | } 122 | }, 123 | "names": { 124 | "properties": { 125 | "value": { 126 | 
"type": "text", 127 | "fields": { 128 | "keyword": { 129 | "type": "keyword" 130 | }, 131 | "norm": { 132 | "type": "text", 133 | "analyzer": "string_lowercase", 134 | "fielddata": true 135 | } 136 | } 137 | }, 138 | "lang": { 139 | "type": "keyword" 140 | }, 141 | "types": { 142 | "type": "keyword" 143 | } 144 | } 145 | }, 146 | "types": { 147 | "type": "keyword" 148 | }, 149 | "relationships": { 150 | "properties": { 151 | "type": { 152 | "type": "keyword" 153 | }, 154 | "label": { 155 | "type": "text", 156 | "fields": { 157 | "keyword": { 158 | "type": "keyword" 159 | }, 160 | "norm": { 161 | "type": "text", 162 | "analyzer": "string_lowercase", 163 | "fielddata": true 164 | } 165 | } 166 | }, 167 | "id": { 168 | "type": "keyword" 169 | } 170 | } 171 | }, 172 | "status": { 173 | "type": "keyword" 174 | }, 175 | "names_ids": { 176 | "type": "nested", 177 | "properties": { 178 | "id": { 179 | "type": "keyword" 180 | }, 181 | "name": { 182 | "type": "text", 183 | "analyzer": "string_lowercase" 184 | } 185 | } 186 | } 187 | } 188 | } 189 | } -------------------------------------------------------------------------------- /rorapi/v2/models.py: -------------------------------------------------------------------------------- 1 | from geonamescache.mappers import country 2 | import random 3 | import string 4 | from django.db import models 5 | from rorapi.common.models import TypeBucket, CountryBucket, StatusBucket, Entity 6 | from rorapi.v2.record_constants import continent_code_to_name 7 | 8 | class ContinentBucket: 9 | """A model class for country aggregation bucket""" 10 | 11 | def __init__(self, data): 12 | self.id = data.key.lower() 13 | self.title = continent_code_to_name(data.key) 14 | self.count = data.doc_count 15 | 16 | class CountryBucket: 17 | """A model class for country aggregation bucket""" 18 | 19 | def __init__(self, data): 20 | self.id = data.key.lower() 21 | mapper = country(from_key="iso", to_key="name") 22 | try: 23 | self.title = mapper(data.key) 
class CountryBucket:
    """A model class for a country aggregation bucket."""

    def __init__(self, data):
        self.id = data.key.lower()
        mapper = country(from_key="iso", to_key="name")
        try:
            self.title = mapper(data.key)
        except AttributeError:
            # Some country codes have no name mapping. Previously this branch
            # left self.title unset, so any later attribute access (e.g. during
            # serialization) could still fail; always set the attribute so the
            # bucket is safe to serialize.
            self.title = None
        self.count = data.doc_count


class Aggregations:
    """Aggregations model class: facet buckets of a v2 search response."""

    def __init__(self, data):
        self.types = [TypeBucket(b) for b in data.types.buckets]
        self.countries = [CountryBucket(b) for b in data.countries.buckets]
        self.continents = [ContinentBucket(b) for b in data.continents.buckets]
        self.statuses = [StatusBucket(b) for b in data.statuses.buckets]


class GeoNamesDetails:
    """A model class for storing a GeoNames city/location hash."""

    def __init__(self, data):
        # Continent and subdivision fields may be absent on older records,
        # so they default to None.
        self.continent_code = getattr(data, 'continent_code', None)
        self.continent_name = getattr(data, 'continent_name', None)
        self.country_code = data.country_code
        self.country_name = data.country_name
        self.country_subdivision_code = getattr(data, 'country_subdivision_code', None)
        self.country_subdivision_name = getattr(data, 'country_subdivision_name', None)
        self.name = data.name
        self.lat = data.lat
        self.lng = data.lng


class Location:
    """A model class for storing addresses (GeoNames id + resolved details)."""

    def __init__(self, data):
        self.geonames_id = data.geonames_id
        self.geonames_details = GeoNamesDetails(data.geonames_details)


class ExternalId:
    """A model class for storing one external-id group (type, preferred, all)."""

    def __init__(self, data):
        self.type = data.type
        self.preferred = data.preferred
        self.all = [a for a in data.all]


class Admin:
    """A model class for storing admin (provenance) information."""

    def __init__(self, data):
        # Either stamp may be missing on the source document; absent stamps
        # are deliberately skipped rather than raising.
        for a in ["created", "last_modified"]:
            try:
                setattr(self, a, Entity(getattr(data, a), ["date", "schema_version"]))
            except AttributeError:
                pass
class ListResult:
    """A model class for the list of organizations returned from the search."""

    def __init__(self, data):
        self.number_of_results = data.hits.total.value
        self.time_taken = data.took
        self.items = [Organization(hit) for hit in data]
        self.meta = Aggregations(data.aggregations)


class MatchedOrganization:
    """One candidate organization matched against an affiliation string."""

    def __init__(self, data):
        self.substring = data.substring
        self.score = data.score
        self.matching_type = data.matching_type
        self.chosen = data.chosen
        self.organization = Organization(data.organization)


class MatchingResult:
    """A model class for the result of affiliation matching."""

    def __init__(self, data):
        self.items = [MatchedOrganization(candidate) for candidate in data]
        self.number_of_results = len(self.items)
models.EmailField(max_length=255) 142 | 143 | # Optional fields 144 | name = models.CharField(max_length=255, blank=True, null=True) 145 | institution_name = models.CharField(max_length=255, blank=True, null=True) 146 | institution_ror = models.URLField(max_length=255, blank=True, null=True) 147 | country_code = models.CharField(max_length=2, blank=True, null=True) 148 | ror_use = models.TextField(max_length=500, blank=True, null=True) 149 | 150 | # System fields 151 | client_id = models.CharField( 152 | max_length=32, 153 | unique=True, 154 | editable=False 155 | ) 156 | created_at = models.DateTimeField(auto_now_add=True) 157 | last_request_at = models.DateTimeField(null=True, blank=True) 158 | request_count = models.IntegerField(default=0) 159 | 160 | def __str__(self): 161 | return f"{self.email} - {self.client_id}" 162 | 163 | @staticmethod 164 | def generate_client_id(): 165 | """Generate a unique 32-character client ID""" 166 | return ''.join(random.choices(string.ascii_uppercase + string.digits, k=32)) 167 | 168 | def save(self, *args, **kwargs): 169 | # Ensure client_id is generated before saving 170 | if not self.client_id: # Only generate if it's empty 171 | self.client_id = self.generate_client_id() 172 | super().save(*args, **kwargs) 173 | -------------------------------------------------------------------------------- /rorapi/v2/record_constants.py: -------------------------------------------------------------------------------- 1 | V2_ADMIN = { 2 | "created": { 3 | "date": "", 4 | "schema_version": "2.1" 5 | }, 6 | "last_modified": { 7 | "date": "", 8 | "schema_version": "2.1" 9 | } 10 | } 11 | 12 | V2_LAST_MOD = { 13 | "date": "", 14 | "schema_version": "2.1" 15 | } 16 | 17 | V2_OPTIONAL_FIELD_DEFAULTS = { 18 | "domains": [], 19 | "established": None, 20 | "external_ids": [], 21 | "links": [], 22 | "relationships": [] 23 | } 24 | 25 | V2_TEMPLATE = { 26 | "locations": [], 27 | "established": None, 28 | "external_ids": [], 29 | "id": "", 30 | 
"domains": [], 31 | "links": [], 32 | "names": [], 33 | "relationships": [], 34 | "status": "", 35 | "types": [], 36 | "admin": {} 37 | } 38 | 39 | V2_EXTERNAL_ID_TYPES = { 40 | "FUNDREF" : "fundref", 41 | "GRID" : "grid", 42 | "ISNI" : "isni", 43 | "WIKIDATA" : "wikidata" 44 | } 45 | 46 | V2_LINK_TYPES = { 47 | "WEBSITE" : "website", 48 | "WIKIPEDIA" : "wikipedia" 49 | } 50 | 51 | V2_NAME_TYPES = { 52 | "ACRONYM" : "acronym", 53 | "ALIAS" : "alias", 54 | "LABEL" : "label", 55 | "ROR_DISPLAY" : "ror_display" 56 | } 57 | 58 | V2_SORT_KEYS = { 59 | "domains": None, 60 | "external_ids": "type", 61 | "links": "type", 62 | "names": "value", 63 | "relationships": "type", 64 | "types": None 65 | } 66 | 67 | V2_CONTINENT_CODES_NAMES = { 68 | "AF": "Africa", 69 | "AN": "Antarctica", 70 | "AS": "Asia", 71 | "EU": "Europe", 72 | "NA": "North America", 73 | "OC": "Oceania", 74 | "SA": "South America" 75 | } 76 | 77 | def continent_code_to_name(continent_code): 78 | if continent_code.upper() in V2_CONTINENT_CODES_NAMES.keys(): 79 | return V2_CONTINENT_CODES_NAMES[continent_code.upper()] 80 | return None 81 | -------------------------------------------------------------------------------- /rorapi/v2/record_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "locations": [], 3 | "established": null, 4 | "external_ids": [], 5 | "id": "", 6 | "domains": [], 7 | "links": [], 8 | "names": [], 9 | "relationships": [], 10 | "status": "", 11 | "types": [], 12 | "admin": {} 13 | } -------------------------------------------------------------------------------- /rorapi/v2/serializers.py: -------------------------------------------------------------------------------- 1 | from rest_framework import serializers 2 | import bleach 3 | import pycountry 4 | import re 5 | from rorapi.v2.models import Client 6 | from rorapi.common.serializers import BucketSerializer, OrganizationRelationshipsSerializer 7 | 8 | class 
class AdminDetailsSerializer(serializers.Serializer):
    """One provenance stamp: a date plus the schema version it was written under."""

    date = serializers.DateTimeField()
    schema_version = serializers.CharField()


class AdminSerializer(serializers.Serializer):
    """Record provenance: creation and last-modification stamps."""

    created = AdminDetailsSerializer()
    last_modified = AdminDetailsSerializer()


class OrganizationNameSerializer(serializers.Serializer):
    """A single name entry: value, language, and its name types
    (e.g. acronym / alias / label / ror_display, per V2_NAME_TYPES)."""

    lang = serializers.CharField()
    types = serializers.StringRelatedField(many=True)
    value = serializers.CharField()


class ExternalIdSerializer(serializers.Serializer):
    """External identifiers of one type (e.g. fundref, grid, isni, wikidata)
    with a preferred value and the full list of values."""

    all = serializers.StringRelatedField(many=True)
    preferred = serializers.CharField()
    type = serializers.CharField()


class LinkSerializer(serializers.Serializer):
    """A typed URL (type is e.g. website or wikipedia, per V2_LINK_TYPES)."""

    type = serializers.CharField()
    value = serializers.CharField()


class GeoNamesDetailsSerializer(serializers.Serializer):
    """GeoNames-derived details for one organization location."""

    continent_code = serializers.CharField()
    continent_name = serializers.CharField()
    country_code = serializers.CharField()
    country_name = serializers.CharField()
    country_subdivision_code = serializers.CharField()
    country_subdivision_name = serializers.CharField()
    # max_digits=None leaves precision unconstrained; coerce_to_string=False
    # keeps coordinates numeric (not strings) in the JSON output.
    lat = serializers.DecimalField(
        max_digits=None, decimal_places=10, coerce_to_string=False
    )
    lng = serializers.DecimalField(
        max_digits=None, decimal_places=10, coerce_to_string=False
    )
    name = serializers.StringRelatedField()


class OrganizationLocationSerializer(serializers.Serializer):
    """A location: GeoNames id plus its resolved details."""

    geonames_details = GeoNamesDetailsSerializer()
    geonames_id = serializers.IntegerField()
class ListResultSerializer(serializers.Serializer):
    """Envelope for a v2 organization list/search response."""

    number_of_results = serializers.IntegerField()
    # Mirrors ListResult.time_taken (Elasticsearch "took") — presumably
    # milliseconds; confirm against the ES response contract.
    time_taken = serializers.IntegerField()
    items = OrganizationSerializer(many=True)
    meta = AggregationsSerializer()


class MatchedOrganizationSerializer(serializers.Serializer):
    """One affiliation-matching candidate and the organization it resolved to."""

    # The portion of the input affiliation string this match was based on.
    substring = serializers.CharField()
    score = serializers.FloatField()
    matching_type = serializers.CharField()
    # NOTE(review): appears to flag the matcher's selected candidate —
    # confirm semantics in common/matching.py.
    chosen = serializers.BooleanField()
    organization = OrganizationSerializer()


class MatchingResultSerializer(serializers.Serializer):
    """Envelope for the full affiliation-matching response."""

    number_of_results = serializers.IntegerField()
    items = MatchedOrganizationSerializer(many=True)
115 | """Sanitize name and validate length. Reject empty string.""" 116 | if value is not None: 117 | if value == "": 118 | raise serializers.ValidationError("Name cannot be an empty string.") 119 | value = bleach.clean(value) # Sanitize to strip HTML 120 | if len(value) > 255: 121 | raise serializers.ValidationError("Name cannot be longer than 255 characters.") 122 | return value 123 | 124 | def validate_institution_name(self, value): 125 | """Sanitize institution name and validate length. Reject empty string.""" 126 | if value is not None: 127 | if value == "": 128 | raise serializers.ValidationError("Institution name cannot be an empty string.") 129 | value = bleach.clean(value) # Sanitize to strip HTML 130 | if len(value) > 255: 131 | raise serializers.ValidationError("Institution name cannot be longer than 255 characters.") 132 | return value 133 | 134 | def validate_institution_ror(self, value): 135 | """Validate and format institution ROR to match 'https://ror.org/XXXXX'. Reject empty string.""" 136 | if value is not None: 137 | if value == "": 138 | raise serializers.ValidationError("Institution ROR cannot be an empty string.") 139 | value = bleach.clean(value) # Sanitize to strip HTML 140 | ror_regex = r'https://ror\.org/[A-Za-z0-9]+' 141 | if not re.match(ror_regex, value): 142 | raise serializers.ValidationError("Institution ROR must be in the format 'https://ror.org/XXXXX'.") 143 | return value 144 | 145 | def validate_country_code(self, value): 146 | """Validate that the country code is a valid ISO 3166-1 alpha-2 country code. 
from django.test import TestCase
# Bug fix: `models` is a module (rorapi/v2/models.py), not a package, so the
# previous `from .models.client import Client` raised ModuleNotFoundError.
# serializers.py confirms the correct path: `from rorapi.v2.models import Client`.
from .models import Client


class ClientTests(TestCase):
    """Tests for client registration and client-id based rate limiting."""

    def test_client_registration(self):
        # save() must auto-populate client_id via Client.generate_client_id().
        client = Client.objects.create(email='test@example.com')
        self.assertIsNotNone(client.client_id)

    def test_rate_limiting(self):
        # An unrecognized client id should be throttled with HTTP 429.
        # NOTE(review): assumes a '/client-id/' route exists and the
        # throttle middleware rejects unknown IDs — verify URL conf.
        response = self.client.get('/client-id/', HTTP_CLIENT_ID="INVALID_ID")
        self.assertEqual(response.status_code, 429)
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rorapi.settings') 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /vendor/docker/00_app_env.conf: -------------------------------------------------------------------------------- 1 | # File will be overwritten if user runs the container with `-e PASSENGER_APP_ENV=...`! 2 | passenger_app_env development; 3 | -------------------------------------------------------------------------------- /vendor/docker/10_ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ "${PUBLIC_KEY}" ]; then 3 | echo "${PUBLIC_KEY}" > /root/.ssh/authorized_keys 4 | fi 5 | -------------------------------------------------------------------------------- /vendor/docker/_ror-api-dev.auto.tfvars.tmpl: -------------------------------------------------------------------------------- 1 | ror-api-dev_tags = { 2 | sha = "{{ .Env.GIT_SHA }}" 3 | version = "{{ .Env.GIT_TAG }}" 4 | } 5 | -------------------------------------------------------------------------------- /vendor/docker/_ror-api.auto.tfvars.tmpl: -------------------------------------------------------------------------------- 1 | ror-api_tags = { 2 | sha = "{{ .Env.GIT_SHA }}" 3 | version = "{{ .Env.GIT_TAG }}" 4 | } 5 | -------------------------------------------------------------------------------- /vendor/docker/ntp.conf: -------------------------------------------------------------------------------- 1 | server 0.amazon.pool.ntp.org iburst 2 | server 1.amazon.pool.ntp.org iburst 3 | server 2.amazon.pool.ntp.org iburst 4 | server 3.amazon.pool.ntp.org iburst 5 | -------------------------------------------------------------------------------- 
/vendor/docker/webapp.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80 default_server; 3 | server_name _; 4 | 5 | root /home/app/webapp/; 6 | passenger_enabled on; 7 | passenger_python /usr/bin/python; 8 | passenger_app_root /home/app/webapp/; 9 | passenger_user app; 10 | passenger_app_type wsgi; 11 | passenger_startup_file rorapi/wsgi.py; 12 | 13 | merge_slashes off; 14 | resolver 8.8.8.8; 15 | 16 | location /static { 17 | alias /home/app/webapp/static; 18 | } 19 | } 20 | --------------------------------------------------------------------------------