├── .editorconfig
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── dockerpublish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── backup.py
├── backup
    └── .gitignore
├── bin
    ├── clean.sh
    ├── cron.sh
    ├── cronlog.sh
    ├── django_wait_for_db.sh
    ├── example_data.sh
    ├── gunicorn.sh
    ├── restore.sh
    └── swarm_update.sh
├── crontab
├── db.cnf
├── django_wait_for_migrations.py
├── docker-compose.yml
├── docs
    └── pull_request_template.md
├── extlinks
    ├── __init__.py
    ├── aggregates
    │   ├── __init__.py
    │   ├── admin.py
    │   ├── factories.py
    │   ├── management
    │   │   ├── __init__.py
    │   │   ├── commands
    │   │   │   ├── __init__.py
    │   │   │   ├── archive_link_aggregates.py
    │   │   │   ├── archive_pageproject_aggregates.py
    │   │   │   ├── archive_user_aggregates.py
    │   │   │   ├── fill_link_aggregates.py
    │   │   │   ├── fill_monthly_link_aggregates.py
    │   │   │   ├── fill_monthly_pageproject_aggregates.py
    │   │   │   ├── fill_monthly_user_aggregates.py
    │   │   │   ├── fill_pageproject_aggregates.py
    │   │   │   └── fill_user_aggregates.py
    │   │   └── helpers
    │   │   │   ├── __init__.py
    │   │   │   └── aggregate_archive_command.py
    │   ├── migrations
    │   │   ├── 0001_initial.py
    │   │   ├── 0002_useraggregate.py
    │   │   ├── 0003_add_indexes.py
    │   │   ├── 0004_pageprojectaggregate.py
    │   │   ├── 0005_add_organisation_index.py
    │   │   ├── 0006_delete_aggregate_tables_info.py
    │   │   ├── 0007_add_user_list_flags.py
    │   │   ├── 0008_alter_linkaggregate_id_alter_pageprojectaggregate_id_and_more.py
    │   │   ├── 0009_pageprojectaggregate_composite_index.py
    │   │   ├── 0010_add_aggregate_indexes.py
    │   │   ├── 0011_aggregate_composite_indexes.py
    │   │   └── __init__.py
    │   ├── models.py
    │   ├── tests.py
    │   └── views.py
    ├── common
    │   ├── __init__.py
    │   ├── forms.py
    │   ├── helpers.py
    │   ├── management
    │   │   ├── __init__.py
    │   │   └── commands
    │   │   │   ├── __init__.py
    │   │   │   └── import_twl_data.py
    │   ├── swift.py
    │   ├── templates
    │   │   └── common
    │   │   │   ├── statistics_table.html
    │   │   │   └── top_organisations_table.html
    │   ├── templatetags
    │   │   ├── __init__.py
    │   │   └── common_filters.py
    │   ├── tests.py
    │   ├── urls.py
    │   └── views.py
    ├── healthcheck
    │   ├── __init__.py
    │   ├── admin.py
    │   ├── migrations
    │   │   └── __init__.py
    │   ├── models.py
    │   ├── tests.py
    │   ├── urls.py
    │   └── views.py
    ├── links
    │   ├── __init__.py
    │   ├── admin.py
    │   ├── factories.py
    │   ├── helpers.py
    │   ├── management
    │   │   ├── __init__.py
    │   │   └── commands
    │   │   │   ├── __init__.py
    │   │   │   ├── fix_proxy_linkevents_on_user_list.py
    │   │   │   ├── linkevent_example_data.py
    │   │   │   ├── linkevents_archive.py
    │   │   │   ├── linkevents_collect.py
    │   │   │   ├── linksearchtotal_collect.py
    │   │   │   ├── linksearchtotal_example_data.py
    │   │   │   ├── remove_ezproxy_collection.py
    │   │   │   └── upload_all_archived.py
    │   ├── migrations
    │   │   ├── 0001_initial.py
    │   │   ├── 0002_auto_20190520_1530.py
    │   │   ├── 0003_auto_20190530_1045.py
    │   │   ├── 0004_auto_20190603_1110.py
    │   │   ├── 0005_linkevent_user_is_bot.py
    │   │   ├── 0006_auto_20190628_1221.py
    │   │   ├── 0007_auto_20190730_1355.py
    │   │   ├── 0008_fill_proquest_openurl.py
    │   │   ├── 0009_auto_20230215_1656.py
    │   │   ├── 0010_data_link_event_id_hash.py
    │   │   ├── 0011_auto_20230217_1326.py
    │   │   ├── 0012_alter_linkevent_id_alter_linksearchtotal_id_and_more.py
    │   │   ├── 0013_add_linkevent_url_linkevent_content_type_and_more.py
    │   │   ├── 0014_migrate_url_pattern_relationships.py
    │   │   ├── __init__.py
    │   │   └── urlpatterns.json
    │   ├── models.py
    │   └── tests.py
    ├── logs
    │   └── .gitignore
    ├── organisations
    │   ├── __init__.py
    │   ├── admin.py
    │   ├── factories.py
    │   ├── management
    │   │   ├── __init__.py
    │   │   └── commands
    │   │   │   ├── __init__.py
    │   │   │   └── users_update_lists.py
    │   ├── migrations
    │   │   ├── 0001_initial.py
    │   │   ├── 0002_auto_20190603_1255.py
    │   │   ├── 0003_auto_20190603_1325.py
    │   │   ├── 0004_auto_20190603_1325.py
    │   │   ├── 0005_auto_20190628_1221.py
    │   │   ├── 0006_auto_20190730_1355.py
    │   │   ├── 0007_auto_20230216_1931.py
    │   │   ├── 0008_alter_collection_id_alter_organisation_id_and_more.py
    │   │   ├── 0009_organisation_username_list_updated.py
    │   │   └── __init__.py
    │   ├── models.py
    │   ├── templates
    │   │   └── organisations
    │   │   │   ├── organisation_charts_include.html
    │   │   │   ├── organisation_detail.html
    │   │   │   └── organisation_list.html
    │   ├── tests.py
    │   ├── urls.py
    │   └── views.py
    ├── programs
    │   ├── __init__.py
    │   ├── admin.py
    │   ├── factories.py
    │   ├── management
    │   │   ├── __init__.py
    │   │   └── commands
    │   │   │   ├── __init__.py
    │   │   │   └── programs_example_data.py
    │   ├── migrations
    │   │   ├── 0001_initial.py
    │   │   ├── 0002_auto_20190603_1255.py
    │   │   ├── 0003_alter_program_id.py
    │   │   └── __init__.py
    │   ├── models.py
    │   ├── templates
    │   │   └── programs
    │   │   │   ├── program_charts_include.html
    │   │   │   ├── program_detail.html
    │   │   │   └── program_list.html
    │   ├── tests.py
    │   ├── urls.py
    │   └── views.py
    ├── settings
    │   ├── base.py
    │   ├── helpers.py
    │   ├── local.py
    │   ├── logging.py
    │   └── production.py
    ├── templates
    │   ├── base.html
    │   ├── documentation.html
    │   └── homepage.html
    ├── tests.py
    ├── urls.py
    ├── views.py
    └── wsgi.py
├── manage.py
├── nginx.conf
├── requirements
    ├── django.txt
    └── local.txt
├── robots.txt
├── static
    ├── css
    │   └── local.css
    └── favicon.ico
├── template.env
└── wiki-list.csv


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # top-most EditorConfig file
 2 | root = true
 3 | 
 4 | # Unix-style newlines with a newline ending every file
 5 | [*]
 6 | end_of_line = lf
 7 | insert_final_newline = true
 8 | 
 9 | # 4 space indentation for Python files
10 | [*.py]
11 | indent_style = space
12 | indent_size = 4
13 | trim_trailing_whitespace = true
14 | 
15 | # 2 space indentation for YAML files
16 | [*.{yml, yaml}]
17 | indent_style = space
18 | indent_size = 2
19 | 
20 | # 2 space indentation for HTML files
21 | [*.html]
22 | indent_style = space
23 | indent_size = 2
24 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "pip" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "daily"
12 | 


--------------------------------------------------------------------------------
/.github/workflows/dockerpublish.yml:
--------------------------------------------------------------------------------
  1 | name: Docker
  2 | 
  3 | on:
  4 |   push:
  5 |     # Publish `master` as Docker `latest` image.
  6 |     branches:
  7 |       - master
  8 |       - staging
  9 | 
 10 |   # Run tests for any PRs.
 11 |   pull_request:
 12 | 
 13 | jobs:
 14 |   # Run tests.
 15 |   test:
 16 |     # Ensure latest python image is mirrored before running tests.
 17 |     runs-on: ubuntu-latest
 18 |     steps:
 19 |       - uses: actions/checkout@v4
 20 |       - name: Build and Start Images
 21 |         run: |
 22 |           cp template.env .env
 23 |           docker compose up -d --build
 24 |       - name: Run tests
 25 |         run: |
 26 |           docker compose exec -T externallinks /app/bin/django_wait_for_db.sh python django_wait_for_migrations.py test
 27 | 
 28 |   # Push images to quay.io/wikipedialibrary.
 29 |   push:
 30 |     # Ensure test job passes before pushing images.
 31 |     needs: test
 32 |     runs-on: ubuntu-latest
 33 |     if: github.event_name == 'push'
 34 | 
 35 |     steps:
 36 |       - uses: actions/checkout@v4
 37 | 
 38 |       - name: Log into quay.io
 39 |         run: echo "${{ secrets.CR_PASSWORD }}" | docker login quay.io -u ${{ secrets.CR_USERNAME }} --password-stdin
 40 | 
 41 |       - name: Build Images
 42 |         run: |
 43 |           cp template.env .env
 44 |           docker compose build
 45 | 
 46 |       - name: Set branch tag
 47 |         id: branch
 48 |         run: |
 49 |           # Strip git ref prefix from version
 50 |           branch_tag=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
 51 | 
 52 |           # Strip "v" prefix from tag name
 53 |           [[ "${{ github.ref }}" == "refs/tags/"* ]] && branch_tag=$(echo $branch_tag | sed -e 's/^v//')
 54 | 
 55 |           # preprend with "branch_" so we know what the tag means by looking at it.
 56 |           branch_tag="branch_${branch_tag}"
 57 | 
 58 |           echo ::set-output name=tag::$(echo $branch_tag)
 59 | 
 60 |       - name: Set commit tag
 61 |         id: commit
 62 |         run: |
 63 |           # The short git commit object name.
 64 |           commit_tag=${GITHUB_SHA::8}
 65 | 
 66 |           # prepend with "commit_" so we know what the tag means by looking at it.
 67 |           commit_tag="commit_${commit_tag}"
 68 | 
 69 |           echo ::set-output name=tag::$(echo $commit_tag)
 70 | 
 71 |       - name: Push externallinks image to quay.io/wikipedialibrary
 72 |         run: |
 73 |           # The image name represents both the local image name and the remote image repository.
 74 |           image_name=quay.io/wikipedialibrary/externallinks
 75 |           branch_tag=${{ steps.branch.outputs.tag }}
 76 |           commit_tag=${{ steps.commit.outputs.tag }}
 77 | 
 78 |           docker tag ${image_name}:latest ${image_name}:${branch_tag}
 79 |           docker tag ${image_name}:latest ${image_name}:${commit_tag}
 80 |           docker push ${image_name}:${branch_tag}
 81 |           docker push ${image_name}:${commit_tag}
 82 | 
 83 |       - name: Push eventstream image to quay.io/wikipedialibrary
 84 |         run: |
 85 |           # The image name represents both the local image name and the remote image repository.
 86 |           image_name=quay.io/wikipedialibrary/eventstream
 87 |           branch_tag=${{ steps.branch.outputs.tag }}
 88 |           commit_tag=${{ steps.commit.outputs.tag }}
 89 | 
 90 |           docker tag ${image_name}:latest ${image_name}:${branch_tag}
 91 |           docker tag ${image_name}:latest ${image_name}:${commit_tag}
 92 |           docker push ${image_name}:${branch_tag}
 93 |           docker push ${image_name}:${commit_tag}
 94 | 
 95 |       - name: Push externallinks_cron image to quay.io/wikipedialibrary
 96 |         run: |
 97 |           # The image name represents both the local image name and the remote image repository.
 98 |           image_name=quay.io/wikipedialibrary/externallinks_cron
 99 |           branch_tag=${{ steps.branch.outputs.tag }}
100 |           commit_tag=${{ steps.commit.outputs.tag }}
101 | 
102 |           docker tag ${image_name}:latest ${image_name}:${branch_tag}
103 |           docker tag ${image_name}:latest ${image_name}:${commit_tag}
104 |           docker push ${image_name}:${branch_tag}
105 |           docker push ${image_name}:${commit_tag}
106 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.log
 2 | .idea
 3 | .vscode
 4 | .env
 5 | .env.*
 6 | .swp
 7 | 
 8 | # Python stuff
 9 | __pycache__/
10 | *.pyc
11 | 
12 | static/admin
13 | static/debug_toolbar
14 | static/django_extensions
15 | 
16 | db.json
17 | 
18 | .coverage
19 | htmlcov/
20 | 
21 | .DS_Store
22 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Dockerfile
 2 | FROM quay.io/wikipedialibrary/python:3.11-bullseye-updated AS eventstream
 3 | 
 4 | WORKDIR /app
 5 | ARG REQUIREMENTS_FILE
 6 | ENV REQUIREMENTS_FILE=${REQUIREMENTS_FILE:-django.txt}
 7 | COPY requirements/* /app/requirements/
 8 | RUN echo "Installing $REQUIREMENTS_FILE" && pip install -r /app/requirements/$REQUIREMENTS_FILE
 9 | RUN apt update && apt install -y default-mysql-client && rm -rf /var/lib/apt/lists/* && rm -f /var/log/apt/*
10 | # This file only exists once the code directory is mounted by docker-compose.
11 | ENTRYPOINT ["/app/bin/django_wait_for_db.sh"]
12 | 
13 | FROM eventstream AS externallinks
14 | RUN pip install gunicorn
15 | 
16 | FROM eventstream AS cron
17 | RUN apt update && apt install -y cron && rm -rf /var/lib/apt/lists/* && rm -f /var/log/apt/*
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Wikimedia Foundation
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/backup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import argparse
 3 | from datetime import datetime
 4 | from dotenv import load_dotenv
 5 | from filelock import FileLock
 6 | import subprocess
 7 | import os
 8 | 
 9 | 
10 | def backup(args):
11 |     ## Dump and gzip DB
12 |     date = datetime.today().strftime("%Y%m%d")
13 |     print("Backing up database.")
14 |     filename = "/app/backup/{}.sql.gz".format(date)
15 |     extra_opts = ""
16 |     if args.missing_only:
17 |         extra_opts = "--insert-ignore --no-create-info --skip-opt"
18 |         filename = "/app/backup/{}.missing-only.sql.gz".format(date)
19 |     command = 'nice -n 5 bash -c "mysqldump {extra_opts} --skip-comments -h db -u root -p{mysql_root_password} {mysql_database} | gzip > {filename}"'.format(
20 |         extra_opts=extra_opts,
21 |         mysql_root_password=os.environ["MYSQL_ROOT_PASSWORD"],
22 |         mysql_database=os.environ["MYSQL_DATABASE"],
23 |         filename=filename,
24 |     )
25 |     subprocess.run(command, shell=True, check=True)
26 | 
27 |     ## `root:wikidev` only; using IDs instead of names to avoid problems in localdev
28 |     os.chown(filename, 0, 500)
29 |     os.chmod(filename, 0o640)
30 | 
31 |     print("Finished backup.")
32 | 
33 | 
34 | def clean():
35 |     # Retain backups for 14 days.
36 |     subprocess.run(
37 |         'find /app/backup -name "*.sql.gz" -mtime +14 -delete || :',
38 |         shell=True,
39 |         check=True,
40 |     )
41 |     print("Removed backups created 14 days ago or more.")
42 | 
43 | 
44 | def main():
45 |     load_dotenv(".env")
46 |     parser = argparse.ArgumentParser(description="externallinks compressed backup")
47 |     parser.add_argument("--missing_only", action="store_true")
48 |     args = parser.parse_args()
49 | 
50 |     # Use a lockfile to prevent overruns.
51 |     lockfile = "/tmp/backup.lock"
52 |     lock = FileLock(lockfile)
53 |     lock.acquire()
54 |     try:
55 |         backup(args)
56 |         clean()
57 |     finally:
58 |         lock.release()
59 |         os.remove(lockfile)
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     main()
64 | 


--------------------------------------------------------------------------------
/backup/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 | 


--------------------------------------------------------------------------------
/bin/clean.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -eo pipefail
 4 | 
 5 | PATH=/usr/local/bin:/usr/bin:/bin:/sbin:/app/bin:$PATH
 6 | 
 7 | if /app/bin/django_wait_for_db.sh
 8 | then
 9 |     echo "This will drop all tables in ${MYSQL_DATABASE}. Proceed [y/N]?"
10 |     read -p "This will drop all tables in ${MYSQL_DATABASE}. Proceed [y/N]?" -n 1 -r
11 |     echo ""
12 |     if [[ ! $REPLY =~ ^[Yy]$ ]]
13 |     then
14 |         echo "Exiting..."
15 |         exit
16 |     fi
17 |     mysql_cmd="mysql -h db -u root -p${MYSQL_ROOT_PASSWORD} -D ${MYSQL_DATABASE}"
18 |     # Build an SQL statement for dropping every table
19 |     concat_fragment="GROUP_CONCAT('DROP TABLE IF EXISTS ', table_name SEPARATOR ';')"
20 |     get_tables_query="SELECT ${concat_fragment} FROM information_schema.tables WHERE table_schema = '${MYSQL_DATABASE}';"
21 |     drop_query=$(echo ${get_tables_query} | ${mysql_cmd})
22 |     drop_query=${drop_query/$concat_fragment/}
23 |     drop_query=${drop_query//[$'\r\n']}
24 |     if [ "$drop_query" == "NULL" ]
25 |     then
26 |         echo "No tables to drop."
27 |         exit
28 |     fi
29 |     drop_query="SET FOREIGN_KEY_CHECKS = 0;${drop_query};SET FOREIGN_KEY_CHECKS = 1;"
30 |     echo "Dropping tables."
31 |     echo ${drop_query}
32 |     nice -n 5 bash -c "echo \"${drop_query}\" | ${mysql_cmd}"
33 | 
34 |     echo "Tables dropped."
35 | else
36 |     exit 1
37 | fi
38 | 
39 | 


--------------------------------------------------------------------------------
/bin/cron.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # copy crontab, set permissions, and start cron
 3 | set -eo pipefail
 4 | PATH=/usr/local/bin:/usr/bin:/bin:/sbin:/app/bin:$PATH
 5 | if /app/bin/django_wait_for_db.sh
 6 | then
 7 |     cp /app/crontab /etc/crontab
 8 |     # `root:wikidev` only; using IDs instead of names to avoid problems in localdev
 9 |     chown 0:500 /etc/crontab
10 |     chmod 640 /etc/crontab
11 |     echo "Starting cron."
12 |     cron -f -L 8
13 | else
14 |     echo "ERROR: couldn't start cron."
15 |     exit 1
16 | fi
17 | 


--------------------------------------------------------------------------------
/bin/cronlog.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # A simple wrapper to redirect cron STDOUT & STDERR to docker logs
3 | set -eo pipefail
4 | PATH=/usr/local/bin:/usr/bin:/bin:/sbin:/app/bin:$PATH
5 | cd /app
6 | bash "$@">/proc/1/fd/1 2>/proc/1/fd/2
7 | 


--------------------------------------------------------------------------------
/bin/django_wait_for_db.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Try to get a db shell
 4 | db_init_wait=0
 5 | db_init_timeout=60
 6 | function connect() {
 7 |     connect=$(echo 'exit' | python manage.py dbshell 2>&1 >/dev/null)
 8 |     if ${connect} 2>/dev/null
 9 |     then
10 |         true
11 |     else
12 |         echo ${connect} | sed -e "s/'--\(user\|password\)=[^']*'/'--\1=******'/g" >/tmp/externallink_db_connect
13 |         false
14 |     fi
15 | }
16 | 
17 | until connect || [ $db_init_wait -eq $db_init_timeout ]
18 | do
19 |     >&2 echo "Waiting for DB."
20 |     sleep 1
21 |     db_init_wait=$(( $db_init_wait + 1 ))
22 | done
23 | 
24 | if [ $db_init_wait -lt $db_init_timeout ]
25 | then
26 |     >&2 echo "DB up."
27 |     rm /tmp/externallink_db_connect 2>/dev/null || :
28 | 	exec "$@"
29 | else
30 |     cat /tmp/externallink_db_connect
31 |     rm /tmp/externallink_db_connect 2>/dev/null || :
32 |     exit 1
33 | fi
34 | 


--------------------------------------------------------------------------------
/bin/example_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | echo "Creating Programs"
4 | python /app/manage.py programs_example_data 10
5 | echo "Creating LinkSearchTotals"
6 | python /app/manage.py linksearchtotal_example_data 60
7 | echo "Creating LinkEvents"
8 | python /app/manage.py linkevent_example_data 10000
9 | echo "Done"


--------------------------------------------------------------------------------
/bin/gunicorn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | python manage.py migrate
 4 | python manage.py collectstatic --noinput
 5 | 
 6 | exec gunicorn extlinks.wsgi:application \
 7 |     --name extlinks_django \
 8 |     --bind 0.0.0.0:8000 \
 9 |     --worker-class gthread \
10 |     --workers 7 \
11 |     --threads 1 \
12 |     --timeout 30 \
13 |     --backlog 2048 \
14 |     --log-level=info \
15 |     --reload \
16 | "$@"
17 | 


--------------------------------------------------------------------------------
/bin/restore.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -eo pipefail
 4 | 
 5 | PATH=/usr/local/bin:/usr/bin:/bin:/sbin:/app/bin:$PATH
 6 | 
 7 | restore_file=${1}
 8 | 
 9 | if /app/bin/django_wait_for_db.sh
10 | then
11 | 
12 |     echo "This may drop the DB. Proceed [y/N]?"
13 |     read -p "This may drop the DB. Proceed [y/N]?" -n 1 -r
14 |     echo ""
15 |     if [[ ! $REPLY =~ ^[Yy]$ ]]
16 |     then
17 |         echo "Exiting..."
18 |         exit
19 |     fi
20 | 
21 |     echo "Importing backup DB."
22 |     nice -n 5 bash -c "gunzip -c ${restore_file} | mysql -h db -u root -p${MYSQL_ROOT_PASSWORD} -D ${MYSQL_DATABASE}"
23 | 
24 |     ## Run any necessary DB operations.
25 |     python /app/manage.py migrate
26 | 
27 |     echo "Finished restore."
28 | else
29 |     exit 1
30 | fi
31 | 
32 | 


--------------------------------------------------------------------------------
/bin/swarm_update.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # One level up from this script, which should be the root of this repo.
 4 | dir=$(dirname $(readlink -f $0))/..
 5 | 
 6 | # Gets a value from a dotenv file at .env.
 7 | function default () {
 8 |     grep ${1} ${dir}/.env | cut -d '=' -f2
 9 | }
10 | 
11 | # Pull updated image if available.
12 | function pull () {
13 |     image_name=${1}
14 |     tag=${2}
15 |     target=${image_name}:${tag}
16 |     
17 |     # Check for newer image
18 |     pull=$(docker pull ${target})
19 | 
20 |     # Pull swarm config updates and update the stack if there is a new image.
21 |     if echo ${pull} | grep "Status: Downloaded newer image for ${target}" >/dev/null
22 |     then
23 |        echo "${target} updated"
24 | 
25 |     # Report if the local image is already up to date.
26 |     elif echo ${pull} | grep "Status: Image is up to date for ${target}" >/dev/null
27 |     then
28 |        echo "${target} already up to date"
29 | 
30 |     # Fail in any other circumstance.
31 |     else
32 |        echo "Error updating ${target}"
33 |        exit 1
34 |     fi
35 | }
36 | 
37 | # Take .env values or arguments.
38 | env=${1:-$(default ENV)}
39 | externallinks_tag=${2:-$(default EXTERNALLINKS_TAG)}
40 | eventstream_tag=${3:-$(default EVENTSTREAM_TAG)}
41 | 
42 | if [ -z "$env" ] || [ -z "$externallinks_tag" ] || [ -z "$eventstream_tag" ]
43 | then
44 |     echo "Usage: swarm_update.sh \$env \$externallinks_tag \$eventstream_tag
45 |     \$env               docker swarm environment (eg. staging | production).
46 |     \$externallinks_tag docker hub image tag (eg. branch_staging | branch_production | latest)
47 |     \$eventstream_tag   docker hub image tag (eg. branch_staging | branch_production | latest)"
48 |     exit 1;
49 | fi
50 | 
51 | # Pull image updates.
52 | pull quay.io/wikipedialibrary/externallinks ${externallinks_tag}
53 | pull quay.io/wikipedialibrary/eventstream ${eventstream_tag}
54 | 
55 | # Update repository for updates to code or to the swarm deployment itself.
56 | git -C ${dir} pull
57 | 
58 | # Deploy the updates.
59 | docker stack deploy -c <(cd ${dir}; docker-compose config 2>/dev/null) ${env}
60 | 


--------------------------------------------------------------------------------
/crontab:
--------------------------------------------------------------------------------
 1 | # /etc/crontab: system-wide crontab
 2 | # Unlike any other crontab you don't have to run the `crontab'
 3 | # command to install the new version when you edit this file
 4 | # and files in /etc/cron.d. These files also have username fields,
 5 | # that none of the other crontabs do.
 6 | 
 7 | SHELL=/app/bin/cronlog.sh
 8 | PATH=/app/bin:/usr/bin:/bin
 9 | 
10 | # Example of job definition:
11 | #.---------------- minute (0 - 59)
12 | #|	.------------- hour (0 - 23)
13 | #|	|	.---------- day of month (1 - 31)
14 | #|	|	|	.------- month (1 - 12) OR jan,feb,mar,apr ...
15 | #|	|	|	|	.---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
16 | #|	|	|	|	|
17 | #*	*	*	*	*	user	command to be executed
18 | 30	6	*/2	*	*	root	python backup.py
19 | # from extlinks/aggregates/cron.py
20 | # daily
21 | 0	0	*	*	*	root	python manage.py fill_link_aggregates
22 | 5	0	*	*	*	root	python manage.py fill_user_aggregates
23 | 45	0	*	*	*	root	python manage.py fill_pageproject_aggregates
24 | 0	3	*	*	*	root	python manage.py fill_monthly_link_aggregates
25 | 10	3	*	*	*	root	python manage.py fill_monthly_user_aggregates
26 | 50	3	*	*	*	root	python manage.py fill_monthly_pageproject_aggregates
27 | # from extlinks/links/cron.py
28 | # weekly
29 | 10	5	*	*	1	root	python manage.py linksearchtotal_collect
30 | # daily
31 | 0	2	*	*	*	root	python manage.py linkevents_archive dump
32 | # from extlinks/organisations/cron.py
33 | # hourly (was every 65 minutes for some reason?)
34 | 5	*	*	*	*	root	python manage.py users_update_lists
35 | 
36 | 


--------------------------------------------------------------------------------
/db.cnf:
--------------------------------------------------------------------------------
 1 | [mysqld]
 2 | collation-server = utf8mb4_unicode_ci
 3 | init-connect = SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci
 4 | character-set-server = utf8mb4
 5 | max_allowed_packet=1024M
 6 | connect_timeout = 1200
 7 | net_read_timeout = 180
 8 | innodb_buffer_pool_size = 512M
 9 | 
10 | 


--------------------------------------------------------------------------------
/django_wait_for_migrations.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Wrapper for Django's command-line utility for administrative tasks."""
 3 | import os
 4 | import sys
 5 | import logging
 6 | from time import sleep
 7 | 
 8 | try:
 9 |     from django.db.migrations.executor import MigrationExecutor
10 |     from django.db.utils import ConnectionHandler, DEFAULT_DB_ALIAS
11 | except ImportError as exc:
12 |     raise ImportError(
13 |         "Couldn't import Django. Are you sure it's installed and "
14 |         "available on your PYTHONPATH environment variable? Did you "
15 |         "forget to activate a virtual environment?"
16 |     ) from exc
17 | 
18 | 
19 | def db_migrated(database):
20 |     connections = ConnectionHandler()
21 |     connection = connections[database]
22 |     connection.prepare_database()
23 |     executor = MigrationExecutor(connection)
24 |     targets = executor.loader.graph.leaf_nodes()
25 |     return not executor.migration_plan(targets)
26 | 
27 | 
28 | def wait_for_migrations(args):
29 |     try:
30 |         from django import setup
31 |         from django.core.management import execute_from_command_line
32 |     except ImportError as exc:
33 |         raise ImportError(
34 |             "Couldn't import Django. Are you sure it's installed and "
35 |             "available on your PYTHONPATH environment variable? Did you "
36 |             "forget to activate a virtual environment?"
37 |         ) from exc
38 |     setup()
39 |     logger = logging.getLogger(__name__)
40 |     wait = 0
41 |     # Unapplied migrations found.
42 |     while not db_migrated(DEFAULT_DB_ALIAS):
43 |         logger.info("Unapplied migrations found.")
44 |         sleep(1)
45 |         wait += 1
46 | 
47 |         if wait > 30:
48 |             raise Exception("Migration timeout")
49 | 
50 |     # All migrations have been applied.
51 |     if db_migrated(DEFAULT_DB_ALIAS):
52 |         logger.info("All migrations have been applied.")
53 |         execute_from_command_line(args)
54 |     else:
55 |         raise Exception("Unknown error.")
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     wait_for_migrations(sys.argv)
60 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
  1 | ---
  2 | version: "3.8"
  3 | volumes:
  4 |   mysql:
  5 | 
  6 | services:
  7 |   externallinks:
  8 |     image: quay.io/wikipedialibrary/externallinks:${EXTERNALLINKS_TAG}
  9 |     build:
 10 |       context: .
 11 |       target: externallinks
 12 |     env_file:
 13 |       - ".env"
 14 |     depends_on:
 15 |       - db
 16 |     ports:
 17 |       - "8000:8000"
 18 |     command: ["/app/bin/gunicorn.sh"]
 19 |     volumes:
 20 |       - type: bind
 21 |         source: ./
 22 |         target: /app
 23 |       - type: bind
 24 |         source: ${HOST_BACKUP_DIR}
 25 |         target: /app/backup
 26 |     deploy:
 27 |       resources:
 28 |         reservations:
 29 |           cpus: "0.25"
 30 |           memory: "384M"
 31 |   crons:
 32 |     image: quay.io/wikipedialibrary/externallinks_cron:${EXTERNALLINKS_TAG}
 33 |     build:
 34 |       context: .
 35 |       target: cron
 36 |     env_file:
 37 |       - ".env"
 38 |     depends_on:
 39 |       - db
 40 |     command: ["/app/bin/cron.sh"]
 41 |     volumes:
 42 |       - type: bind
 43 |         source: ./
 44 |         target: /app
 45 |       - type: bind
 46 |         source: ${HOST_BACKUP_DIR}
 47 |         target: /app/backup
 48 |     deploy:
 49 |       resources:
 50 |         reservations:
 51 |           cpus: "0.25"
 52 |           memory: "384M"
 53 |   db:
 54 |     image: quay.io/wikipedialibrary/mariadb:10-updated
 55 |     env_file:
 56 |       - ".env"
 57 |     ports:
 58 |       - "3306:3306"
 59 |     volumes:
 60 |       - type: volume
 61 |         source: mysql
 62 |         target: /var/lib/mysql
 63 |         volume: {}
 64 |       - type: bind
 65 |         source: ./db.cnf
 66 |         target: /etc/mysql/conf.d/db.cnf
 67 |     healthcheck:
 68 |       test: ["CMD", "mysqladmin", "ping", "-h", "localhost", "-plinks"]
 69 |       timeout: 20s
 70 |       interval: 10s
 71 |       retries: 10
 72 |     deploy:
 73 |       resources:
 74 |         reservations:
 75 |           cpus: "0.5"
 76 |           memory: "2.5G"
 77 |   nginx:
 78 |     image: quay.io/wikipedialibrary/nginx:latest-updated
 79 |     volumes:
 80 |       - type: volume
 81 |         target: /var/lib/nginx/cache
 82 |       - type: bind
 83 |         source: ./nginx.conf
 84 |         target: /etc/nginx/conf.d/default.conf
 85 |       - type: bind
 86 |         source: ./robots.txt
 87 |         target: /app/robots.txt
 88 |       - type: bind
 89 |         source: ./static
 90 |         target: /app/static
 91 |     ports:
 92 |       - "80:80"
 93 |     depends_on:
 94 |       - externallinks
 95 |     deploy:
 96 |       resources:
 97 |         reservations:
 98 |           cpus: "0.25"
 99 |           memory: "32M"
100 |   eventstream:
101 |     image: quay.io/wikipedialibrary/eventstream:${EVENTSTREAM_TAG}
102 |     build:
103 |       context: .
104 |       target: eventstream
105 |     depends_on:
106 |       - db
107 |     env_file:
108 |       - ".env"
109 |     command:
110 |       [
111 |         "python",
112 |         "django_wait_for_migrations.py",
113 |         "linkevents_collect",
114 |         "--historical",
115 |       ]
116 |     volumes:
117 |       - type: bind
118 |         source: ./
119 |         target: /app
120 |     deploy:
121 |       resources:
122 |         reservations:
123 |           cpus: "0.25"
124 |           memory: "48M"
125 |   cache:
126 |     image: quay.io/wikipedialibrary/memcached:latest
127 |     ports:
128 |       - "11211:11211"
129 |     entrypoint:
130 |       - memcached
131 |     depends_on:
132 |       - externallinks
133 |     deploy:
134 |       resources:
135 |         reservations:
136 |           cpus: "0.25"
137 |           memory: "64M"
138 | 


--------------------------------------------------------------------------------
/docs/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | [//]: # (Thank you for uploading a PR to Wikilinks (externallinks)!)
 2 | 
 3 | ## Description
 4 | Describe your changes in detail following the [commit message guidelines](https://www.mediawiki.org/wiki/Gerrit/Commit_message_guidelines)
 5 | 
 6 | ## Rationale
 7 | [//]: # (Why is this change required? What problem does it solve?)
 8 | 
 9 | ## Phabricator Ticket
10 | [//]: # (Link to the Phabricator ticket)
11 | 
12 | ## How Has This Been Tested?
13 | [//]: # (- Did you add tests to your changes? Did you modify tests to accommodate your changes?)
14 | [//]: # (- Can this change be tested manually? How?)
15 | 
16 | ## Screenshots of your changes (if appropriate):
17 | [//]: # (It can also be a GIF to prove that your changes are working)
18 | 
19 | ## Types of changes
20 | What types of changes does your code introduce? Add an `x` in all the boxes that apply:
21 | - [ ] Bug fix (non-breaking change which fixes an issue)
22 | - [ ] New feature (non-breaking change which adds functionality)
23 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
24 | 


--------------------------------------------------------------------------------
/extlinks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/__init__.py


--------------------------------------------------------------------------------
/extlinks/aggregates/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/aggregates/__init__.py


--------------------------------------------------------------------------------
/extlinks/aggregates/admin.py:
--------------------------------------------------------------------------------
 1 | from django.contrib import admin
 2 | 
 3 | from .models import LinkAggregate, UserAggregate, PageProjectAggregate
 4 | 
 5 | 
 6 | class LinkAggregateAdmin(admin.ModelAdmin):
 7 |     list_display = (
 8 |         "organisation",
 9 |         "collection",
10 |         "full_date",
11 |         "total_links_added",
12 |         "total_links_removed",
13 |         "on_user_list",
14 |     )
15 |     list_filter = ("organisation", "collection", "month", "year", "on_user_list")
16 |     list_select_related = ["organisation", "collection"]
17 | 
18 | 
19 | admin.site.register(LinkAggregate, LinkAggregateAdmin)
20 | 
21 | 
22 | class UserAggregateAdmin(admin.ModelAdmin):
23 |     list_display = (
24 |         "organisation",
25 |         "collection",
26 |         "username",
27 |         "full_date",
28 |         "total_links_added",
29 |         "total_links_removed",
30 |         "on_user_list",
31 |     )
32 |     list_filter = ("organisation", "collection", "month", "year", "on_user_list")
33 |     list_select_related = ["organisation", "collection"]
34 | 
35 | admin.site.register(UserAggregate, UserAggregateAdmin)
36 | 
37 | 
38 | class PageProjectAggregateAdmin(admin.ModelAdmin):
39 |     list_display = (
40 |         "organisation",
41 |         "collection",
42 |         "project_name",
43 |         "page_name",
44 |         "full_date",
45 |         "total_links_added",
46 |         "total_links_removed",
47 |         "on_user_list",
48 |     )
49 |     list_filter = ("organisation", "collection", "month", "year", "on_user_list")
50 |     list_select_related = ["organisation", "collection"]
51 | 
52 | admin.site.register(PageProjectAggregate, PageProjectAggregateAdmin)
53 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/factories.py:
--------------------------------------------------------------------------------
 1 | import factory
 2 | import random
 3 | import datetime
 4 | 
 5 | from .models import LinkAggregate, UserAggregate, PageProjectAggregate
 6 | from extlinks.organisations.factories import CollectionFactory, OrganisationFactory
 7 | 
 8 | 
 9 | class LinkAggregateFactory(factory.django.DjangoModelFactory):
10 |     class Meta:
11 |         model = LinkAggregate
12 |         strategy = factory.CREATE_STRATEGY
13 | 
14 |     organisation = factory.SubFactory(OrganisationFactory)
15 |     collection = factory.SubFactory(CollectionFactory)
16 |     full_date = factory.Faker(
17 |         "date_between_dates",
18 |         date_start=datetime.date(2017, 1, 1),
19 |         date_end=datetime.date(2020, 10, 31),
20 |     )
21 |     total_links_added = random.randint(0, 100)
22 |     total_links_removed = random.randint(0, 80)
23 | 
24 | 
25 | class UserAggregateFactory(factory.django.DjangoModelFactory):
26 |     class Meta:
27 |         model = UserAggregate
28 |         strategy = factory.CREATE_STRATEGY
29 | 
30 |     organisation = factory.SubFactory(OrganisationFactory)
31 |     collection = factory.SubFactory(CollectionFactory)
32 |     username = factory.Sequence(lambda n: 'user%d' % n)
33 |     full_date = factory.Faker(
34 |         "date_between_dates",
35 |         date_start=datetime.date(2017, 1, 1),
36 |         date_end=datetime.date(2020, 10, 31),
37 |     )
38 |     total_links_added = random.randint(0, 100)
39 |     total_links_removed = random.randint(0, 80)
40 | 
41 | 
42 | class PageProjectAggregateFactory(factory.django.DjangoModelFactory):
43 |     class Meta:
44 |         model = PageProjectAggregate
45 |         strategy = factory.CREATE_STRATEGY
46 | 
47 |     organisation = factory.SubFactory(OrganisationFactory)
48 |     collection = factory.SubFactory(CollectionFactory)
49 |     project_name = factory.Faker("word")
50 |     page_name = factory.Faker("word")
51 |     full_date = factory.Faker(
52 |         "date_between_dates",
53 |         date_start=datetime.date(2017, 1, 1),
54 |         date_end=datetime.date(2020, 10, 31),
55 |     )
56 |     total_links_added = random.randint(0, 100)
57 |     total_links_removed = random.randint(0, 80)
58 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/aggregates/management/__init__.py


--------------------------------------------------------------------------------
/extlinks/aggregates/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/aggregates/management/commands/__init__.py


--------------------------------------------------------------------------------
/extlinks/aggregates/management/commands/archive_link_aggregates.py:
--------------------------------------------------------------------------------
 1 | from typing import Type
 2 | from django.db import models
 3 | from extlinks.aggregates.management.helpers import AggregateArchiveCommand
 4 | from extlinks.aggregates.models import LinkAggregate
 5 | 
 6 | 
 7 | class Command(AggregateArchiveCommand):
 8 |     """
 9 |     This command archives data from the 'aggregates_linkaggregate' table.
10 |     """
11 | 
12 |     help = "Dump & delete or load data from the LinkAggregate table"
13 |     name = "LinkAggregate"
14 | 
15 |     def get_model(self) -> Type[models.Model]:
16 |         return LinkAggregate
17 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/management/commands/archive_pageproject_aggregates.py:
--------------------------------------------------------------------------------
 1 | from typing import Type
 2 | from django.db import models
 3 | from extlinks.aggregates.management.helpers import AggregateArchiveCommand
 4 | from extlinks.aggregates.models import PageProjectAggregate
 5 | 
 6 | 
 7 | class Command(AggregateArchiveCommand):
 8 |     """
 9 |     This command archives data from the 'aggregates_pageprojectaggregate' table.
10 |     """
11 | 
12 |     help = "Dump & delete or load data from the PageProjectAggregate table"
13 |     name = "PageProjectAggregate"
14 | 
15 |     def get_model(self) -> Type[models.Model]:
16 |         return PageProjectAggregate
17 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/management/commands/archive_user_aggregates.py:
--------------------------------------------------------------------------------
 1 | from typing import Type
 2 | from django.db import models
 3 | from extlinks.aggregates.management.helpers import AggregateArchiveCommand
 4 | from extlinks.aggregates.models import UserAggregate
 5 | 
 6 | 
 7 | class Command(AggregateArchiveCommand):
 8 |     """
 9 |     This command archives data from the 'aggregates_useraggregate' table.
10 |     """
11 | 
12 |     help = "Dump & delete or load data from the UserAggregate table"
13 |     name = "UserAggregate"
14 | 
15 |     def get_model(self) -> Type[models.Model]:
16 |         return UserAggregate
17 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/management/helpers/__init__.py:
--------------------------------------------------------------------------------
 1 | import gzip
 2 | import json
 3 | import os
 4 | 
 5 | from typing import Union
 6 | 
 7 | from django.core.serializers import serialize
 8 | 
 9 | from extlinks.aggregates.models import (
10 |     LinkAggregate,
11 |     PageProjectAggregate,
12 |     UserAggregate,
13 | )
14 | from extlinks.aggregates.management.helpers.aggregate_archive_command import (
15 |     AggregateArchiveCommand,
16 | )
17 | 
18 | 
19 | def decode_archive(filename: str):
20 |     """
21 |     Loads and decompresses the given archive file.
22 |     """
23 | 
24 |     with gzip.open(filename, "rt", encoding="utf-8") as archive:
25 |         return json.loads(archive.read())
26 | 
27 | 
28 | def validate_aggregate_archive(
29 |     aggregate_type: str,
30 |     aggregate: Union[LinkAggregate, UserAggregate, PageProjectAggregate],
31 |     output_dir: str,
32 | ) -> bool:
33 |     """
34 |     Validates that the gvien aggregate has a matching archive file.
35 |     """
36 | 
37 |     on_user_list = "1" if aggregate.on_user_list else "0"
38 |     filename = f"aggregates_{aggregate_type}_{aggregate.organisation.id}_{aggregate.collection.id}_{aggregate.full_date}_{on_user_list}.json.gz"
39 |     archive_path = os.path.join(output_dir, filename)
40 | 
41 |     if not os.path.isfile(archive_path):
42 |         return False
43 | 
44 |     archive_json = decode_archive(archive_path)
45 |     link_aggregate_json = json.loads(serialize("json", [aggregate]))
46 | 
47 |     return link_aggregate_json == archive_json
48 | 
49 | 
50 | def validate_link_aggregate_archive(
51 |     link_aggregate: LinkAggregate, output_dir: str
52 | ) -> bool:
53 |     """
54 |     Validates that the given LinkAggregate has a matching archive file.
55 |     """
56 | 
57 |     return validate_aggregate_archive("linkaggregate", link_aggregate, output_dir)
58 | 
59 | 
60 | def validate_user_aggregate_archive(
61 |     user_aggregate: UserAggregate, output_dir: str
62 | ) -> bool:
63 |     """
64 |     Validates that the given UserAggregate has a matching archive file.
65 |     """
66 | 
67 |     return validate_aggregate_archive("useraggregate", user_aggregate, output_dir)
68 | 
69 | 
70 | def validate_pageproject_aggregate_archive(
71 |     pageproject_aggregate: PageProjectAggregate, output_dir: str
72 | ) -> bool:
73 |     """
74 |     Validates that the given PageProjectAggregate has a matching archive file.
75 |     """
76 | 
77 |     return validate_aggregate_archive(
78 |         "pageprojectaggregate", pageproject_aggregate, output_dir
79 |     )
80 | 
81 | 
82 | __all__ = ["AggregateArchiveCommand"]
83 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.3 on 2020-11-10 00:50
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 |     dependencies = [
 9 |         ("organisations", "0006_auto_20190730_1355"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.CreateModel(
14 |             name="LinkAggregate",
15 |             fields=[
16 |                 (
17 |                     "id",
18 |                     models.AutoField(
19 |                         auto_created=True,
20 |                         primary_key=True,
21 |                         serialize=False,
22 |                         verbose_name="ID",
23 |                     ),
24 |                 ),
25 |                 ("day", models.PositiveIntegerField()),
26 |                 ("month", models.PositiveIntegerField()),
27 |                 ("year", models.PositiveIntegerField()),
28 |                 ("full_date", models.DateField()),
29 |                 ("total_links_added", models.PositiveIntegerField()),
30 |                 ("total_links_removed", models.PositiveIntegerField()),
31 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
32 |                 ("updated_at", models.DateTimeField(auto_now=True)),
33 |                 (
34 |                     "collection",
35 |                     models.ForeignKey(
36 |                         on_delete=django.db.models.deletion.CASCADE,
37 |                         to="organisations.collection",
38 |                     ),
39 |                 ),
40 |                 (
41 |                     "organisation",
42 |                     models.ForeignKey(
43 |                         default=None,
44 |                         on_delete=django.db.models.deletion.CASCADE,
45 |                         to="organisations.organisation",
46 |                     ),
47 |                 ),
48 |             ],
49 |         ),
50 |     ]
51 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0002_useraggregate.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.3 on 2020-11-16 18:58
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("organisations", "0006_auto_20190730_1355"),
11 |         ("aggregates", "0001_initial"),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.CreateModel(
16 |             name="UserAggregate",
17 |             fields=[
18 |                 (
19 |                     "id",
20 |                     models.AutoField(
21 |                         auto_created=True,
22 |                         primary_key=True,
23 |                         serialize=False,
24 |                         verbose_name="ID",
25 |                     ),
26 |                 ),
27 |                 ("username", models.CharField(max_length=235)),
28 |                 ("day", models.PositiveIntegerField()),
29 |                 ("month", models.PositiveIntegerField()),
30 |                 ("year", models.PositiveIntegerField()),
31 |                 ("full_date", models.DateField()),
32 |                 ("total_links_added", models.PositiveIntegerField()),
33 |                 ("total_links_removed", models.PositiveIntegerField()),
34 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
35 |                 ("updated_at", models.DateTimeField(auto_now=True)),
36 |                 (
37 |                     "collection",
38 |                     models.ForeignKey(
39 |                         on_delete=django.db.models.deletion.CASCADE,
40 |                         to="organisations.collection",
41 |                     ),
42 |                 ),
43 |                 (
44 |                     "organisation",
45 |                     models.ForeignKey(
46 |                         on_delete=django.db.models.deletion.CASCADE,
47 |                         to="organisations.organisation",
48 |                     ),
49 |                 ),
50 |             ],
51 |         ),
52 |     ]
53 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0003_add_indexes.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.3 on 2020-11-25 20:12
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("aggregates", "0002_useraggregate"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddIndex(
14 |             model_name="linkaggregate",
15 |             index=models.Index(
16 |                 fields=["full_date"], name="aggregates__full_da_865352_idx"
17 |             ),
18 |         ),
19 |         migrations.AddIndex(
20 |             model_name="linkaggregate",
21 |             index=models.Index(
22 |                 fields=["collection"], name="aggregates__collect_1c3986_idx"
23 |             ),
24 |         ),
25 |         migrations.AddIndex(
26 |             model_name="useraggregate",
27 |             index=models.Index(
28 |                 fields=["full_date"], name="aggregates__full_da_3a3ae3_idx"
29 |             ),
30 |         ),
31 |         migrations.AddIndex(
32 |             model_name="useraggregate",
33 |             index=models.Index(
34 |                 fields=["collection"], name="aggregates__collect_53b6a2_idx"
35 |             ),
36 |         ),
37 |     ]
38 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0004_pageprojectaggregate.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.3 on 2020-11-26 00:58
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("organisations", "0006_auto_20190730_1355"),
11 |         ("aggregates", "0003_add_indexes"),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.CreateModel(
16 |             name="PageProjectAggregate",
17 |             fields=[
18 |                 (
19 |                     "id",
20 |                     models.AutoField(
21 |                         auto_created=True,
22 |                         primary_key=True,
23 |                         serialize=False,
24 |                         verbose_name="ID",
25 |                     ),
26 |                 ),
27 |                 ("project_name", models.CharField(max_length=32)),
28 |                 ("page_name", models.CharField(max_length=255)),
29 |                 ("day", models.PositiveIntegerField()),
30 |                 ("month", models.PositiveIntegerField()),
31 |                 ("year", models.PositiveIntegerField()),
32 |                 ("full_date", models.DateField()),
33 |                 ("total_links_added", models.PositiveIntegerField()),
34 |                 ("total_links_removed", models.PositiveIntegerField()),
35 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
36 |                 ("updated_at", models.DateTimeField(auto_now=True)),
37 |                 (
38 |                     "collection",
39 |                     models.ForeignKey(
40 |                         on_delete=django.db.models.deletion.CASCADE,
41 |                         to="organisations.collection",
42 |                     ),
43 |                 ),
44 |                 (
45 |                     "organisation",
46 |                     models.ForeignKey(
47 |                         on_delete=django.db.models.deletion.CASCADE,
48 |                         to="organisations.organisation",
49 |                     ),
50 |                 ),
51 |             ],
52 |         ),
53 |         migrations.AddIndex(
54 |             model_name="pageprojectaggregate",
55 |             index=models.Index(
56 |                 fields=["full_date"], name="aggregates__full_da_6cc1a0_idx"
57 |             ),
58 |         ),
59 |         migrations.AddIndex(
60 |             model_name="pageprojectaggregate",
61 |             index=models.Index(
62 |                 fields=["collection"], name="aggregates__collect_35e404_idx"
63 |             ),
64 |         ),
65 |     ]
66 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0005_add_organisation_index.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.3 on 2020-12-03 22:55
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ('organisations', '0006_auto_20190730_1355'),
11 |         ('aggregates', '0004_pageprojectaggregate'),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.AlterField(
16 |             model_name='linkaggregate',
17 |             name='organisation',
18 |             field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='organisations.organisation'),
19 |         ),
20 |         migrations.AddIndex(
21 |             model_name='linkaggregate',
22 |             index=models.Index(fields=['organisation'], name='aggregates__organis_c142ff_idx'),
23 |         ),
24 |         migrations.AddIndex(
25 |             model_name='pageprojectaggregate',
26 |             index=models.Index(fields=['organisation'], name='aggregates__organis_dc6018_idx'),
27 |         ),
28 |         migrations.AddIndex(
29 |             model_name='useraggregate',
30 |             index=models.Index(fields=['organisation'], name='aggregates__organis_3955fd_idx'),
31 |         ),
32 |     ]
33 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0006_delete_aggregate_tables_info.py:
--------------------------------------------------------------------------------
 1 | from django.db import migrations
 2 | 
 3 | 
 4 | def delete_information_aggregate_tables(apps, schema_editor):
 5 |     LinkAggregate = apps.get_model("aggregates", "LinkAggregate")
 6 |     PageProjectAggregate = apps.get_model("aggregates", "PageProjectAggregate")
 7 |     UserAggregate = apps.get_model("aggregates", "UserAggregate")
 8 | 
 9 |     LinkAggregate.objects.all().delete()
10 |     PageProjectAggregate.objects.all().delete()
11 |     UserAggregate.objects.all().delete()
12 | 
13 | 
14 | class Migration(migrations.Migration):
15 | 
16 |     dependencies = [("aggregates", "0005_add_organisation_index")]
17 | 
18 |     operations = [migrations.RunPython(delete_information_aggregate_tables)]
19 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0007_add_user_list_flags.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.4 on 2020-12-23 23:59
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("aggregates", "0006_delete_aggregate_tables_info"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="linkaggregate",
15 |             name="on_user_list",
16 |             field=models.BooleanField(default=False),
17 |         ),
18 |         migrations.AddField(
19 |             model_name="pageprojectaggregate",
20 |             name="on_user_list",
21 |             field=models.BooleanField(default=False),
22 |         ),
23 |         migrations.AddField(
24 |             model_name="useraggregate",
25 |             name="on_user_list",
26 |             field=models.BooleanField(default=False),
27 |         ),
28 |     ]
29 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0008_alter_linkaggregate_id_alter_pageprojectaggregate_id_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.7 on 2023-11-07 19:28
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('aggregates', '0007_add_user_list_flags'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='linkaggregate',
15 |             name='id',
16 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
17 |         ),
18 |         migrations.AlterField(
19 |             model_name='pageprojectaggregate',
20 |             name='id',
21 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
22 |         ),
23 |         migrations.AlterField(
24 |             model_name='useraggregate',
25 |             name='id',
26 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
27 |         ),
28 |     ]
29 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0009_pageprojectaggregate_composite_index.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.19 on 2025-02-19 20:52
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('aggregates', '0008_alter_linkaggregate_id_alter_pageprojectaggregate_id_and_more'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddIndex(
14 |             model_name='pageprojectaggregate',
15 |             index=models.Index(fields=['full_date', 'collection_id', 'project_name', 'page_name'], name='aggregates__full_da_53fee7_idx'),
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0010_add_aggregate_indexes.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.19 on 2025-02-27 17:24
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('aggregates', '0009_pageprojectaggregate_composite_index'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddIndex(
14 |             model_name='linkaggregate',
15 |             index=models.Index(fields=['organisation_id', 'collection_id', 'on_user_list', 'year', 'month'], name='aggregates__organis_286b0f_idx'),
16 |         ),
17 |         migrations.AddIndex(
18 |             model_name='pageprojectaggregate',
19 |             index=models.Index(fields=['organisation_id', 'collection_id', 'project_name', 'page_name', 'on_user_list', 'year', 'month'], name='aggregates__organis_c106e7_idx'),
20 |         ),
21 |         migrations.AddIndex(
22 |             model_name='useraggregate',
23 |             index=models.Index(fields=['organisation_id', 'collection_id', 'username', 'on_user_list', 'year', 'month'], name='aggregates__organis_318980_idx'),
24 |         ),
25 |     ]
26 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/0011_aggregate_composite_indexes.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.19 on 2025-03-07 19:13
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("aggregates", "0010_add_aggregate_indexes"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddIndex(
14 |             model_name="pageprojectaggregate",
15 |             index=models.Index(
16 |                 fields=["organisation", "project_name"],
17 |                 name="aggregates__organis_572036_idx",
18 |             ),
19 |         ),
20 |         migrations.AddIndex(
21 |             model_name="useraggregate",
22 |             index=models.Index(
23 |                 fields=["organisation", "username"],
24 |                 name="aggregates__organis_05ef9a_idx",
25 |             ),
26 |         ),
27 |         migrations.AddIndex(
28 |             model_name="pageprojectaggregate",
29 |             index=models.Index(
30 |                 fields=["collection", "project_name", "page_name"],
31 |                 name="aggregates__collect_e1e227_idx",
32 |             ),
33 |         ),
34 |         migrations.AddIndex(
35 |             model_name="useraggregate",
36 |             index=models.Index(
37 |                 fields=["collection", "username"], name="aggregates__collect_463085_idx"
38 |             ),
39 |         ),
40 |     ]
41 | 


--------------------------------------------------------------------------------
/extlinks/aggregates/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/aggregates/migrations/__init__.py


--------------------------------------------------------------------------------
/extlinks/aggregates/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 | 
3 | # Create your views here.
4 | 


--------------------------------------------------------------------------------
/extlinks/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/common/__init__.py


--------------------------------------------------------------------------------
/extlinks/common/forms.py:
--------------------------------------------------------------------------------
 1 | from datetime import timedelta
 2 | from dateutil.relativedelta import relativedelta
 3 | 
 4 | from django import forms
 5 | 
 6 | 
 7 | class DateInput(forms.DateInput):
 8 |     # Creating a custom widget because default DateInput doesn't use
 9 |     # input type="date"
10 |     input_type = "date"
11 | 
12 | 
13 | class FilterForm(forms.Form):
14 | 
15 |     start_date = forms.DateField(
16 |         required=False,
17 |         label="Start date:",
18 |         widget=DateInput(attrs={"class": "form-control"}),
19 |     )
20 |     end_date = forms.DateField(
21 |         required=False,
22 |         label="End date:",
23 |         widget=DateInput(attrs={"class": "form-control"}),
24 |     )
25 | 
26 |     limit_to_user_list = forms.BooleanField(required=False)
27 | 
28 |     namespace_id = forms.IntegerField(
29 |         required=False,
30 |         label="Namespace ID:",
31 |         widget=forms.NumberInput(
32 |             attrs={"class": "form-control", "style": "width: 6rem;"}
33 |         ),
34 |     )
35 | 
36 |     exclude_bots = forms.BooleanField(required=False)
37 | 
38 |     def clean_start_date(self):
39 |         """
40 |         This is automatically called by Django when validating this field.
41 |         Modify the start date to return the first day of its month.
42 |         """
43 |         start_date = self.cleaned_data.get("start_date")
44 | 
45 |         if not start_date:
46 |             return None
47 | 
48 |         return start_date.replace(day=1)
49 | 
50 | 
51 |     def clean_end_date(self):
52 |         """
53 |         This is automatically called by Django when validating this field.
54 |         Modify the end date to return the last day of its month.
55 |         """
56 |         end_date = self.cleaned_data.get("end_date")
57 | 
58 |         if not end_date:
59 |             return None
60 | 
61 |         next_month = end_date.replace(day=1) + relativedelta(months=1)
62 | 
63 |         return next_month - timedelta(days=1)
64 | 


--------------------------------------------------------------------------------
/extlinks/common/helpers.py:
--------------------------------------------------------------------------------
  1 | from datetime import date, timedelta
  2 | from itertools import islice
  3 | 
  4 | from django.db.models import Avg, Q
  5 | from django.db.models.functions import TruncMonth
  6 | 
  7 | from logging import getLogger
  8 | 
  9 | logger = getLogger("django")
 10 | 
 11 | 
 12 | def get_month_average(average_data, check_date):
 13 |     for avg_data in average_data:
 14 |         if avg_data["month"] == check_date:
 15 |             return avg_data["average"]
 16 | 
 17 |     return 0
 18 | 
 19 | 
 20 | def get_linksearchtotal_data_by_time(queryset):
 21 |     """
 22 |     Calculates per-unit-time data from a queryset of LinkSearchTotal objects
 23 | 
 24 |     Given a queryset of LinkSearchTotal objects, returns the totals
 25 |     per month.
 26 | 
 27 |     Returns two lists: dates and totals
 28 |     """
 29 | 
 30 |     if queryset:
 31 |         earliest_date = queryset.earliest("date").date
 32 |         current_date = date.today()
 33 | 
 34 |         linksearch_data = []
 35 |         dates = []
 36 | 
 37 |         average_month_data = (
 38 |             queryset.annotate(month=TruncMonth("date"))
 39 |             .values("month")
 40 |             .annotate(average=Avg("total"))
 41 |         )
 42 | 
 43 |         while current_date >= earliest_date:
 44 |             month_first = current_date.replace(day=1)
 45 |             this_month_avg = get_month_average(average_month_data, month_first)
 46 | 
 47 |             linksearch_data.append(round(this_month_avg))
 48 |             dates.append(month_first.strftime("%Y-%m-%d"))
 49 | 
 50 |             # Figure out what the last month is regardless of today's date
 51 |             current_date = month_first - timedelta(days=1)
 52 | 
 53 |         # If a month has no data for some reason, we should use whatever
 54 |         # figure we have for the previous month, unless it is the current month
 55 |         for i, data in enumerate(linksearch_data):
 56 |             if data == 0 and i != len(linksearch_data) - 1:
 57 |                 linksearch_data[i] = linksearch_data[i + 1]
 58 | 
 59 |         return dates[::-1], linksearch_data[::-1]
 60 |     else:
 61 |         return [], []
 62 | 
 63 | 
 64 | def filter_linksearchtotals(queryset, filter_dict):
 65 |     """
 66 |     Adds filter conditions to a LinkSearchTotal queryset based on form results.
 67 | 
 68 |     queryset -- a LinkSearchTotal queryset
 69 |     filter_dict -- a dictionary of data from the user filter form
 70 | 
 71 |     Returns a queryset
 72 |     """
 73 |     if "start_date" in filter_dict:
 74 |         start_date = filter_dict["start_date"]
 75 |         if start_date:
 76 |             queryset = queryset.filter(date__gte=start_date)
 77 | 
 78 |     if "end_date" in filter_dict:
 79 |         end_date = filter_dict["end_date"]
 80 |         if end_date:
 81 |             queryset = queryset.filter(date__lte=end_date)
 82 | 
 83 |     return queryset
 84 | 
 85 | 
 86 | def build_queryset_filters(form_data, collection_or_organisations):
 87 |     """
 88 |     This function parses a filter dictionary and creates Q object to filter
 89 |     the aggregates tables by
 90 | 
 91 |     Parameters
 92 |     ----------
 93 |     form_data: dict
 94 |         If the filter form has valid filters, then there will be a dictionary
 95 |         to filter the aggregates tables by dates or if a user is part of a user
 96 |         list
 97 | 
 98 |     collection_or_organisations : dict
 99 |         A dictionary that will have either a collection or a set of
100 |         organisations to filter by.
101 | 
102 |     Returns
103 |     -------
104 |     Q : A Q object which will filter the aggregates queries
105 |     """
106 |     start_date = None
107 |     end_date = None
108 |     start_date_filter = Q()
109 |     end_date_filter = Q()
110 |     limit_to_user_list_filter = Q()
111 |     # The aggregates queries will always be filtered by organisation
112 |     if "organisations" in collection_or_organisations:
113 |         collection_or_organisation_filter = Q(
114 |             organisation__in=collection_or_organisations["organisations"]
115 |         )
116 |     elif "linkevents" in collection_or_organisations:
117 |         collection_or_organisation_filter = Q()
118 |     else:
119 |         collection_or_organisation_filter = Q(
120 |             collection=collection_or_organisations["collection"]
121 |         )
122 | 
123 |     if "start_date" in form_data:
124 |         start_date = form_data["start_date"]
125 |         if start_date:
126 |             if "linkevents" in collection_or_organisations:
127 |                 start_date_filter = Q(timestamp__gte=start_date)
128 |             else:
129 |                 start_date_filter = Q(full_date__gte=start_date)
130 |     if "end_date" in form_data:
131 |         end_date = form_data["end_date"]
132 |         # The end date must not be greater than today's date
133 |         if end_date:
134 |             if "linkevents" in collection_or_organisations:
135 |                 end_date_filter = Q(timestamp__lte=end_date)
136 |             else:
137 |                 end_date_filter = Q(full_date__lte=end_date)
138 | 
139 |     if "limit_to_user_list" in form_data:
140 |         limit_to_user_list = form_data["limit_to_user_list"]
141 |         if limit_to_user_list:
142 |             limit_to_user_list_filter = Q(on_user_list=True)
143 | 
144 |     if start_date and end_date:
145 |         # If the start date is greater tham the end date, it won't filter
146 |         # by date
147 |         if start_date >= end_date:
148 |             return collection_or_organisation_filter & limit_to_user_list_filter
149 | 
150 |     return (
151 |         collection_or_organisation_filter
152 |         & limit_to_user_list_filter
153 |         & start_date_filter
154 |         & end_date_filter
155 |     )
156 | 
157 | 
158 | def batch_iterator(iterable, size=1000):
159 |     """
160 |     This yields successive batches from an iterable (memory-efficient).
161 | 
162 |     Used for large queries that use `.iterator()` for efficiency.
163 |     Instead of loading all data into memory at once, this function
164 |     retrieves items lazily in fixed-size batches.
165 | 
166 |     Parameters
167 |     ----------
168 |     iterable : Iterator
169 |         An iterable object, typically a Django QuerySet with `.iterator()`,
170 |         that returns items one by one in a memory-efficient manner.
171 | 
172 |     size : int
173 |         The maximum number of items to include in each batch.
174 | 
175 |     Returns
176 |     -------
177 |     Iterator[List]
178 |         An iterator that yields lists containing at most `size` items
179 |         per batch.
180 |     """
181 |     iterator = iter(iterable)
182 |     while batch := list(islice(iterator, size)):
183 |         yield batch
184 | 


--------------------------------------------------------------------------------
/extlinks/common/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/common/management/__init__.py


--------------------------------------------------------------------------------
/extlinks/common/management/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | from django.core.management.base import BaseCommand as DjangoBaseCommand
 2 | from filelock import FileLock
 3 | import inspect
 4 | from os import remove
 5 | from os.path import basename
 6 | 
 7 | 
 8 | class BaseCommand(DjangoBaseCommand):
 9 |     """
10 |     Django BaseCommand wrapper that adds file locks to management commands
11 |     """
12 | 
13 |     def handle(self, *args, **options):
14 |         lockname = basename(inspect.getfile(self.__class__))
15 |         # Use a lockfile to prevent overruns.
16 |         lockfile = "/tmp/{}.lock".format(lockname)
17 |         lock = FileLock(lockfile)
18 |         lock.acquire()
19 |         try:
20 |             self._handle(*args, **options)
21 |         finally:
22 |             lock.release()
23 |             remove(lockfile)
24 | 


--------------------------------------------------------------------------------
/extlinks/common/management/commands/import_twl_data.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | 
 3 | from . import BaseCommand
 4 | from extlinks.links.models import URLPattern
 5 | from extlinks.organisations.models import Organisation, Collection
 6 | from extlinks.programs.models import Program
 7 | 
 8 | 
 9 | class Command(BaseCommand):
10 |     help = """
11 |            Imports Programs, Orgs, Collections, and URLPatterns from The Wikipedia
12 |            Library's old metrics collection system"""
13 | 
14 |     def add_arguments(self, parser):
15 |         parser.add_argument("file_path", nargs="+", type=str)
16 | 
17 |     def _handle(self, *args, **options):
18 |         file_path = options["file_path"][0]
19 | 
20 |         # Check TWL program exists, if it doesn't, create it.
21 |         try:
22 |             twl_program = Program.objects.get(name="The Wikipedia Library")
23 |         except Program.DoesNotExist:
24 |             twl_program = Program(name="The Wikipedia Library")
25 |             twl_program.save()
26 | 
27 |         with open(file_path, "r") as input_file:
28 |             csv_reader = csv.reader(input_file)
29 |             next(csv_reader)
30 |             for row in csv_reader:
31 |                 organisation = row[0]
32 |                 collection = row[1]
33 |                 urlpattern = row[2]
34 |                 twl_link = row[3]
35 |                 print(row)
36 | 
37 |                 # Create Organisation
38 |                 try:
39 |                     organisation_object = Organisation.objects.get(name=organisation)
40 |                 except Organisation.DoesNotExist:
41 |                     organisation_object = Organisation(name=organisation)
42 |                     organisation_object.save()
43 |                     if twl_link == "x":
44 |                         organisation_object.program.add(twl_program)
45 | 
46 |                 # Create Collection
47 |                 try:
48 |                     collection_object = Collection.objects.get(
49 |                         organisation=organisation_object,
50 |                         name=collection,
51 |                     )
52 |                 except Collection.DoesNotExist:
53 |                     collection_object = Collection(
54 |                         name=collection, organisation=organisation_object
55 |                     )
56 |                     collection_object.save()
57 | 
58 |                 # Create URLPattern
59 |                 # We shouldn't have any duplicates here but let's be safe.
60 |                 try:
61 |                     urlpattern_object = URLPattern.objects.get(url=urlpattern)
62 |                 except URLPattern.DoesNotExist:
63 |                     urlpattern_object = URLPattern(
64 |                         url=urlpattern, collection=collection_object
65 |                     )
66 |                     urlpattern_object.save()
67 | 


--------------------------------------------------------------------------------
/extlinks/common/swift.py:
--------------------------------------------------------------------------------
  1 | import concurrent.futures
  2 | import logging
  3 | import os
  4 | 
  5 | from typing import Iterable, List, Tuple, cast
  6 | 
  7 | import swiftclient
  8 | import keystoneauth1.identity.v3 as identity
  9 | import keystoneauth1.session as session
 10 | 
 11 | logger = logging.getLogger("django")
 12 | 
 13 | 
 14 | def swift_connection() -> swiftclient.Connection:
 15 |     """
 16 |     Creates a swiftclient Connection configured using environment variables.
 17 | 
 18 |     This method works with v3 application credentials authentication only.
 19 | 
 20 |     Returns
 21 |     -------
 22 |     swiftclient.Connection
 23 |         A connection to the Swift object storage.
 24 |     """
 25 | 
 26 |     auth_url = os.environ.get("OPENSTACK_AUTH_URL")
 27 |     credential_id = os.environ.get("SWIFT_APPLICATION_CREDENTIAL_ID")
 28 |     credential_secret = os.environ.get("SWIFT_APPLICATION_CREDENTIAL_SECRET")
 29 | 
 30 |     if not auth_url or not credential_id or not credential_secret:
 31 |         raise RuntimeError(
 32 |             "The 'OPENSTACK_AUTH_URL', 'SWIFT_APPLICATION_CREDENTIAL_ID' and "
 33 |             "'SWIFT_APPLICATION_CREDENTIAL_SECRET' environment variables must "
 34 |             "be defined to use the Swift client"
 35 |         )
 36 | 
 37 |     return swiftclient.Connection(
 38 |         session=session.Session(
 39 |             auth=identity.ApplicationCredential(
 40 |                 auth_url=auth_url,
 41 |                 application_credential_id=credential_id,
 42 |                 application_credential_secret=credential_secret,
 43 |                 user_domain_id="default",
 44 |             )
 45 |         )
 46 |     )
 47 | 
 48 | 
 49 | def get_containers(conn: swiftclient.Connection) -> List[dict]:
 50 |     """
 51 |     Retrieves a list of containers from object storage.
 52 | 
 53 |     Parameters
 54 |     ----------
 55 |     conn : swiftclient.Connection
 56 |         A connection to the Swift object storage.
 57 | 
 58 |     Returns
 59 |     -------
 60 |     List[dict]
 61 |         A list of dictionaries containing information about each container.
 62 |     """
 63 | 
 64 |     response = conn.get_account()
 65 |     if not response or len(response) < 2:
 66 |         raise RuntimeError("Failed to retrieve container list from Swift account.")
 67 | 
 68 |     return response[1]
 69 | 
 70 | 
 71 | def ensure_container_exists(conn: swiftclient.Connection, container: str) -> bool:
 72 |     """
 73 |     Creates a new container in object storage if it doesn't already exist.
 74 | 
 75 |     Parameters
 76 |     ----------
 77 |     conn : swiftclient.Connection
 78 |         A connection to the Swift object storage.
 79 | 
 80 |     container : str
 81 |         The name of the container to create.
 82 | 
 83 |     Returns
 84 |     -------
 85 |     bool
 86 |         True if the container was created, False if it already existed.
 87 |     """
 88 | 
 89 |     containers = (c["name"] for c in get_containers(conn))
 90 |     if container not in containers:
 91 |         conn.put_container(container)
 92 |         return True
 93 | 
 94 |     return False
 95 | 
 96 | 
 97 | def upload_file(
 98 |     conn: swiftclient.Connection,
 99 |     container: str,
100 |     path: str,
101 |     content_type="application/octet-stream",
102 | ):
103 |     """
104 |     Uploads a file on the local filesystem to the provided Swift container.
105 | 
106 |     Parameters
107 |     ----------
108 |     conn : swiftclient.Connection
109 |         A connection to the Swift object storage.
110 | 
111 |     container : str
112 |         The name of the container to upload the file to.
113 | 
114 |     path : str
115 |         The path to the file on the local filesystem.
116 | 
117 |     content_type : str
118 |         The content type of the file.
119 | 
120 |     Returns
121 |     -------
122 |     str
123 |         The name of the object in Swift.
124 |     """
125 | 
126 |     object_name = os.path.basename(path)
127 | 
128 |     with open(path, "rb") as f:
129 |         conn.put_object(
130 |             container,
131 |             object_name,
132 |             contents=f,
133 |             content_type=content_type,
134 |         )
135 | 
136 |     return object_name
137 | 
138 | 
139 | def download_file(conn: swiftclient.Connection, container: str, object: str) -> bytes:
140 |     """
141 |     Downloads a file from object storage.
142 | 
143 |     Parameters
144 |     ----------
145 |     conn : swiftclient.Connection
146 |         A connection to the Swift object storage.
147 | 
148 |     container : str
149 |         The name of the container to download the file from.
150 | 
151 |     object : str
152 |         The name of the object to download.
153 | 
154 |     Returns
155 |     -------
156 |     bytes
157 |         The contents of the object.
158 |     """
159 | 
160 |     _, response = conn.get_object(container, object)
161 | 
162 |     return cast(bytes, response)
163 | 
164 | 
165 | def file_exists(conn: swiftclient.Connection, container: str, object: str) -> bool:
166 |     """
167 |     Checks if a file exists in Swift.
168 | 
169 |     Parameters
170 |     ----------
171 |     conn : swiftclient.Connection
172 |         A connection to the Swift object storage.
173 | 
174 |     container : str
175 |         The name of the container to check.
176 | 
177 |     object : str
178 |         The name of the object to check.
179 | 
180 |     Returns
181 |     -------
182 |     bool
183 |         True if the file exists, False if it doesn't.
184 |     """
185 | 
186 |     try:
187 |         conn.head_object(container, object)
188 |         return True
189 |     except swiftclient.ClientException as e:
190 |         if e.http_status == 404:
191 |             return False
192 |         else:
193 |             raise e
194 | 
195 | 
196 | def batch_upload_files(
197 |     conn: swiftclient.Connection,
198 |     container: str,
199 |     files: Iterable[str],
200 |     max_workers=10,
201 | ) -> Tuple[List[str], List[str]]:
202 |     """
203 |     Uploads a batch of multiple files to the given Swift container.
204 | 
205 |     Parameters
206 |     ----------
207 |     conn : swiftclient.Connection
208 |         A connection to the Swift object storage.
209 | 
210 |     container : str
211 |         The name of the container to upload the files to.
212 | 
213 |     files : Iterable[str]
214 |         An iterable of file paths to upload.
215 | 
216 |     max_workers : int
217 |         The maximum number of concurrent uploads to perform.
218 | 
219 |     Returns
220 |     -------
221 |     Tuple[List[str], List[str]]
222 |         A tuple containing two lists. The first list contains the names of the
223 |         files that were successfully uploaded. The second list contains the
224 |         names of the files that failed to upload.
225 |     """
226 | 
227 |     successful = []
228 |     failed = []
229 | 
230 |     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
231 |         futures = {executor.submit(upload_file, conn, container, f): f for f in files}
232 | 
233 |         for future in concurrent.futures.as_completed(futures):
234 |             path = futures[future]
235 | 
236 |             try:
237 |                 object_name = future.result()
238 |                 logger.info(f"Successfully uploaded '%s' as '%s'", path, object_name)
239 |                 successful.append(path)
240 |             except Exception as exc:
241 |                 logger.error(f"Upload failed for '%s': %s", path, exc)
242 |                 failed.append(path)
243 | 
244 |     return successful, failed
245 | 


--------------------------------------------------------------------------------
/extlinks/common/templates/common/statistics_table.html:
--------------------------------------------------------------------------------
 1 | <h4>Statistics</h4>
 2 | {% if collections %}
 3 |   <table style="width:90%; margin-top: 30px;">
 4 |       <tr class="stat-table-header">
 5 |           <td>Link events</td>
 6 |       </tr>
 7 |       <tr>
 8 |           <td>Total added:</td>
 9 |           <td id="{{ collection.collection_id }}-links-added" style="text-align: right;"></td>
10 |       </tr>
11 |       <tr>
12 |           <td>Total removed:</td>
13 |           <td id="{{ collection.collection_id }}-links-removed" style="text-align: right;"></td>
14 |       </tr>
15 |       <tr style="line-height:55px;">
16 |           <td></td>
17 |           <td id="{{ collection.collection_id }}-links-diff"></td>
18 |       </tr>
19 |       {% if collection.linksearch_total_start %}
20 |         <tr class="stat-table-header">
21 |           <td>Total links (Wikipedia only)</td>
22 |         </tr>
23 |         <tr>
24 |           <td>{{ collection.linksearch_start_date }}:</td>
25 |           <td style="text-align: right;">{{ collection.linksearch_total_start }}</td>
26 |         </tr>
27 |         <tr>
28 |           <td>Current total:</td>
29 |           <td style="text-align: right;">{{ collection.linksearch_total_current }}</td>
30 |         </tr>
31 |         <tr style="line-height:55px;">
32 |           <td></td>
33 |           <td style="text-align: right; font-weight: bold;
34 |               {% if collection.linksearch_total_diff > 0 %}
35 |                       color: green;">+
36 |               {% else %}
37 |                       color: red;">
38 |               {% endif %}
39 |               {{ collection.linksearch_total_diff }}
40 |           </td>
41 |         </tr>
42 |       {% endif %}
43 |       <tr class="stat-table-header">
44 |         <td>Other</td>
45 |       </tr>
46 |       <tr>
47 |         <td>Total editors:</td>
48 |         <td id="{{ collection.collection_id }}-total-editors" style="text-align: right;"></td>
49 |       </tr>
50 |       <tr>
51 |         <td>Total projects:</td>
52 |         <td id="{{ collection.collection_id }}-total-projects" style="text-align: right;"></td>
53 |       </tr>
54 |   </table>
55 | {% else %}
56 |   <table style="width:90%; margin-top: 30px;">
57 |       <tr class="stat-table-header">
58 |         <td>Link events</td>
59 |       </tr>
60 |       <tr>
61 |         <td>Total added:</td>
62 |         <td id="links-added" style="text-align: right;"></td>
63 |       </tr>
64 |       <tr>
65 |         <td>Total removed:</td>
66 |         <td id="links-removed" style="text-align: right;"></td>
67 |       </tr>
68 |       <tr style="line-height:55px;">
69 |         <td></td>
70 |         <td id="links-diff" style="text-align: right; font-weight: bold;"></td>
71 |       </tr>
72 |       <tr class="stat-table-header">
73 |         <td>Other</td>
74 |       </tr>
75 |       <tr>
76 |         <td>Total editors:</td>
77 |         <td id="total-editors" style="text-align: right;"></td>
78 |       </tr>
79 |       <tr>
80 |         <td>Total projects:</td>
81 |         <td id="total-projects" style="text-align: right;"></td>
82 |       </tr>
83 |   </table>
84 | {% endif %}
85 | 


--------------------------------------------------------------------------------
/extlinks/common/templates/common/top_organisations_table.html:
--------------------------------------------------------------------------------
1 | {% load common_filters %}
2 | <table id="top-organisations-table" class="table">
3 |   <tr>
4 |     <th>Organisation</th>
5 |     <th>Added Links</th>
6 |   </tr>
7 | </table>
8 | 


--------------------------------------------------------------------------------
/extlinks/common/templatetags/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/common/templatetags/__init__.py


--------------------------------------------------------------------------------
/extlinks/common/templatetags/common_filters.py:
--------------------------------------------------------------------------------
 1 | from django import template
 2 | from django.template.defaultfilters import stringfilter
 3 | 
 4 | register = template.Library()
 5 | 
 6 | 
 7 | @register.filter
 8 | @stringfilter
 9 | def replace_underscores(string):
10 |     return string.replace("_", " ")
11 | 


--------------------------------------------------------------------------------
/extlinks/common/tests.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import shutil
  3 | import tempfile
  4 | 
  5 | from datetime import date, datetime, timezone
  6 | from unittest import mock
  7 | 
  8 | import swiftclient
  9 | import time_machine
 10 | 
 11 | from django.test import TestCase
 12 | 
 13 | import extlinks.common.swift as swift
 14 | 
 15 | from extlinks.common.forms import FilterForm
 16 | from extlinks.common.helpers import get_linksearchtotal_data_by_time
 17 | from extlinks.links.factories import LinkSearchTotalFactory, URLPatternFactory
 18 | from extlinks.links.models import LinkSearchTotal
 19 | 
 20 | SWIFT_TEST_CREDENTIALS = {
 21 |     "OPENSTACK_AUTH_URL": "fakeauthurl",
 22 |     "SWIFT_APPLICATION_CREDENTIAL_ID": "fakecredentialid",
 23 |     "SWIFT_APPLICATION_CREDENTIAL_SECRET": "fakecredentialsecret",
 24 | }
 25 | 
 26 | 
 27 | class LinkSearchDataByTimeTest(TestCase):
 28 |     def setUp(self):
 29 |         url = URLPatternFactory(url="www.acme.org")
 30 |         # Adding LinkSearch data
 31 |         LinkSearchTotalFactory(url=url, date=datetime(2020, 1, 15, tzinfo=timezone.utc))
 32 |         LinkSearchTotalFactory(url=url, date=datetime(2020, 2, 1, tzinfo=timezone.utc))
 33 |         LinkSearchTotalFactory(url=url, date=datetime(2020, 2, 2, tzinfo=timezone.utc))
 34 |         LinkSearchTotalFactory(url=url, date=datetime(2020, 2, 18, tzinfo=timezone.utc))
 35 |         LinkSearchTotalFactory(url=url, date=datetime(2020, 3, 6, tzinfo=timezone.utc))
 36 |         LinkSearchTotalFactory(
 37 |             url=url, date=datetime(2020, 4, 16, tzinfo=timezone.utc), total=0
 38 |         )
 39 | 
 40 |     def test_linksearch_data_empty_queryset(self):
 41 |         linksearch_queryset = None
 42 | 
 43 |         dates, linksearch_data = get_linksearchtotal_data_by_time(linksearch_queryset)
 44 | 
 45 |         self.assertEqual(0, len(dates))
 46 |         self.assertEqual(0, len(linksearch_data))
 47 | 
 48 |     def test_linksearch_data(self):
 49 |         with time_machine.travel(date(2020, 12, 31)):
 50 |             linksearch = LinkSearchTotal.objects.all()
 51 | 
 52 |             dates, linksearch_data = get_linksearchtotal_data_by_time(linksearch)
 53 | 
 54 |             self.assertEqual(12, len(dates))
 55 |             self.assertEqual(12, len(linksearch_data))
 56 | 
 57 | 
 58 | class FilterFormTest(TestCase):
 59 | 
 60 |     def test_valid_data(self):
 61 |         form = FilterForm(
 62 |             data={
 63 |                 "start_date": "2025-02-15",
 64 |                 "end_date": "2025-06-21",
 65 |                 "limit_to_user_list": "on",
 66 |                 "namespace_id": "10",
 67 |                 # "exclude_bots": "on", # omit this key to assertFalse
 68 |             }
 69 |         )
 70 |         self.assertTrue(form.is_valid())
 71 |         self.assertEqual(form.cleaned_data["start_date"], date(2025, 2, 1))
 72 |         self.assertEqual(
 73 |             form.cleaned_data["end_date"], date(2025, 6, 30)
 74 |         )  # Should return last day of month
 75 |         self.assertTrue(form.cleaned_data["limit_to_user_list"])
 76 |         self.assertEqual(form.cleaned_data["namespace_id"], 10)
 77 |         self.assertFalse(form.cleaned_data["exclude_bots"])
 78 | 
 79 |     def test_empty_data(self):
 80 |         form = FilterForm(data={})
 81 |         self.assertTrue(form.is_valid())
 82 |         self.assertIsNone(form.cleaned_data["start_date"])
 83 |         self.assertIsNone(form.cleaned_data["end_date"])
 84 |         self.assertFalse(form.cleaned_data["limit_to_user_list"])
 85 |         self.assertIsNone(form.cleaned_data["namespace_id"])
 86 |         self.assertFalse(form.cleaned_data["exclude_bots"])
 87 | 
 88 |     def test_invalid_start_date(self):
 89 |         form = FilterForm(data={"start_date": "2025/02/01"})
 90 |         self.assertFalse(form.is_valid())
 91 |         self.assertIn("start_date", form.errors)
 92 | 
 93 |     def test_clean_start_date(self):
 94 |         form = FilterForm(data={"start_date": "2023-11-11"})
 95 |         self.assertTrue(form.is_valid())
 96 |         self.assertEqual(form.cleaned_data["start_date"], date(2023, 11, 1))
 97 | 
 98 |     def test_invalid_end_date(self):
 99 |         form = FilterForm(data={"end_date": "abcd-12-ef"})
100 |         self.assertFalse(form.is_valid())
101 |         self.assertIn("end_date", form.errors)
102 | 
103 |     def test_clean_end_date(self):
104 |         form = FilterForm(data={"end_date": "2023-11-01"})
105 |         self.assertTrue(form.is_valid())
106 |         self.assertEqual(form.cleaned_data["end_date"], date(2023, 11, 30))
107 | 
108 |     def test_clean_end_date_leap_year(self):
109 |         form = FilterForm(data={"end_date": "2024-02-01"})
110 |         self.assertTrue(form.is_valid())
111 |         self.assertEqual(form.cleaned_data["end_date"], date(2024, 2, 29))
112 | 
113 |     def test_clean_end_date_feb_non_leap_year(self):
114 |         form = FilterForm(data={"end_date": "2025-02-01"})
115 |         self.assertTrue(form.is_valid())
116 |         self.assertEqual(form.cleaned_data["end_date"], date(2025, 2, 28))
117 | 
118 | 
119 | class SwiftConnectionTest(TestCase):
120 |     @mock.patch.dict(os.environ, SWIFT_TEST_CREDENTIALS, clear=True)
121 |     def test_swift_connection(self):
122 |         conn = swift.swift_connection()
123 |         self.assertIsInstance(conn, swiftclient.Connection)
124 | 
125 |     @mock.patch.dict(os.environ, {}, clear=True)
126 |     def test_swift_connection_validation(self):
127 |         with self.assertRaises(RuntimeError):
128 |             swift.swift_connection()
129 | 
130 | 
131 | class SwiftUploadTest(TestCase):
132 |     def setUp(self):
133 |         self.tmpdir = os.path.join(tempfile.gettempdir(), "SwiftUploadTest")
134 |         os.mkdir(self.tmpdir)
135 | 
136 |     def tearDown(self):
137 |         shutil.rmtree(self.tmpdir)
138 | 
139 |     def write_file(self, path: str, contents="placeholder"):
140 |         """
141 |         Writes a text file to the temporary directory.
142 |         """
143 | 
144 |         full_path = os.path.join(self.tmpdir, path)
145 |         with open(full_path, "w") as f:
146 |             f.write(contents)
147 | 
148 |         return full_path
149 | 
150 |     @mock.patch("swiftclient.Connection")
151 |     @mock.patch.dict(os.environ, SWIFT_TEST_CREDENTIALS, clear=True)
152 |     def test_swift_upload(self, mock_swift_connection):
153 |         """
154 |         Test that we can upload a file to Swift using the helper methods.
155 |         """
156 | 
157 |         mock_conn = mock_swift_connection.return_value
158 |         mock_conn.put_object.return_value = ""
159 | 
160 |         swift.upload_file(
161 |             swift.swift_connection(), "fakecontainer", self.write_file("file.txt")
162 |         )
163 |         mock_conn.put_object.assert_called_once_with(
164 |             "fakecontainer", "file.txt", contents=mock.ANY, content_type=mock.ANY
165 |         )
166 | 
167 |     @mock.patch("swiftclient.Connection")
168 |     @mock.patch.dict(os.environ, SWIFT_TEST_CREDENTIALS, clear=True)
169 |     def test_swift_batch_upload(self, mock_swift_connection):
170 |         """
171 |         Test that we can upload a batch of files to Swift.
172 |         """
173 | 
174 |         mock_conn = mock_swift_connection.return_value
175 |         mock_conn.put_object.return_value = ""
176 | 
177 |         files = ["file1.txt", "file2.txt", "file3.txt", "file4.txt", "file5.txt"]
178 |         for file in files:
179 |             self.write_file(os.path.join(self.tmpdir, file))
180 | 
181 |         conn = swift.swift_connection()
182 |         swift.batch_upload_files(
183 |             conn,
184 |             "fakecontainer",
185 |             (os.path.join(self.tmpdir, file) for file in files),
186 |             max_workers=3,
187 |         )
188 |         mock_conn.put_object.assert_has_calls(
189 |             (
190 |                 mock.call(
191 |                     "fakecontainer",
192 |                     file,
193 |                     contents=mock.ANY,
194 |                     content_type=mock.ANY,
195 |                 )
196 |                 for file in files
197 |             ),
198 |             any_order=True,
199 |         )
200 | 


--------------------------------------------------------------------------------
/extlinks/common/urls.py:
--------------------------------------------------------------------------------
 1 | from django.urls import path
 2 | 
 3 | from extlinks.common.views import (
 4 |     CSVProjectTotals,
 5 |     CSVUserTotals,
 6 | )
 7 | 
 8 | # Shared URL paths. These get namespaced by each app's urlpatterns.
 9 | urlpatterns = [
10 |     path(
11 |         "<int:pk>/csv/project_totals",
12 |         CSVProjectTotals.as_view(),
13 |         name="csv_project_totals",
14 |     ),
15 |     path("<int:pk>/csv/user_totals", CSVUserTotals.as_view(), name="csv_user_totals"),
16 | ]
17 | 


--------------------------------------------------------------------------------
/extlinks/common/views.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | 
  3 | from django.contrib.contenttypes.models import ContentType
  4 | from django.db.models import Q, Sum
  5 | from django.http import HttpResponse
  6 | from django.views.generic import View
  7 | 
  8 | from extlinks.aggregates.models import (
  9 |     LinkAggregate,
 10 |     PageProjectAggregate,
 11 |     UserAggregate,
 12 | )
 13 | from extlinks.common.helpers import build_queryset_filters
 14 | from extlinks.links.models import URLPattern, LinkEvent
 15 | from extlinks.organisations.models import Collection, Organisation
 16 | from extlinks.programs.models import Program
 17 | 
 18 | 
 19 | # CSV views borrowed from
 20 | # https://github.com/WikipediaLibrary/TWLight/blob/master/TWLight/graphs/views.py
 21 | # These views are a little hacky in how they determine whether we need the CSV
 22 | # for an organisation or partner page, but this seems to work.
 23 | class _CSVDownloadView(View):
 24 |     """
 25 |     Base view powering CSV downloads. Not intended to be used directly.
 26 |     URLs should point at subclasses of this view. Subclasses should implement a
 27 |     _write_data() method.
 28 |     """
 29 | 
 30 |     def get(self, request, *args, **kwargs):
 31 |         # Create the HttpResponse object with the appropriate CSV header.
 32 |         response = HttpResponse(content_type="text/csv")
 33 |         response["Content-Disposition"] = 'attachment; filename="data.csv"'
 34 | 
 35 |         self._write_data(response)
 36 | 
 37 |         return response
 38 | 
 39 |     def _write_data(self, response):
 40 |         raise NotImplementedError
 41 | 
 42 | 
 43 | class CSVOrgTotals(_CSVDownloadView):
 44 |     def _write_data(self, response):
 45 |         program_pk = self.kwargs["pk"]
 46 |         queryset_filter = _get_queryset_filter(
 47 |             program_pk, self.request.build_absolute_uri(), self.request.GET
 48 |         )
 49 | 
 50 |         top_orgs = (
 51 |             LinkAggregate.objects.filter(queryset_filter)
 52 |             .values("organisation__pk", "organisation__name")
 53 |             .annotate(
 54 |                 links_added=Sum("total_links_added"),
 55 |                 links_removed=Sum("total_links_removed"),
 56 |                 links_diff=Sum("total_links_added") - Sum("total_links_removed"),
 57 |             )
 58 |             .order_by("-links_diff", "-links_added", "-links_removed")
 59 |         )
 60 | 
 61 |         writer = csv.writer(response)
 62 | 
 63 |         writer.writerow(["Organisation", "Links added", "Links removed", "Net Change"])
 64 | 
 65 |         for org in top_orgs:
 66 |             writer.writerow(
 67 |                 [
 68 |                     org["organisation__name"],
 69 |                     org["links_added"],
 70 |                     org["links_removed"],
 71 |                     org["links_diff"],
 72 |                 ]
 73 |             )
 74 | 
 75 | 
 76 | class CSVPageTotals(_CSVDownloadView):
 77 |     def _write_data(self, response):
 78 |         pk = self.kwargs["pk"]
 79 |         queryset_filter = _get_queryset_filter(
 80 |             pk, self.request.build_absolute_uri(), self.request.GET
 81 |         )
 82 | 
 83 |         top_pages = (
 84 |             PageProjectAggregate.objects.filter(queryset_filter)
 85 |             .values("project_name", "page_name")
 86 |             .annotate(
 87 |                 links_added=Sum("total_links_added"),
 88 |                 links_removed=Sum("total_links_removed"),
 89 |                 links_diff=Sum("total_links_added") - Sum("total_links_removed"),
 90 |             )
 91 |             .order_by("-links_diff", "-links_added", "-links_removed")
 92 |         )
 93 |         writer = csv.writer(response)
 94 | 
 95 |         writer.writerow(
 96 |             ["Page title", "Project", "Links added", "Links removed", "Net Change"]
 97 |         )
 98 | 
 99 |         for page in top_pages:
100 |             writer.writerow(
101 |                 [
102 |                     page["page_name"],
103 |                     page["project_name"],
104 |                     page["links_added"],
105 |                     page["links_removed"],
106 |                     page["links_diff"],
107 |                 ]
108 |             )
109 | 
110 | 
111 | class CSVProjectTotals(_CSVDownloadView):
112 |     def _write_data(self, response):
113 |         pk = self.kwargs["pk"]
114 |         queryset_filter = _get_queryset_filter(
115 |             pk, self.request.build_absolute_uri(), self.request.GET
116 |         )
117 | 
118 |         top_projects = (
119 |             PageProjectAggregate.objects.filter(queryset_filter)
120 |             .values("project_name")
121 |             .annotate(
122 |                 links_added=Sum("total_links_added"),
123 |                 links_removed=Sum("total_links_removed"),
124 |                 links_diff=Sum("total_links_added") - Sum("total_links_removed"),
125 |             )
126 |             .order_by("-links_diff", "-links_added", "-links_removed")
127 |         )
128 |         writer = csv.writer(response)
129 | 
130 |         writer.writerow(["Project", "Links added", "Links removed", "Net Change"])
131 | 
132 |         for project in top_projects:
133 |             writer.writerow(
134 |                 [
135 |                     project["project_name"],
136 |                     project["links_added"],
137 |                     project["links_removed"],
138 |                     project["links_diff"],
139 |                 ]
140 |             )
141 | 
142 | 
143 | class CSVUserTotals(_CSVDownloadView):
144 |     def _write_data(self, response):
145 |         pk = self.kwargs["pk"]
146 |         queryset_filter = _get_queryset_filter(
147 |             pk, self.request.build_absolute_uri(), self.request.GET
148 |         )
149 | 
150 |         top_users = (
151 |             UserAggregate.objects.filter(queryset_filter)
152 |             .values("username")
153 |             .annotate(
154 |                 links_added=Sum("total_links_added"),
155 |                 links_removed=Sum("total_links_removed"),
156 |                 links_diff=Sum("total_links_added") - Sum("total_links_removed"),
157 |             )
158 |             .order_by("-links_diff", "-links_added", "-links_removed")
159 |         )
160 |         writer = csv.writer(response)
161 | 
162 |         writer.writerow(["Username", "Links added", "Links removed", "Net Change"])
163 | 
164 |         for user in top_users:
165 |             writer.writerow(
166 |                 [
167 |                     user["username"],
168 |                     user["links_added"],
169 |                     user["links_removed"],
170 |                     user["links_diff"],
171 |                 ]
172 |             )
173 | 
174 | 
175 | def _get_queryset_filter(pk, uri, filters):
176 |     """
177 |     This function returns a Q object with filters depending on which URL a user
178 |     is requesting information from
179 | 
180 |     Parameters
181 |     ----------
182 |     pk: int
183 |         The primary key of a collection or a program, depending on the origin of
184 |         the request
185 | 
186 |     uri: str
187 |         The origin URL from the request. If the URL is from the organisations view,
188 |         then we will obtain the collection. Otherwise, if the URL is from the
189 |         programs view, we will obtain the organisations associated to that program
190 | 
191 |     filters: dict
192 |         The filters (if there are any) that were passed in the request
193 | 
194 |     Returns
195 |     -------
196 |     Q : A Q object which will filter the aggregates queries
197 |     """
198 |     # If we came from an organisation page, then we are passing the collection id
199 |     if "/organisations" in uri:
200 |         collection = Collection.objects.get(pk=pk)
201 |         queryset_filter = build_queryset_filters(filters, {"collection": collection})
202 |     else:
203 |         program = Program.objects.prefetch_related("organisation_set").get(pk=pk)
204 |         queryset_filter = build_queryset_filters(
205 |             filters, {"organisations": program.organisation_set.all()}
206 |         )
207 | 
208 |     return queryset_filter
209 | 


--------------------------------------------------------------------------------
/extlinks/healthcheck/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/healthcheck/__init__.py


--------------------------------------------------------------------------------
/extlinks/healthcheck/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | 
3 | # Register your models here.
4 | 


--------------------------------------------------------------------------------
/extlinks/healthcheck/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/healthcheck/migrations/__init__.py


--------------------------------------------------------------------------------
/extlinks/healthcheck/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | 
3 | # Create your models here.
4 | 


--------------------------------------------------------------------------------
/extlinks/healthcheck/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 | 
3 | # Create your tests here.
4 | 


--------------------------------------------------------------------------------
/extlinks/healthcheck/urls.py:
--------------------------------------------------------------------------------
 1 | from django.urls import path
 2 | 
 3 | from .views import (
 4 |     AggregatesCronHealthCheckView,
 5 |     CommonCronHealthCheckView,
 6 |     LinksCronHealthCheckView,
 7 |     OrganizationsCronHealthCheckView,
 8 |     LinkEventHealthCheckView,
 9 |     MonthlyAggregatesCronHealthCheckView,
10 | )
11 | 
12 | urlpatterns = [
13 |     path("link_event", LinkEventHealthCheckView.as_view(), name="link_event"),
14 |     path("agg_crons", AggregatesCronHealthCheckView.as_view(), name="agg_crons"),
15 |     path("common_crons", CommonCronHealthCheckView.as_view(), name="common_crons"),
16 |     path("link_crons", LinksCronHealthCheckView.as_view(), name="link_crons"),
17 |     path("org_crons", OrganizationsCronHealthCheckView.as_view(), name="org_crons"),
18 |     path("month_agg_crons", MonthlyAggregatesCronHealthCheckView.as_view(), name="month_agg_crons"),
19 | ]
20 | 


--------------------------------------------------------------------------------
/extlinks/healthcheck/views.py:
--------------------------------------------------------------------------------
  1 | from datetime import timedelta
  2 | import os
  3 | from django.http import JsonResponse
  4 | from django.views import View
  5 | from django.utils.decorators import method_decorator
  6 | from django.utils.timezone import now
  7 | from django.views.decorators.cache import cache_page
  8 | from extlinks.aggregates.models import (
  9 |     LinkAggregate,
 10 |     UserAggregate,
 11 |     PageProjectAggregate,
 12 | )
 13 | from extlinks.links.models import LinkEvent, LinkSearchTotal
 14 | from extlinks.organisations.models import Organisation
 15 | 
 16 | 
 17 | @method_decorator(cache_page(60 * 1), name="dispatch")
 18 | class LinkEventHealthCheckView(View):
 19 |     """
 20 |     Healthcheck that passes only if the latest link event is less than a day old
 21 |     """
 22 | 
 23 |     def get(self, request, *args, **kwargs):
 24 |         status_code = 500
 25 |         status_msg = "error"
 26 |         try:
 27 |             latest_linkevent_datetime = LinkEvent.objects.all().latest().timestamp
 28 |             cutoff_datetime = now() - timedelta(days=1)
 29 |             if latest_linkevent_datetime > cutoff_datetime:
 30 |                 status_code = 200
 31 |                 status_msg = "ok"
 32 |             else:
 33 |                 status_msg = "out of date"
 34 |         except LinkEvent.DoesNotExist:
 35 |             status_code = 404
 36 |             status_msg = "not found"
 37 |         response = JsonResponse({"status": status_msg})
 38 |         response.status_code = status_code
 39 |         return response
 40 | 
 41 | 
 42 | @method_decorator(cache_page(60 * 1), name="dispatch")
 43 | class AggregatesCronHealthCheckView(View):
 44 |     """
 45 |     Healthcheck that passes only if the link aggregate jobs have all run successfully in the last 2 days
 46 |     """
 47 | 
 48 |     def get_most_recent(self, aggregate, monthly=False):
 49 |         try:
 50 |             if monthly:
 51 |                 aggregate.objects.filter(day=0).latest("full_date").full_date
 52 |             else:
 53 |                 aggregate.objects.exclude(day=0).latest("full_date").full_date
 54 |         except aggregate.DoesNotExist:
 55 |             pass
 56 | 
 57 |     def get(self, request, *args, **kwargs):
 58 |         status_code = 500
 59 |         status_msg = "error"
 60 |         try:
 61 |             latest_link_aggregates_cron_endtime = self.get_most_recent(LinkAggregate)
 62 |             latest_user_aggregates_cron_endtime = self.get_most_recent(UserAggregate)
 63 |             latest_pageproject_aggregates_cron_endtime = self.get_most_recent(
 64 |                 PageProjectAggregate
 65 |             )
 66 |             cutoff_datetime = now() - timedelta(days=2)
 67 |             if latest_link_aggregates_cron_endtime < cutoff_datetime:
 68 |                 status_msg = "out of date"
 69 |             elif latest_user_aggregates_cron_endtime < cutoff_datetime:
 70 |                 status_msg = "out of date"
 71 |             elif latest_pageproject_aggregates_cron_endtime < cutoff_datetime:
 72 |                 status_msg = "out of date"
 73 |             else:
 74 |                 status_code = 200
 75 |                 status_msg = "ok"
 76 |         except:
 77 |             status_code = 404
 78 |             status_msg = "not found"
 79 |         response = JsonResponse({"status": status_msg})
 80 |         response.status_code = status_code
 81 |         return response
 82 | 
 83 | 
 84 | @method_decorator(cache_page(60 * 1), name="dispatch")
 85 | class MonthlyAggregatesCronHealthCheckView(View):
 86 |     """
 87 |     Healthcheck that passes only if the monthly aggregate jobs have all run successfully in the last month
 88 |     """
 89 | 
 90 |     def get(self, request, *args, **kwargs):
 91 |         status_code = 500
 92 |         status_msg = "error"
 93 |         try:
 94 |             latest_link_aggregates_cron_endtime = self.get_most_recent(
 95 |                 LinkAggregate, True
 96 |             )
 97 |             latest_user_aggregates_cron_endtime = self.get_most_recent(
 98 |                 UserAggregate, True
 99 |             )
100 |             latest_pageproject_aggregates_cron_endtime = self.get_most_recent(
101 |                 PageProjectAggregate, True
102 |             )
103 |             # Monthly jobs may take some time to run, let's give 35 days to make sure
104 |             cutoff_datetime = now() - timedelta(days=35)
105 |             if latest_link_aggregates_cron_endtime < cutoff_datetime:
106 |                 status_msg = "out of date"
107 |             elif latest_user_aggregates_cron_endtime < cutoff_datetime:
108 |                 status_msg = "out of date"
109 |             elif latest_pageproject_aggregates_cron_endtime < cutoff_datetime:
110 |                 status_msg = "out of date"
111 |             else:
112 |                 status_code = 200
113 |                 status_msg = "ok"
114 |         except:
115 |             status_code = 404
116 |             status_msg = "not found"
117 |         response = JsonResponse({"status": status_msg})
118 |         response.status_code = status_code
119 |         return response
120 | 
121 | 
122 | @method_decorator(cache_page(60 * 1), name="dispatch")
123 | class CommonCronHealthCheckView(View):
124 |     """
125 |     Healthcheck that passes only if a backup file has been created in the last 3 days
126 |     """
127 | 
128 |     def get(self, request, *args, **kwargs):
129 |         status_code = 500
130 |         status_msg = "error"
131 |         status_msg = "out of date"
132 |         for i in range(3):
133 |             date = now() - timedelta(days=i)
134 |             filename = "links_linkevent_{}.json.gz".format(date.strftime("%Y%m%d"))
135 |             file = os.path.join(os.environ["HOST_BACKUP_DIR"], filename)
136 |             if os.path.isfile(file):
137 |                 status_code = 200
138 |                 status_msg = "ok"
139 |                 break
140 |         response = JsonResponse({"status": status_msg})
141 |         response.status_code = status_code
142 |         return response
143 | 
144 | 
145 | @method_decorator(cache_page(60 * 1), name="dispatch")
146 | class LinksCronHealthCheckView(View):
147 |     """
148 |     Healthcheck that passes only if the links jobs have all run successfully in the last 9 days
149 |     """
150 | 
151 |     def get(self, request, *args, **kwargs):
152 |         status_code = 500
153 |         status_msg = "error"
154 |         try:
155 |             latest_total_links_endtime = LinkSearchTotal.objects.latest("date").date
156 |             cutoff_datetime = now().date() - timedelta(days=9)
157 |             if latest_total_links_endtime < cutoff_datetime:
158 |                 status_msg = "out of date"
159 |             else:
160 |                 status_code = 200
161 |                 status_msg = "ok"
162 |         except:
163 |             status_code = 404
164 |             status_msg = "not found"
165 |         response = JsonResponse({"status": status_msg})
166 |         response.status_code = status_code
167 |         return response
168 | 
169 | 
170 | @method_decorator(cache_page(60 * 1), name="dispatch")
171 | class OrganizationsCronHealthCheckView(View):
172 |     """
173 |     Healthcheck that passes only if the Organizations jobs have all run successfully in the last 2 hours
174 |     """
175 | 
176 |     def get(self, request, *args, **kwargs):
177 |         status_code = 500
178 |         status_msg = "error"
179 |         try:
180 |             latest_user_lists_endtime = Organisation.objects.latest(
181 |                 "username_list_updated"
182 |             ).username_list_updated
183 |             cutoff_datetime = now() - timedelta(hours=2)
184 |             if latest_user_lists_endtime < cutoff_datetime:
185 |                 status_msg = "out of date"
186 |             else:
187 |                 status_code = 200
188 |                 status_msg = "ok"
189 |         except:
190 |             status_code = 404
191 |             status_msg = "not found"
192 |         response = JsonResponse({"status": status_msg})
193 |         response.status_code = status_code
194 |         return response
195 | 


--------------------------------------------------------------------------------
/extlinks/links/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/links/__init__.py


--------------------------------------------------------------------------------
/extlinks/links/admin.py:
--------------------------------------------------------------------------------
 1 | from django.contrib import admin
 2 | from django.contrib.contenttypes.admin import GenericTabularInline
 3 | 
 4 | from .models import URLPattern, LinkSearchTotal, LinkEvent
 5 | 
 6 | 
 7 | class LinkEventURLPatternAdminInline(GenericTabularInline):
 8 |     model = LinkEvent
 9 |     # Although not ideal, changing this to False has improved performance
10 |     show_change_link = False
11 |     exclude = ["user_id", "url"]
12 |     readonly_fields = [
13 |         "link",
14 |         "timestamp",
15 |         "domain",
16 |         "rev_id",
17 |         "page_title",
18 |         "page_namespace",
19 |         "event_id",
20 |         "user_is_bot",
21 |         "hash_link_event_id",
22 |         "change",
23 |         "username",
24 |         "on_user_list",
25 |     ]
26 | 
27 |     def get_queryset(self, request):
28 |         qs = super().get_queryset(request)
29 | 
30 |         return qs.select_related("username")
31 | 
32 | 
33 | class URLPatternAdmin(admin.ModelAdmin):
34 |     list_display = ("url",)
35 |     exclude = ["collections"]
36 |     autocomplete_fields = ["collection"]
37 |     inlines = [
38 |         LinkEventURLPatternAdminInline,
39 |     ]
40 | 
41 | 
42 | admin.site.register(URLPattern, URLPatternAdmin)
43 | 
44 | 
45 | class LinkSearchTotalAdmin(admin.ModelAdmin):
46 |     list_display = ("url", "date", "total")
47 |     list_select_related = ["url"]
48 | 
49 | 
50 | admin.site.register(LinkSearchTotal, LinkSearchTotalAdmin)
51 | 
52 | 
53 | class LinkEventAdmin(admin.ModelAdmin):
54 |     list_display = ("link", "timestamp", "domain", "username", "change")
55 |     list_select_related = ["username", "content_type"]
56 |     readonly_fields = ["url_pattern_display", "username"]
57 |     exclude = ["content_type", "object_id", "url"]
58 | 
59 |     @admin.display(description="URLPattern")
60 |     def url_pattern_display(self, instance):
61 |         return URLPattern.objects.filter(pk=instance.content_object.pk).first()
62 | 
63 | 
64 | admin.site.register(LinkEvent, LinkEventAdmin)
65 | 


--------------------------------------------------------------------------------
/extlinks/links/factories.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timezone
 2 | import factory
 3 | import random
 4 | 
 5 | from extlinks.organisations.factories import UserFactory, CollectionFactory
 6 | from .models import LinkEvent, LinkSearchTotal, URLPattern
 7 | 
 8 | class URLPatternFactory(factory.django.DjangoModelFactory):
 9 |     class Meta:
10 |         model = URLPattern
11 |         strategy = factory.CREATE_STRATEGY
12 | 
13 |     # factory.Faker returns a Faker object by default, rather than str
14 |     url = str(factory.Faker("url", schemes=["https"]))[8:-1]
15 | 
16 | 
17 | class LinkEventFactory(factory.django.DjangoModelFactory):
18 |     class Meta:
19 |         model = LinkEvent
20 |         strategy = factory.CREATE_STRATEGY
21 | 
22 |     # We don't define any automatically generated link here, because it
23 |     # needs to directly correspond to the url field for this object too.
24 |     timestamp = datetime.now(timezone.utc)
25 |     domain = "en.wikipedia.org"
26 |     username = factory.SubFactory(UserFactory)
27 |     rev_id = random.randint(10000000, 100000000)
28 |     user_id = random.randint(10000000, 100000000)
29 |     page_title = factory.Faker("word")
30 |     page_namespace = 0
31 |     event_id = factory.Faker("uuid4")
32 |     change = LinkEvent.ADDED
33 |     on_user_list = False
34 | 
35 | 
36 | class LinkSearchTotalFactory(factory.django.DjangoModelFactory):
37 |     class Meta:
38 |         model = LinkSearchTotal
39 |         strategy = factory.CREATE_STRATEGY
40 | 
41 |     url = factory.SubFactory(URLPatternFactory)
42 |     date = datetime.today()
43 |     total = random.randint(1, 1000)
44 | 


--------------------------------------------------------------------------------
/extlinks/links/helpers.py:
--------------------------------------------------------------------------------
 1 | from urllib.parse import unquote
 2 | 
 3 | from .models import URLPattern
 4 | 
 5 | 
 6 | def split_url_for_query(url):
 7 |     """
 8 |     Given a URL pattern, split it into two components:
 9 |     url_optimised: URL and domain name in the el_index format
10 |     (https://www.mediawiki.org/wiki/Manual:Externallinks_table#el_index)
11 |     url_pattern_end: Anything following the domain name
12 |     """
13 |     url = url.strip()  # Catch any trailing spaces
14 |     # Start after *. if present
15 |     if url.startswith("*."):
16 |         url = url[2:]
17 | 
18 |     url_start = url.split("/")[0].split(".")[::-1]
19 |     url_optimised = ".".join(url_start) + ".%"
20 | 
21 |     if "/" in url:
22 |         url_end = "/".join(url.split("/")[1:])
23 |         url_pattern_end = "%./" + url_end + "%"
24 |     else:
25 |         url_pattern_end = "%"
26 | 
27 |     return url_optimised, url_pattern_end
28 | 
29 | 
30 | def link_is_tracked(link):
31 |     all_urlpatterns = URLPattern.objects.all()
32 |     tracked_links_list = list(all_urlpatterns.values_list("url", flat=True))
33 |     proxied_url = False
34 | 
35 |     # If this looks like a TWL proxied URL we're going to need to match
36 |     # it against a longer list of strings
37 |     if "wikipedialibrary.idm.oclc" in link:
38 |         proxied_url = True
39 |         proxied_urls = [urlpattern.get_proxied_url for urlpattern in all_urlpatterns]
40 |         tracked_links_list.extend(proxied_urls)
41 | 
42 |     # This is a quick check so we can filter the majority of events
43 |     # which won't be matching our filters
44 |     if any(links in link for links in tracked_links_list):
45 |         # Then we do a more detailed check, to make sure this is the
46 |         # root URL.
47 |         for tracked_link in tracked_links_list:
48 |             # If we track apa.org, we don't want to match iaapa.org
49 |             # so we make sure the URL is actually pointing at apa.org
50 |             url_starts = ["//" + tracked_link, "." + tracked_link]
51 |             if proxied_url:
52 |                 # Proxy URLs may contain //www- not //www.
53 |                 url_starts.append("-" + tracked_link)
54 | 
55 |             # We want to avoid link additions from e.g. InternetArchive
56 |             # where the URL takes the structure
57 |             # https://web.archive.org/https://test.com/
58 |             protocol_count = link.count("//")
59 | 
60 |             if any(start in link for start in url_starts) and protocol_count < 2:
61 |                 return True
62 |     else:
63 |         return False
64 | 


--------------------------------------------------------------------------------
/extlinks/links/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/links/management/__init__.py


--------------------------------------------------------------------------------
/extlinks/links/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/links/management/commands/__init__.py


--------------------------------------------------------------------------------
/extlinks/links/management/commands/fix_proxy_linkevents_on_user_list.py:
--------------------------------------------------------------------------------
 1 | from extlinks.common.management.commands import BaseCommand
 2 | from django.core.management import call_command
 3 | 
 4 | from extlinks.aggregates.models import (
 5 |     LinkAggregate,
 6 |     PageProjectAggregate,
 7 |     UserAggregate,
 8 | )
 9 | from extlinks.links.models import LinkEvent
10 | 
11 | 
12 | class Command(BaseCommand):
13 |     help = "Fixes all those proxy linkevents that aren't in the user list"
14 | 
15 |     def _handle(self, *args, **options):
16 |         proxy_not_on_user_list_linkevents = LinkEvent.objects.filter(
17 |             link__contains="wikipedialibrary.idm.oclc", on_user_list=False
18 |         )
19 | 
20 |         if proxy_not_on_user_list_linkevents.exists():
21 |             earliest_link_date = proxy_not_on_user_list_linkevents.earliest(
22 |                 "timestamp"
23 |             ).timestamp
24 |             collection_list = set()
25 |             for linkevent in proxy_not_on_user_list_linkevents:
26 |                 # Get URLPatterns associated with the linkevent
27 |                 url = linkevent.content_object
28 |                 # Get the organisation from the first url
29 |                 if url:
30 |                     collections = url.collections.all()
31 |                     for collection in collections:
32 |                         collection_list.add(collection.id)
33 |                         organisation = collection.organisation
34 |                         username_list = organisation.username_list
35 |                         if username_list:
36 |                             if linkevent.username in username_list.all():
37 |                                 linkevent.on_user_list = True
38 |                                 linkevent.save()
39 | 
40 |             if collection_list:
41 |                 LinkAggregate.objects.filter(
42 |                     collection__in=collection_list, full_date__gte=earliest_link_date
43 |                 ).delete()
44 |                 PageProjectAggregate.objects.filter(
45 |                     collection__in=collection_list, full_date__gte=earliest_link_date
46 |                 ).delete()
47 |                 UserAggregate.objects.filter(
48 |                     collection__in=collection_list, full_date__gte=earliest_link_date
49 |                 ).delete()
50 | 
51 |                 call_command("fill_link_aggregates", collections=collection_list)
52 |                 call_command("fill_pageproject_aggregates", collections=collection_list)
53 |                 call_command("fill_user_aggregates", collections=collection_list)
54 | 


--------------------------------------------------------------------------------
/extlinks/links/management/commands/linkevent_example_data.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timedelta, timezone
 2 | import random
 3 | from faker import Faker
 4 | 
 5 | from extlinks.common.management.commands import BaseCommand
 6 | 
 7 | from extlinks.organisations.models import User
 8 | from ...models import URLPattern, LinkEvent
 9 | 
10 | 
11 | class Command(BaseCommand):
12 |     help = "Backfills a set of linkevents for each url pattern"
13 | 
14 |     def add_arguments(self, parser):
15 |         parser.add_argument("num_events", nargs="+", type=int)
16 | 
17 |     def _handle(self, *args, **options):
18 |         # Number of link events to log in total
19 |         num_events = options["num_events"][0]
20 | 
21 |         fake = Faker()
22 |         languages = ["en", "de", "fr", "he", "hi", "ta"]
23 |         users = User.objects.all()
24 |         # Hacky way of adding a weighted random choice of change type.
25 |         # Addition is likely to be more prevalent.
26 |         change_choices = [
27 |             LinkEvent.ADDED,
28 |             LinkEvent.ADDED,
29 |             LinkEvent.ADDED,
30 |             LinkEvent.REMOVED,
31 |         ]
32 | 
33 |         urlpatterns = URLPattern.objects.all()
34 | 
35 |         for _ in range(num_events):
36 |             urlpattern = random.choice(urlpatterns)
37 |             organisation = urlpattern.collections.first().organisation
38 |             random_user = random.choice(users)
39 | 
40 |             # If this org limits by user, choose either a random user who
41 |             # isn't on the org's user list, or from the org's user list.
42 |             on_user_list = False
43 |             if organisation.limit_by_user:
44 |                 username_list = organisation.username_list.all()
45 |                 if random_user in username_list:
46 |                     on_user_list = True
47 | 
48 |             link_event = LinkEvent.objects.create(
49 |                 link=urlpattern.url + "/" + fake.word(),
50 |                 timestamp=fake.date_time_between(
51 |                     start_date=datetime.now() - timedelta(days=365),
52 |                     end_date="now",
53 |                     tzinfo=timezone.utc,
54 |                 ),
55 |                 domain=random.choice(languages) + ".wikipedia.org",
56 |                 username=random_user,
57 |                 rev_id=random.randint(10000000, 100000000),
58 |                 user_id=random.randint(10000000, 100000000),
59 |                 page_title=fake.word(),
60 |                 page_namespace=0,
61 |                 event_id=fake.uuid4(),
62 |                 change=random.choice(change_choices),
63 |                 on_user_list=on_user_list,
64 |             )
65 |             urlpattern.link_events.add(link_event)
66 |             urlpattern.save()
67 | 


--------------------------------------------------------------------------------
/extlinks/links/management/commands/linksearchtotal_collect.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import MySQLdb
 3 | import os
 4 | 
 5 | from extlinks.common.management.commands import BaseCommand
 6 | from django.db import close_old_connections
 7 | 
 8 | from extlinks.links.helpers import split_url_for_query
 9 | from extlinks.links.models import LinkSearchTotal, URLPattern
10 | from extlinks.settings.base import BASE_DIR
11 | 
12 | 
13 | class Command(BaseCommand):
14 |     help = "Updates link totals from externallinks table"
15 | 
16 |     def _handle(self, *args, **options):
17 |         protocols = ["http", "https"]
18 | 
19 |         with open(os.path.join(BASE_DIR, "wiki-list.csv"), "r") as wiki_list:
20 |             csv_reader = csv.reader(wiki_list)
21 |             wiki_list_data = []
22 |             for row in csv_reader:
23 |                 wiki_list_data.append(row[0])
24 | 
25 |         all_urlpatterns = URLPattern.objects.all()
26 | 
27 |         total_links_dictionary = {}
28 |         for i, language in enumerate(wiki_list_data):
29 |             db = MySQLdb.connect(
30 |                 host="{lang}wiki.analytics.db.svc.wikimedia.cloud".format(
31 |                     lang=language
32 |                 ),
33 |                 user=os.environ["REPLICA_DB_USER"],
34 |                 passwd=os.environ["REPLICA_DB_PASSWORD"],
35 |                 db="{lang}wiki_p".format(lang=language),
36 |             )
37 | 
38 |             cur = db.cursor()
39 | 
40 |             for urlpattern in all_urlpatterns:
41 |                 # For the first language, initialise tracking
42 |                 if i == 0:
43 |                     total_links_dictionary[urlpattern.pk] = 0
44 | 
45 |                 url = urlpattern.url
46 |                 optimised_url, url_pattern_end = split_url_for_query(url)
47 | 
48 |                 for protocol in protocols:
49 |                     url_pattern_start = protocol + "://" + optimised_url
50 | 
51 |                     cur.execute(
52 |                         """SELECT COUNT(*) FROM externallinks
53 |                                 WHERE el_to_domain_index LIKE '{url_start}'
54 |                                 AND el_to_domain_index LIKE '{url_end}'
55 |                                 """.format(
56 |                             url_start=url_pattern_start, url_end=url_pattern_end
57 |                         )
58 |                     )
59 | 
60 |                     this_num_urls = cur.fetchone()[0]
61 | 
62 |                     total_links_dictionary[urlpattern.pk] += this_num_urls
63 | 
64 |         for urlpattern_pk, total_count in total_links_dictionary.items():
65 |             linksearch_object = LinkSearchTotal(
66 |                 url=URLPattern.objects.get(pk=urlpattern_pk), total=total_count
67 |             )
68 |             linksearch_object.save()
69 | 
70 |         close_old_connections()
71 | 


--------------------------------------------------------------------------------
/extlinks/links/management/commands/linksearchtotal_example_data.py:
--------------------------------------------------------------------------------
 1 | from datetime import date, timedelta
 2 | import random
 3 | 
 4 | from extlinks.common.management.commands import BaseCommand
 5 | 
 6 | from ...models import URLPattern, LinkSearchTotal
 7 | 
 8 | 
 9 | class Command(BaseCommand):
10 |     help = "Backfills a set of LinkSearchTotals for each url pattern"
11 | 
12 |     def add_arguments(self, parser):
13 |         parser.add_argument("weeks", nargs="+", type=int)
14 | 
15 |     def _handle(self, *args, **options):
16 |         # The number of weeks to go back
17 |         num_dates = options["weeks"][0]
18 | 
19 |         urlpatterns = URLPattern.objects.all()
20 |         for urlpattern in urlpatterns:
21 |             date_total = random.randint(500, 30000)
22 |             this_date = date.today()
23 | 
24 |             for _ in range(num_dates):
25 | 
26 |                 # Each week, going backwards, we lose between 0 and 10%
27 |                 # of the total number of links.
28 |                 less_total = random.randint(0, int(date_total * 0.1))
29 |                 date_total -= less_total
30 | 
31 |                 new_total = LinkSearchTotal(
32 |                     url=urlpattern, date=this_date, total=date_total
33 |                 )
34 |                 new_total.save()
35 | 
36 |                 this_date = this_date - timedelta(days=7)
37 | 


--------------------------------------------------------------------------------
/extlinks/links/management/commands/remove_ezproxy_collection.py:
--------------------------------------------------------------------------------
  1 | from django.contrib.contenttypes.models import ContentType
  2 | from extlinks.common.management.commands import BaseCommand
  3 | from django.core.management import call_command
  4 | 
  5 | from extlinks.aggregates.models import (
  6 |     LinkAggregate,
  7 |     PageProjectAggregate,
  8 |     UserAggregate,
  9 | )
 10 | from extlinks.links.models import URLPattern, LinkEvent
 11 | from extlinks.organisations.models import Organisation, Collection
 12 | 
 13 | 
 14 | class Command(BaseCommand):
 15 |     help = "Deletes the EZProxy collection and organisation and reassigns those LinkEvents to new URLPatterns"
 16 | 
 17 |     def _handle(self, *args, **options):
 18 |         ezproxy_org = self._get_ezproxy_organisation()
 19 |         ezproxy_collection = self._get_ezproxy_collection()
 20 |         url_patterns = ezproxy_collection.get_url_patterns().all()
 21 | 
 22 |         linkevents = LinkEvent.objects.get_queryset()
 23 |         for url_pattern in url_patterns:
 24 |             linkevents.filter(object_id=url_pattern.id)
 25 |         collections = Collection.objects.all()
 26 |         self._process_linkevents_collections(linkevents, collections)
 27 |         self._delete_aggregates_ezproxy(ezproxy_org, ezproxy_collection, url_patterns)
 28 | 
 29 |     def _get_ezproxy_organisation(self):
 30 |         """
 31 |         Gets the EZProxy organisation, or returns None if it's already been deleted
 32 | 
 33 |         Parameters
 34 |         ----------
 35 | 
 36 |         Returns
 37 |         -------
 38 |         Organisation object or None
 39 |         """
 40 |         if Organisation.objects.filter(name="Wikipedia Library OCLC EZProxy").exists():
 41 |             return Organisation.objects.get(name="Wikipedia Library OCLC EZProxy")
 42 | 
 43 |         return None
 44 | 
 45 |     def _get_ezproxy_collection(self):
 46 |         """
 47 |         Gets the EZProxy collection, or returns None if it's already been deleted
 48 | 
 49 |         Parameters
 50 |         ----------
 51 | 
 52 |         Returns
 53 |         -------
 54 |         Collection object or None
 55 |         """
 56 |         if Collection.objects.filter(name="EZProxy").exists():
 57 |             return Collection.objects.get(name="EZProxy")
 58 | 
 59 |         return None
 60 | 
 61 |     def _get_ezproxy_url_patterns(self, collection):
 62 |         """
 63 |         Gets the EZProxy collection, or returns None if it's already been deleted
 64 | 
 65 |         Parameters
 66 |         ----------
 67 |         collection: The collection the URLPatterns belong to
 68 | 
 69 |         Returns
 70 |         -------
 71 |         URLPattern object or None
 72 |         """
 73 |         if collection and URLPattern.objects.filter(collection=collection).exists():
 74 |             return URLPattern.objects.get(collection=collection)
 75 | 
 76 |         return None
 77 | 
 78 |     def _delete_aggregates_ezproxy(self, ezproxy_org, ezproxy_collection, url_patterns):
 79 |         """
 80 |         Deletes any aggregate with the EZProxy collection and organisation,
 81 |         then deletes the collection, organisation and url patterns
 82 | 
 83 |         Parameters
 84 |         ----------
 85 |         ezproxy_org: Organisation
 86 |         The organisation to filter and delete the aggregates tables and that
 87 |         will later be deleted
 88 | 
 89 |         ezproxy_collection: Collection
 90 |         The collection to filter and delete the aggregates tables and that
 91 |         will later be deleted
 92 | 
 93 |         url_patterns: URLPattern
 94 |         The EZProxy URLPatterns that will be deleted
 95 | 
 96 |         Returns
 97 |         -------
 98 | 
 99 |         """
100 |         LinkAggregate.objects.filter(
101 |             organisation=ezproxy_org, collection=ezproxy_collection
102 |         ).delete()
103 |         PageProjectAggregate.objects.filter(
104 |             organisation=ezproxy_org, collection=ezproxy_collection
105 |         ).delete()
106 |         UserAggregate.objects.filter(
107 |             organisation=ezproxy_org, collection=ezproxy_collection
108 |         ).delete()
109 | 
110 |         url_patterns.delete()
111 |         ezproxy_collection.delete()
112 |         ezproxy_org.delete()
113 | 
114 |     def _process_linkevents_collections(self, linkevents, collections):
115 |         """
116 |         Loops through all collections to get their url patterns. If a linkevent
117 |         link coincides with a URLPattern, it is added to that LinkEvent. That way,
118 |         it will be counted when the aggregates commands are run again
119 | 
120 |         Parameters
121 |         ----------
122 |         linkevents: Queryset[LinkEvent]
123 |         LinkEvent that have no URLPatterns assigned (therefore no collection assigned)
124 | 
125 |         collections: Queryset[Collection]
126 |         All of the collections
127 | 
128 |         Returns
129 |         -------
130 | 
131 |         """
132 |         for collection in collections:
133 |             linkevents_changed = 0
134 |             collection_urls = collection.get_url_patterns()
135 |             for url_pattern in collection_urls:
136 |                 for linkevent in linkevents:
137 |                     proxy_url = url_pattern.url.replace(".", "-")
138 |                     if url_pattern.url in linkevent.link or proxy_url in linkevent.link:
139 |                         url_pattern.link_events.add(linkevent)
140 |                         url_pattern.save()
141 |                         linkevents_changed += 1
142 |             if linkevents_changed > 0:
143 |                 # There have been changes to this collection, so we must delete
144 |                 # the aggregates tables for that collection and run the commands
145 |                 # for it
146 |                 LinkAggregate.objects.filter(collection=collection).delete()
147 |                 PageProjectAggregate.objects.filter(collection=collection).delete()
148 |                 UserAggregate.objects.filter(collection=collection).delete()
149 | 
150 |                 call_command("fill_link_aggregates", collections=[collection.pk])
151 |                 call_command("fill_pageproject_aggregates", collections=[collection.pk])
152 |                 call_command("fill_user_aggregates", collections=[collection.pk])
153 | 


--------------------------------------------------------------------------------
/extlinks/links/management/commands/upload_all_archived.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from extlinks.common.management.commands import BaseCommand
 4 | from django.core.management import call_command
 5 | 
 6 | 
 7 | class Command(BaseCommand):
 8 |     help = "Uploads all archives currently located in the backup directory to object storage"
 9 | 
10 |     def add_arguments(self, parser):
11 |         parser.add_argument(
12 |             "--dir",
13 |             help="The directory from which to upload archives.",
14 |             type=str
15 |         )
16 | 
17 |     def _handle(self, *args, **options):
18 |         path = options['dir']
19 |         for filename in os.listdir(path):
20 |             if filename.endswith('.json.gz') and filename.startswith('links_linkevent_'):
21 |                 file_path = os.path.join(path, filename)
22 |                 if os.path.isfile(file_path):
23 |                     call_command("linkevents_archive", "upload", file_path)
24 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0001_initial.py:
--------------------------------------------------------------------------------
  1 | # Generated by Django 2.2 on 2019-05-20 14:01
  2 | 
  3 | from django.db import migrations, models
  4 | import django.db.models.deletion
  5 | 
  6 | 
  7 | class Migration(migrations.Migration):
  8 | 
  9 |     initial = True
 10 | 
 11 |     dependencies = [
 12 |         ("organisations", "0001_initial"),
 13 |     ]
 14 | 
 15 |     operations = [
 16 |         migrations.CreateModel(
 17 |             name="URLPattern",
 18 |             fields=[
 19 |                 (
 20 |                     "id",
 21 |                     models.AutoField(
 22 |                         auto_created=True,
 23 |                         primary_key=True,
 24 |                         serialize=False,
 25 |                         verbose_name="ID",
 26 |                     ),
 27 |                 ),
 28 |                 ("url", models.CharField(max_length=60)),
 29 |                 (
 30 |                     "collection",
 31 |                     models.ForeignKey(
 32 |                         null=True,
 33 |                         on_delete=django.db.models.deletion.SET_NULL,
 34 |                         related_name="url",
 35 |                         to="organisations.Collection",
 36 |                     ),
 37 |                 ),
 38 |             ],
 39 |             options={
 40 |                 "verbose_name_plural": "URL patterns",
 41 |                 "verbose_name": "URL pattern",
 42 |             },
 43 |         ),
 44 |         migrations.CreateModel(
 45 |             name="LinkSearchTotal",
 46 |             fields=[
 47 |                 (
 48 |                     "id",
 49 |                     models.AutoField(
 50 |                         auto_created=True,
 51 |                         primary_key=True,
 52 |                         serialize=False,
 53 |                         verbose_name="ID",
 54 |                     ),
 55 |                 ),
 56 |                 ("date", models.DateField(auto_now_add=True)),
 57 |                 ("total", models.PositiveIntegerField()),
 58 |                 (
 59 |                     "url",
 60 |                     models.ForeignKey(
 61 |                         null=True,
 62 |                         on_delete=django.db.models.deletion.SET_NULL,
 63 |                         to="links.URLPattern",
 64 |                     ),
 65 |                 ),
 66 |             ],
 67 |             options={
 68 |                 "verbose_name_plural": "LinkSearch totals",
 69 |                 "verbose_name": "LinkSearch total",
 70 |             },
 71 |         ),
 72 |         migrations.CreateModel(
 73 |             name="LinkEvent",
 74 |             fields=[
 75 |                 (
 76 |                     "id",
 77 |                     models.AutoField(
 78 |                         auto_created=True,
 79 |                         primary_key=True,
 80 |                         serialize=False,
 81 |                         verbose_name="ID",
 82 |                     ),
 83 |                 ),
 84 |                 ("link", models.CharField(max_length=2083)),
 85 |                 ("timestamp", models.DateTimeField()),
 86 |                 ("domain", models.CharField(max_length=32)),
 87 |                 ("username", models.CharField(max_length=255)),
 88 |                 ("rev_id", models.PositiveIntegerField(null=True)),
 89 |                 ("user_id", models.PositiveIntegerField()),
 90 |                 ("page_title", models.CharField(max_length=255)),
 91 |                 ("page_namespace", models.IntegerField()),
 92 |                 ("event_id", models.CharField(max_length=36)),
 93 |                 ("change", models.IntegerField(choices=[(0, "Removed"), (1, "Added")])),
 94 |                 ("on_user_list", models.BooleanField(default=False)),
 95 |                 (
 96 |                     "url",
 97 |                     models.ManyToManyField(
 98 |                         related_name="linkevent", to="links.URLPattern"
 99 |                     ),
100 |                 ),
101 |             ],
102 |             options={
103 |                 "get_latest_by": "timestamp",
104 |             },
105 |         ),
106 |     ]
107 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0002_auto_20190520_1530.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-05-20 15:30
 2 | 
 3 | import datetime
 4 | from django.db import migrations, models
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("links", "0001_initial"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.AlterField(
15 |             model_name="linksearchtotal",
16 |             name="date",
17 |             field=models.DateField(default=datetime.date.today),
18 |         ),
19 |     ]
20 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0003_auto_20190530_1045.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-05-30 10:45
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("links", "0002_auto_20190520_1530"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name="urlpattern",
15 |             name="url",
16 |             field=models.CharField(max_length=150),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0004_auto_20190603_1110.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-03 11:10
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("links", "0003_auto_20190530_1045"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name="linkevent",
15 |             name="user_id",
16 |             field=models.PositiveIntegerField(null=True),
17 |         ),
18 |         migrations.AddConstraint(
19 |             model_name="linksearchtotal",
20 |             constraint=models.UniqueConstraint(
21 |                 fields=("url", "date"), name="unique_date_total"
22 |             ),
23 |         ),
24 |     ]
25 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0005_linkevent_user_is_bot.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-04 09:43
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("links", "0004_auto_20190603_1110"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="linkevent",
15 |             name="user_is_bot",
16 |             field=models.BooleanField(default=False),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0006_auto_20190628_1221.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-28 12:21
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ("organisations", "0005_auto_20190628_1221"),
11 |         ("links", "0005_linkevent_user_is_bot"),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.AlterField(
16 |             model_name="linkevent",
17 |             name="username",
18 |             field=models.ForeignKey(
19 |                 null=True,
20 |                 on_delete=django.db.models.deletion.SET_NULL,
21 |                 to="organisations.User",
22 |             ),
23 |         ),
24 |     ]
25 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0007_auto_20190730_1355.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2.3 on 2019-07-30 13:55
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("links", "0006_auto_20190628_1221"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name="linkevent",
15 |             name="change",
16 |             field=models.IntegerField(
17 |                 choices=[(0, "Removed"), (1, "Added")], db_index=True
18 |             ),
19 |         ),
20 |         migrations.AlterField(
21 |             model_name="linkevent",
22 |             name="domain",
23 |             field=models.CharField(db_index=True, max_length=32),
24 |         ),
25 |     ]
26 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0008_fill_proquest_openurl.py:
--------------------------------------------------------------------------------
 1 | from django.db import migrations
 2 | 
 3 | 
 4 | def add_link_events_to_proquest_openurl_collection(apps, schema_editor):
 5 |     LinkEvent = apps.get_model("links", "LinkEvent")
 6 |     Collection = apps.get_model("organisations", "Collection")
 7 |     URLPattern = apps.get_model("links", "URLPattern")
 8 |     proquest_openurl_collection = Collection.objects.filter(name="Proquest OpenURL")
 9 |     if proquest_openurl_collection:
10 |         proquest_openurl_linkevents = LinkEvent.objects.filter(
11 |             link__icontains="gateway.proquest.com/openurl"
12 |         )
13 |         for proquest_openurl_linkevent in proquest_openurl_linkevents:
14 |             proquest_openurl_linkevent.url.add(proquest_openurl_collection[0].url.get())
15 |             proquest_openurl_linkevent.save()
16 | 
17 | 
18 | class Migration(migrations.Migration):
19 | 
20 |     dependencies = [("links", "0007_auto_20190730_1355")]
21 | 
22 |     operations = [migrations.RunPython(add_link_events_to_proquest_openurl_collection)]
23 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0009_auto_20230215_1656.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.14 on 2023-02-15 16:56
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('links', '0008_fill_proquest_openurl'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name='linkevent',
15 |             name='hash_link_event_id',
16 |             field=models.CharField(blank=True, max_length=256),
17 |         ),
18 |         migrations.AddIndex(
19 |             model_name='linkevent',
20 |             index=models.Index(fields=['hash_link_event_id'], name='links_linke_hash_li_594ad2_idx'),
21 |         ),
22 |     ]
23 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0010_data_link_event_id_hash.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.14 on 2023-02-15 17:15
 2 | import hashlib
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | def add_hash_link_event_id(apps, schema_editor):
 7 |     LinkEvent = apps.get_model("links", "LinkEvent")
 8 |     unhashed = LinkEvent.objects.filter(hash_link_event_id__exact='')
 9 |     for i in range(100):
10 |         if unhashed.count() == 0:
11 |             break
12 |         else:
13 |             for event in unhashed.all()[:100000]:
14 |                 link_event_id = event.link + event.event_id
15 |                 hash = hashlib.sha256()
16 |                 hash.update(link_event_id.encode("utf-8"))
17 |                 event.hash_link_event_id = hash.hexdigest()
18 |                 event.save(update_fields=(['hash_link_event_id']))
19 | 
20 | 
21 | class Migration(migrations.Migration):
22 | 
23 |     dependencies = [
24 |         ("links", "0009_auto_20230215_1656"),
25 |     ]
26 | 
27 |     operations = [migrations.RunPython(add_hash_link_event_id)]
28 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0011_auto_20230217_1326.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.14 on 2023-02-17 13:26
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('links', '0010_data_link_event_id_hash'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddIndex(
14 |             model_name='linkevent',
15 |             index=models.Index(fields=['timestamp'], name='links_linke_timesta_4a56de_idx'),
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0012_alter_linkevent_id_alter_linksearchtotal_id_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.7 on 2023-11-07 19:28
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('links', '0011_auto_20230217_1326'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='linkevent',
15 |             name='id',
16 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
17 |         ),
18 |         migrations.AlterField(
19 |             model_name='linksearchtotal',
20 |             name='id',
21 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
22 |         ),
23 |         migrations.AlterField(
24 |             model_name='urlpattern',
25 |             name='id',
26 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
27 |         ),
28 |     ]
29 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0013_add_linkevent_url_linkevent_content_type_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.14 on 2024-09-27 17:43
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     dependencies = [
10 |         ('contenttypes', '0002_remove_content_type_name'),
11 |         ('organisations', '0008_alter_collection_id_alter_organisation_id_and_more'),
12 |         ('links', '0012_alter_linkevent_id_alter_linksearchtotal_id_and_more'),
13 |     ]
14 | 
15 |     operations = [
16 |         migrations.AddField(
17 |             model_name='linkevent',
18 |             name='content_type',
19 |             field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='content_type', to='contenttypes.contenttype'),
20 |         ),
21 |         migrations.AddField(
22 |             model_name='linkevent',
23 |             name='object_id',
24 |             field=models.PositiveIntegerField(null=True),
25 |         ),
26 |         migrations.AddField(
27 |             model_name='urlpattern',
28 |             name='collections',
29 |             field=models.ManyToManyField(related_name='urlpatterns', to='organisations.collection'),
30 |         ),
31 |         migrations.AddIndex(
32 |             model_name='linkevent',
33 |             index=models.Index(fields=['content_type', 'object_id'], name='links_linke_content_1a162a_idx'),
34 |         ),
35 |     ]
36 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/0014_migrate_url_pattern_relationships.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.14 on 2024-08-20 17:14
 2 | from django.contrib.contenttypes.models import ContentType
 3 | from django.core.paginator import Paginator
 4 | from django.core.serializers import deserialize
 5 | from django.db import migrations
 6 | from extlinks.organisations.models import Collection
 7 | 
 8 | import logging
 9 | 
10 | logger = logging.getLogger(__name__)
11 | handler = logging.StreamHandler()
12 | logger.addHandler(handler)
13 | logger.setLevel(logging.INFO)
14 | 
15 | def import_url_collection_json():
16 |     with open("./extlinks/links/migrations/urlpatterns.json", "r") as f:
17 |         logger.info("\timporting urlpatterns ...")
18 |         url_pattern_data = deserialize("json", f)
19 |         for url_pattern in url_pattern_data:
20 |             # ensuring the related collection exists before attempting to save url pattern
21 |             if Collection.objects.filter(pk=url_pattern.object.collection_id).first():
22 |                 url_pattern.save()
23 | 
24 | 
25 | def process_link_event(link_event):
26 |     #logger.info("\tprocessing linkevent: {}".format(link_event.id))
27 |     urlpatterns = (
28 |         link_event.url.all()
29 |         .order_by("-url__length")
30 |     )
31 | 
32 |     # Find opportunities for early exit
33 |     if not urlpatterns:
34 |         logger.info("\t\tlinkevent {}:\tno url patterns found!".format(link_event.id))
35 |         return
36 |     pattern_count = len(urlpatterns)
37 |     if pattern_count == 1:
38 |         link_event.object_id = urlpatterns[0].pk
39 |         link_event.content_type_id = ContentType.objects.get(model="urlpattern").id
40 |         link_event.content_object = urlpatterns[0]
41 |         link_event.save()
42 |         return
43 |     elif pattern_count > 2:
44 |         logger.info("\t\tlinkevent {}:\tmore than 2 url patterns found!".format(link_event.id))
45 |         return
46 | 
47 |     # @FIXME: unreachable?
48 |     # Save the longest (i.e. most specific) URL pattern in the link event
49 |     # link_event.object_id = urlpatterns[0].pk
50 |     # link_event.content_type_id = ContentType.objects.get(model="urlpattern").id
51 |     # link_event.content_object = urlpatterns[0]
52 |     # link_event.save()
53 | 
54 | def migrate_relationships(apps, schema):
55 |     logger.info("\n")
56 |     import_url_collection_json()
57 |     LinkEvent = apps.get_model("links", "LinkEvent")
58 |     paginator = Paginator(
59 |         LinkEvent.objects.filter(content_type__isnull=True).order_by("id"), 1000
60 |     )
61 |     last_page = paginator.page_range[-1]
62 |     for page_num in paginator.page_range:
63 |         logger.info("\tprocessing linkevent page: {page_num}/{last_page}".format(page_num=page_num,last_page=last_page))
64 |         for link_event in paginator.page(page_num).object_list:
65 |             process_link_event(link_event)
66 | 
67 | 
68 | class Migration(migrations.Migration):
69 |     atomic = False
70 | 
71 |     dependencies = [
72 |         ("links", "0013_add_linkevent_url_linkevent_content_type_and_more"),
73 |     ]
74 | 
75 |     operations = [
76 |         migrations.RunPython(migrate_relationships),
77 |     ]
78 | 


--------------------------------------------------------------------------------
/extlinks/links/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/links/migrations/__init__.py


--------------------------------------------------------------------------------
/extlinks/links/models.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import logging
  3 | from datetime import date
  4 | 
  5 | from django.contrib.contenttypes.fields import GenericRelation, GenericForeignKey
  6 | from django.contrib.contenttypes.models import ContentType
  7 | from django.core.cache import cache
  8 | from django.db import models
  9 | from django.db.models.signals import post_save
 10 | from django.dispatch import receiver
 11 | from django.utils.functional import cached_property
 12 | 
 13 | logger = logging.getLogger("django")
 14 | 
 15 | 
 16 | 
 17 | class URLPatternManager(models.Manager):
 18 |     models.CharField.register_lookup(models.functions.Length)
 19 |     def cached(self):
 20 |         cached_patterns = cache.get('url_pattern_cache')
 21 |         if not cached_patterns:
 22 |             cached_patterns = self.all()
 23 |             logger.info('set url_pattern_cache')
 24 |             cache.set('url_pattern_cache', cached_patterns, None)
 25 |         return cached_patterns
 26 | 
 27 |     def matches(self, link):
 28 |         # All URL patterns matching this link
 29 |         tracked_urls = self.cached()
 30 |         return [
 31 |             pattern
 32 |             for pattern in tracked_urls
 33 |             if pattern.url in link or pattern.get_proxied_url in link
 34 |         ]
 35 | 
 36 | class URLPattern(models.Model):
 37 |     class Meta:
 38 |         app_label = "links"
 39 |         verbose_name = "URL pattern"
 40 |         verbose_name_plural = "URL patterns"
 41 | 
 42 |     objects = URLPatternManager()
 43 |     # This doesn't have to look like a 'real' URL so we'll use a CharField.
 44 |     url = models.CharField(max_length=150)
 45 |     link_events = GenericRelation("LinkEvent",
 46 |                                   null=True,
 47 |                                   blank=True,
 48 |                                   default=None,
 49 |                                   related_query_name="url_pattern",
 50 |                                   on_delete=models.SET_NULL)
 51 |     collection = models.ForeignKey(
 52 |         "organisations.Collection",
 53 |         null=True,
 54 |         on_delete=models.SET_NULL,
 55 |         related_name="url",
 56 |     )
 57 |     collections = models.ManyToManyField(
 58 |         "organisations.Collection", related_name="urlpatterns"
 59 |     )
 60 | 
 61 |     def __str__(self):
 62 |         return self.url
 63 | 
 64 |     @cached_property
 65 |     def get_proxied_url(self):
 66 |         # This isn't everything that happens, but it's good enough
 67 |         # for us to make a decision about whether we have a match.
 68 |         return self.url.replace(".", "-")
 69 | 
 70 | 
 71 | @receiver(post_save, sender=URLPattern)
 72 | def delete_url_pattern_cache(sender, instance, **kwargs):
 73 |     if cache.delete("url_pattern_cache"):
 74 |         logger.info("delete url_pattern_cache")
 75 | 
 76 | 
 77 | class LinkSearchTotal(models.Model):
 78 |     class Meta:
 79 |         app_label = "links"
 80 |         verbose_name = "LinkSearch total"
 81 |         verbose_name_plural = "LinkSearch totals"
 82 |         # We only want one record for each URL on any particular date
 83 |         constraints = [
 84 |             models.UniqueConstraint(fields=["url", "date"], name="unique_date_total")
 85 |         ]
 86 | 
 87 |     url = models.ForeignKey(URLPattern, null=True, on_delete=models.SET_NULL)
 88 | 
 89 |     date = models.DateField(default=date.today)
 90 |     total = models.PositiveIntegerField()
 91 | 
 92 | 
 93 | class LinkEvent(models.Model):
 94 |     """
 95 |     Stores data from the page-links-change EventStream
 96 | 
 97 |     https://stream.wikimedia.org/?doc#!/Streams/get_v2_stream_page_links_change
 98 |     """
 99 | 
100 |     class Meta:
101 |         app_label = "links"
102 |         get_latest_by = "timestamp"
103 |         indexes = [
104 |             models.Index(
105 |                 fields=[
106 |                     "hash_link_event_id",
107 |                 ]
108 |             ),
109 |             models.Index(
110 |                 fields=[
111 |                     "timestamp",
112 |                 ]
113 |             ),
114 |             models.Index(fields=["content_type", "object_id"]),
115 |         ]
116 |     url = models.ManyToManyField(URLPattern, related_name="linkevent")
117 |     # URLs should have a max length of 2083
118 |     link = models.CharField(max_length=2083)
119 |     timestamp = models.DateTimeField()
120 |     domain = models.CharField(max_length=32, db_index=True)
121 |     content_type = models.ForeignKey(ContentType, on_delete=models.SET_NULL, related_name="content_type", null=True)
122 |     object_id = models.PositiveIntegerField(null=True)
123 |     content_object = GenericForeignKey("content_type", "object_id")
124 | 
125 |     username = models.ForeignKey(
126 |         "organisations.User",
127 |         null=True,
128 |         on_delete=models.SET_NULL,
129 |     )
130 |     # rev_id has null=True because some tracked revisions don't have a
131 |     # revision ID, like page moves.
132 |     rev_id = models.PositiveIntegerField(null=True)
133 |     # IPs have no user_id, so this can be blank too.
134 |     user_id = models.PositiveIntegerField(null=True)
135 |     page_title = models.CharField(max_length=255)
136 |     page_namespace = models.IntegerField()
137 |     event_id = models.CharField(max_length=36)
138 |     user_is_bot = models.BooleanField(default=False)
139 |     hash_link_event_id = models.CharField(max_length=256, blank=True)
140 | 
141 |     # Were links added or removed?
142 |     REMOVED = 0
143 |     ADDED = 1
144 | 
145 |     CHANGE_CHOICES = (
146 |         (REMOVED, "Removed"),
147 |         (ADDED, "Added"),
148 |     )
149 | 
150 |     change = models.IntegerField(choices=CHANGE_CHOICES, db_index=True)
151 | 
152 |     # Flags whether this event was from a user on the user list for the
153 |     # organisation tracking its URL.
154 |     on_user_list = models.BooleanField(default=False)
155 | 
156 |     @property
157 |     def get_organisation(self):
158 |         url_pattern = URLPattern.objects.all()
159 |         for url_pattern in url_pattern:
160 |             link_events = url_pattern.link_events.all()
161 |             if self in link_events:
162 |                 return url_pattern.collection.organisation
163 | 
164 |     def save(self, **kwargs):
165 |         link_event_id = self.link + self.event_id
166 |         hash = hashlib.sha256()
167 |         hash.update(link_event_id.encode("utf-8"))
168 |         self.hash_link_event_id = hash.hexdigest()
169 |         super().save(**kwargs)
170 | 


--------------------------------------------------------------------------------
/extlinks/logs/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore


--------------------------------------------------------------------------------
/extlinks/organisations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/organisations/__init__.py


--------------------------------------------------------------------------------
/extlinks/organisations/admin.py:
--------------------------------------------------------------------------------
 1 | from django.contrib import admin
 2 | 
 3 | from .models import Organisation, Collection, User
 4 | 
 5 | 
 6 | class UserAdmin(admin.ModelAdmin):
 7 |     list_display = ("username",)
 8 | 
 9 | 
10 | admin.site.register(User, UserAdmin)
11 | 
12 | 
13 | class OrganisationAdmin(admin.ModelAdmin):
14 |     list_display = ("name",)
15 |     list_filter = ("name",)
16 |     exclude = ("username_list",)
17 | 
18 | 
19 | admin.site.register(Organisation, OrganisationAdmin)
20 | 
21 | 
22 | class CollectionAdmin(admin.ModelAdmin):
23 |     list_display = ("name", "organisation")
24 |     list_filter = ("name", "organisation")
25 |     list_select_related = ["organisation"]
26 |     search_fields = ["name"]
27 | 
28 | 
29 | admin.site.register(Collection, CollectionAdmin)
30 | 


--------------------------------------------------------------------------------
/extlinks/organisations/factories.py:
--------------------------------------------------------------------------------
 1 | import factory
 2 | 
 3 | from .models import User, Organisation, Collection
 4 | 
 5 | 
 6 | class UserFactory(factory.django.DjangoModelFactory):
 7 | 
 8 |     class Meta:
 9 |         model = User
10 |         strategy = factory.CREATE_STRATEGY
11 | 
12 |     username = factory.Faker('name')
13 | 
14 | 
15 | class OrganisationFactory(factory.django.DjangoModelFactory):
16 | 
17 |     class Meta:
18 |         model = Organisation
19 |         strategy = factory.CREATE_STRATEGY
20 | 
21 |     name = factory.Faker('company')
22 | 
23 |     @factory.post_generation
24 |     def program(self, create, extracted, **kwargs):
25 |         if not create:
26 |             return
27 |         if extracted:
28 |             for program in extracted:
29 |                 self.program.add(program)
30 | 
31 | 
32 | class CollectionFactory(factory.django.DjangoModelFactory):
33 | 
34 |     class Meta:
35 |         model = Collection
36 |         strategy = factory.CREATE_STRATEGY
37 | 
38 |     name = factory.Faker('word')
39 |     organisation = factory.SubFactory(OrganisationFactory)
40 | 


--------------------------------------------------------------------------------
/extlinks/organisations/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/organisations/management/__init__.py


--------------------------------------------------------------------------------
/extlinks/organisations/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/organisations/management/commands/__init__.py


--------------------------------------------------------------------------------
/extlinks/organisations/management/commands/users_update_lists.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | 
 4 | from extlinks.common.management.commands import BaseCommand
 5 | from django.db import close_old_connections
 6 | from django.utils.timezone import now
 7 | 
 8 | from extlinks.organisations.models import Organisation, User
 9 | 
10 | 
11 | class Command(BaseCommand):
12 |     help = "Updates organisation user lists who have a user_list_url"
13 | 
14 |     def _handle(self, *args, **options):
15 |         user_list_orgs = Organisation.objects.filter(username_list_url__isnull=False)
16 | 
17 |         for organisation in user_list_orgs:
18 |             username_list_url = organisation.username_list_url
19 | 
20 |             # TODO: Hacky way to get TWL working, needs to be flexible.
21 |             auth_key = os.environ["TWL_API_TOKEN"]
22 |             response = requests.get(
23 |                 username_list_url,
24 |                 headers={"Authorization": "Token {}".format(auth_key)},
25 |             )
26 |             if response.status_code == 200:
27 |                 json_response = response.json()
28 |             else:
29 |                 continue
30 | 
31 |             # If we got a valid response, clear the previous username list
32 |             organisation.username_list.clear()
33 | 
34 |             for result in json_response:
35 |                 username = result["wp_username"]
36 | 
37 |                 user_object, _ = User.objects.get_or_create(username=username)
38 | 
39 |                 organisation.username_list.add(user_object)
40 |             # Useful for health check
41 |             organisation.username_list_updated = now()
42 |             organisation.save()
43 | 
44 |         close_old_connections()
45 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-05-20 14:01
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 | 
 9 |     initial = True
10 | 
11 |     dependencies = [
12 |         ("programs", "0001_initial"),
13 |     ]
14 | 
15 |     operations = [
16 |         migrations.CreateModel(
17 |             name="Organisation",
18 |             fields=[
19 |                 (
20 |                     "id",
21 |                     models.AutoField(
22 |                         auto_created=True,
23 |                         primary_key=True,
24 |                         serialize=False,
25 |                         verbose_name="ID",
26 |                     ),
27 |                 ),
28 |                 ("name", models.CharField(max_length=40)),
29 |                 ("limit_by_user", models.BooleanField(default=False)),
30 |                 ("username_list", models.TextField(blank=True, null=True)),
31 |                 ("username_list_url", models.URLField(blank=True, null=True)),
32 |                 (
33 |                     "program",
34 |                     models.ForeignKey(
35 |                         blank=True,
36 |                         null=True,
37 |                         on_delete=django.db.models.deletion.SET_NULL,
38 |                         to="programs.Program",
39 |                     ),
40 |                 ),
41 |             ],
42 |         ),
43 |         migrations.CreateModel(
44 |             name="Collection",
45 |             fields=[
46 |                 (
47 |                     "id",
48 |                     models.AutoField(
49 |                         auto_created=True,
50 |                         primary_key=True,
51 |                         serialize=False,
52 |                         verbose_name="ID",
53 |                     ),
54 |                 ),
55 |                 ("name", models.CharField(max_length=40)),
56 |                 (
57 |                     "organisation",
58 |                     models.ForeignKey(
59 |                         null=True,
60 |                         on_delete=django.db.models.deletion.SET_NULL,
61 |                         to="organisations.Organisation",
62 |                     ),
63 |                 ),
64 |             ],
65 |         ),
66 |     ]
67 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0002_auto_20190603_1255.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-03 12:55
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("organisations", "0001_initial"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterModelOptions(
14 |             name="collection",
15 |             options={"ordering": ["name"]},
16 |         ),
17 |         migrations.AlterModelOptions(
18 |             name="organisation",
19 |             options={"ordering": ["name"]},
20 |         ),
21 |     ]
22 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0003_auto_20190603_1325.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-03 13:25
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("programs", "0002_auto_20190603_1255"),
10 |         ("organisations", "0002_auto_20190603_1255"),
11 |     ]
12 | 
13 |     operations = [
14 |         migrations.RemoveField(
15 |             model_name="organisation",
16 |             name="program",
17 |         ),
18 |         migrations.AddField(
19 |             model_name="organisation",
20 |             name="program",
21 |             field=models.ManyToManyField(blank=True, null=True, to="programs.Program"),
22 |         ),
23 |     ]
24 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0004_auto_20190603_1325.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-03 13:25
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("organisations", "0003_auto_20190603_1325"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name="organisation",
15 |             name="program",
16 |             field=models.ManyToManyField(to="programs.Program"),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0005_auto_20190628_1221.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-28 12:21
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("organisations", "0004_auto_20190603_1325"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.CreateModel(
14 |             name="User",
15 |             fields=[
16 |                 (
17 |                     "id",
18 |                     models.AutoField(
19 |                         auto_created=True,
20 |                         primary_key=True,
21 |                         serialize=False,
22 |                         verbose_name="ID",
23 |                     ),
24 |                 ),
25 |                 ("username", models.CharField(max_length=235)),
26 |             ],
27 |         ),
28 |         migrations.RemoveField(
29 |             model_name="organisation",
30 |             name="limit_by_user",
31 |         ),
32 |         migrations.RemoveField(
33 |             model_name="organisation",
34 |             name="username_list",
35 |         ),
36 |         migrations.AddField(
37 |             model_name="organisation",
38 |             name="username_list",
39 |             field=models.ManyToManyField(to="organisations.User"),
40 |         ),
41 |     ]
42 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0006_auto_20190730_1355.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2.3 on 2019-07-30 13:55
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("organisations", "0005_auto_20190628_1221"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name="organisation",
15 |             name="program",
16 |             field=models.ManyToManyField(blank=True, to="programs.Program"),
17 |         ),
18 |         migrations.AlterField(
19 |             model_name="organisation",
20 |             name="username_list",
21 |             field=models.ManyToManyField(blank=True, to="organisations.User"),
22 |         ),
23 |     ]
24 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0007_auto_20230216_1931.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1.14 on 2023-02-16 19:31
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('organisations', '0006_auto_20190730_1355'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='user',
15 |             name='username',
16 |             field=models.CharField(max_length=235, unique=True),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0008_alter_collection_id_alter_organisation_id_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.7 on 2023-11-07 19:28
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('organisations', '0007_auto_20230216_1931'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='collection',
15 |             name='id',
16 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
17 |         ),
18 |         migrations.AlterField(
19 |             model_name='organisation',
20 |             name='id',
21 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
22 |         ),
23 |         migrations.AlterField(
24 |             model_name='user',
25 |             name='id',
26 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
27 |         ),
28 |     ]
29 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/0009_organisation_username_list_updated.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.20 on 2025-04-02 03:32
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("organisations", "0008_alter_collection_id_alter_organisation_id_and_more"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="organisation",
15 |             name="username_list_updated",
16 |             field=models.DateTimeField(auto_now=True),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/extlinks/organisations/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/organisations/migrations/__init__.py


--------------------------------------------------------------------------------
/extlinks/organisations/models.py:
--------------------------------------------------------------------------------
 1 | from django.contrib.contenttypes.models import ContentType
 2 | from django.db import models
 3 | 
 4 | from extlinks.links.models import LinkEvent, URLPattern
 5 | 
 6 | 
 7 | class User(models.Model):
 8 |     class Meta:
 9 |         app_label = "organisations"
10 | 
11 |     username = models.CharField(max_length=235, unique=True)
12 | 
13 |     def __str__(self):
14 |         return self.username
15 | 
16 | 
17 | class Organisation(models.Model):
18 |     class Meta:
19 |         app_label = "organisations"
20 |         ordering = ["name"]
21 | 
22 |     name = models.CharField(max_length=40)
23 | 
24 |     # programs.Program syntax required to avoid circular import.
25 |     program = models.ManyToManyField("programs.Program", blank=True)
26 | 
27 |     username_list = models.ManyToManyField(User, blank=True)
28 |     # If a URL is placed here, we'll use it to regularly update username_list
29 |     username_list_url = models.URLField(blank=True, null=True)
30 |     username_list_updated = models.DateTimeField(auto_now=True)
31 | 
32 |     def __str__(self):
33 |         return self.name
34 | 
35 |     @property
36 |     def limit_by_user(self):
37 |         return self.username_list.exists()
38 | 
39 | 
40 | class Collection(models.Model):
41 |     class Meta:
42 |         app_label = "organisations"
43 |         ordering = ["name"]
44 | 
45 |     name = models.CharField(max_length=40)
46 | 
47 |     organisation = models.ForeignKey(Organisation, null=True, on_delete=models.SET_NULL)
48 | 
49 |     def __str__(self):
50 |         return self.name
51 | 
52 |     def get_linkevents(self):
53 |         url_patterns = URLPattern.objects.filter(collections__name__contains=self.name)
54 |         url_pattern_type = ContentType.objects.get_for_model(URLPattern)
55 |         return LinkEvent.objects.filter(
56 |             content_type__pk=url_pattern_type.id, object_id__in=url_patterns
57 |         )
58 | 
59 |     def get_url_patterns(self):
60 |         return URLPattern.objects.filter(collections__name__contains=self.name)
61 | 


--------------------------------------------------------------------------------
/extlinks/organisations/templates/organisations/organisation_detail.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 |   <div class="wrapper">
 5 |     <nav id="sidebar">
 6 |       <ul class="list-unstyled components">
 7 |         <p style="font-size: 18px">Collections</p>
 8 |         {% for key, collection in collections.items %}
 9 |           <li>
10 |             <a href="#{{ key }}">{{ collection.object }}</a>
11 |             {% for urlpattern in collection.urls %}
12 |               <div class="sidebar-sub-entry">{{ urlpattern }}</div>
13 |             {% endfor %}
14 |           </li>
15 |         {% endfor %}
16 |       </ul>
17 |     </nav>
18 |     <nav id="content" class="charts-body">
19 |       <div class="row">
20 |         <div class="col-6">
21 |           <h1>{{ object }}</h1>
22 |         </div>
23 |         <div class="col-6">
24 |           <form method="GET" id="graph-date-form">
25 |             <div class="form-row">
26 |               <div class="fieldWrapper" style="padding: 15px;">
27 |                 {{ form.start_date.errors }}
28 |                 {{ form.start_date.label_tag }} {{ form.start_date }}
29 |                 {% if form.start_date.help_text %}
30 |                   <p class="help">{{ form.start_date.help_text|safe }}</p>
31 |                 {% endif %}
32 |               </div>
33 |               <div class="fieldWrapper" style="padding: 15px;">
34 |                 {{ form.end_date.errors }}
35 |                 {{ form.end_date.label_tag }} {{ form.end_date }}
36 |                 {% if form.end_date.help_text %}
37 |                   <p class="help">{{ form.end_date.help_text|safe }}</p>
38 |                 {% endif %}
39 |               </div>
40 |             </div>
41 |             <div class="form-row">
42 |               <p class="help" style="padding-left: 15px;">
43 |                 Note: Date filters always consider full month.
44 |               </p>
45 |             </div>
46 |             <div class="form-row">
47 |               {% if object.limit_by_user %}
48 |                 <div class="fieldWrapper" style="padding: 15px;">
49 |                   {{ form.limit_to_user_list.errors }}
50 |                   {{ form.limit_to_user_list.label_tag }} {{ form.limit_to_user_list }}
51 |                   {% if form.limit_to_user_list.help_text %}
52 |                       <p class="help">{{ form.limit_to_user_list.help_text|safe }}</p>
53 |                   {% endif %}
54 |                 </div>
55 |               {% endif %}
56 |               <div class="col">
57 |                 <button type="submit" class="btn btn-primary" style="margin: 5px;">Submit</button>
58 |               </div>
59 |             </div>
60 |           </form>
61 |         </div>
62 |       </div>
63 |       {% include "organisations/organisation_charts_include.html" %}
64 |     </nav>
65 |   </div>
66 | {% endblock %}
67 | 


--------------------------------------------------------------------------------
/extlinks/organisations/templates/organisations/organisation_list.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 |     <div class="body">
 5 |         <h1>Organisations</h1>
 6 |         <div class="row row-flex">
 7 |             {% for organisation in object_list %}
 8 |                 <div class="card">
 9 |                     <div class="card-body">
10 |                         <h5 class="card-title">{{ organisation }}</h5>
11 |                         {% if organisation.collection_count > 1 %}
12 |                             <h6 class="card-subtitle mb-2 text-muted">Collections: {{ organisation.collection_count }}</h6>
13 |                         {% endif %}
14 |                         <a href="{% url 'organisations:detail' organisation.pk %}" class="btn btn-outline-dark">Overview</a>
15 |                     </div>
16 |                 </div>
17 |             {% empty %}
18 |                 No organisations.
19 |             {% endfor %}
20 |         </div>
21 |     </div>
22 | {% endblock %}
23 | 


--------------------------------------------------------------------------------
/extlinks/organisations/urls.py:
--------------------------------------------------------------------------------
 1 | from django.urls import path
 2 | 
 3 | from extlinks.common.views import CSVPageTotals
 4 | from extlinks.common.urls import urlpatterns as shared_urls
 5 | from .views import (
 6 |     OrganisationDetailView,
 7 |     OrganisationListView,
 8 |     get_editor_count,
 9 |     get_project_count,
10 |     get_links_count,
11 |     get_top_pages,
12 |     get_top_projects,
13 |     get_top_users,
14 |     get_latest_link_events,
15 | )
16 | 
17 | urlpatterns = [
18 |     path("", OrganisationListView.as_view(), name="list"),
19 |     path("<int:pk>", OrganisationDetailView.as_view(), name="detail"),
20 |     path("editor_count/", get_editor_count, name="editor_count"),
21 |     path("project_count/", get_project_count, name="project_count"),
22 |     path("links_count/", get_links_count, name="links_count"),
23 |     path("top_pages/", get_top_pages, name="top_pages"),
24 |     path("top_projects/", get_top_projects, name="top_projects"),
25 |     path("top_users/", get_top_users, name="top_users"),
26 |     path("latest_link_events/", get_latest_link_events, name="latest_link_events"),
27 |     # CSV downloads
28 |     path("<int:pk>/csv/page_totals", CSVPageTotals.as_view(), name="csv_page_totals"),
29 | ]
30 | 
31 | urlpatterns += shared_urls
32 | 


--------------------------------------------------------------------------------
/extlinks/programs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/programs/__init__.py


--------------------------------------------------------------------------------
/extlinks/programs/admin.py:
--------------------------------------------------------------------------------
 1 | from django.contrib import admin
 2 | 
 3 | from .models import Program
 4 | 
 5 | 
 6 | class ProgramAdmin(admin.ModelAdmin):
 7 |     list_display = ("name",)
 8 | 
 9 | 
10 | admin.site.register(Program, ProgramAdmin)
11 | 


--------------------------------------------------------------------------------
/extlinks/programs/factories.py:
--------------------------------------------------------------------------------
 1 | import factory
 2 | 
 3 | from .models import Program
 4 | 
 5 | 
 6 | class ProgramFactory(factory.django.DjangoModelFactory):
 7 |     class Meta:
 8 |         model = Program
 9 |         strategy = factory.CREATE_STRATEGY
10 | 
11 |     name = factory.Faker("company")
12 |     description = factory.Faker("text", max_nb_chars=200)
13 | 


--------------------------------------------------------------------------------
/extlinks/programs/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/programs/management/__init__.py


--------------------------------------------------------------------------------
/extlinks/programs/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/programs/management/commands/__init__.py


--------------------------------------------------------------------------------
/extlinks/programs/management/commands/programs_example_data.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from faker import Faker
 3 | 
 4 | from extlinks.common.management.commands import BaseCommand
 5 | 
 6 | from extlinks.links.models import URLPattern
 7 | from extlinks.organisations.models import Organisation, Collection, User
 8 | from extlinks.programs.models import Program
 9 | 
10 | 
11 | class Command(BaseCommand):
12 |     help = "Creates a range of test programs, organisations, and collections"
13 | 
14 |     def add_arguments(self, parser):
15 |         parser.add_argument("num", nargs="+", type=int)
16 | 
17 |     def _handle(self, *args, **options):
18 |         num_programs = options["num"][0]
19 | 
20 |         fake = Faker()
21 | 
22 |         for i in range(num_programs):
23 |             new_program = Program(
24 |                 name="Program {num}".format(num=i),
25 |                 description=fake.text(max_nb_chars=200),
26 |             )
27 |             new_program.save()
28 | 
29 |             for j in range(random.randint(1, 20)):
30 |                 # Will this org limit by user?
31 |                 limit_by_user = random.choice([True, False])
32 | 
33 |                 new_org = Organisation(name=fake.company())
34 |                 new_org.save()
35 |                 if limit_by_user:
36 |                     # Between 10 and 50 users on the list.
37 |                     username_list = [
38 |                         fake.user_name() for _ in range(random.randint(10, 50))
39 |                     ]
40 |                     for username in username_list:
41 |                         user, _ = User.objects.get_or_create(username=username)
42 |                         new_org.username_list.add(user)
43 |                 new_org.program.add(new_program)
44 | 
45 |                 for k in range(random.randint(1, 3)):
46 |                     new_collection = Collection(
47 |                         name=fake.sentence(nb_words=3)[:-1], organisation=new_org
48 |                     )
49 |                     new_collection.save()
50 | 
51 |                     for l in range(random.randint(1, 2)):
52 |                         new_urlpattern = URLPattern(
53 |                             # Strip https:// and /
54 |                             url=fake.url(schemes=["https"])[8:-1],
55 |                         )
56 |                         new_urlpattern.save()
57 |                         new_urlpattern.collections.add(new_collection)
58 |                         new_urlpattern.save()
59 | 


--------------------------------------------------------------------------------
/extlinks/programs/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-05-20 14:01
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     initial = True
 9 | 
10 |     dependencies = []
11 | 
12 |     operations = [
13 |         migrations.CreateModel(
14 |             name="Program",
15 |             fields=[
16 |                 (
17 |                     "id",
18 |                     models.AutoField(
19 |                         auto_created=True,
20 |                         primary_key=True,
21 |                         serialize=False,
22 |                         verbose_name="ID",
23 |                     ),
24 |                 ),
25 |                 ("name", models.CharField(max_length=40)),
26 |                 ("description", models.TextField(blank=True, null=True)),
27 |             ],
28 |         ),
29 |     ]
30 | 


--------------------------------------------------------------------------------
/extlinks/programs/migrations/0002_auto_20190603_1255.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-06-03 12:55
 2 | 
 3 | from django.db import migrations
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ("programs", "0001_initial"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterModelOptions(
14 |             name="program",
15 |             options={"ordering": ["name"]},
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/extlinks/programs/migrations/0003_alter_program_id.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2.7 on 2023-11-07 19:28
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 | 
 8 |     dependencies = [
 9 |         ('programs', '0002_auto_20190603_1255'),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name='program',
15 |             name='id',
16 |             field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'),
17 |         ),
18 |     ]
19 | 


--------------------------------------------------------------------------------
/extlinks/programs/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/extlinks/programs/migrations/__init__.py


--------------------------------------------------------------------------------
/extlinks/programs/models.py:
--------------------------------------------------------------------------------
 1 | from django.db import models
 2 | 
 3 | from extlinks.links.models import LinkEvent
 4 | from extlinks.organisations.models import Organisation
 5 | 
 6 | 
 7 | class Program(models.Model):
 8 |     class Meta:
 9 |         app_label = "programs"
10 |         ordering = ["name"]
11 | 
12 |     name = models.CharField(max_length=40)
13 | 
14 |     description = models.TextField(blank=True, null=True)
15 | 
16 |     def __str__(self):
17 |         return self.name
18 | 
19 |     def get_linkevents(self):
20 |         return LinkEvent.objects.filter(
21 |             urlpattern__collection__organisation__program=self
22 |         ).distinct()
23 | 
24 |     @property
25 |     def any_orgs_user_list(self):
26 |         """
27 |         Returns True if any of this program's organisations limit by user
28 |         """
29 |         return Organisation.objects.filter(
30 |             program=self, username_list__isnull=False
31 |         ).exists()
32 | 


--------------------------------------------------------------------------------
/extlinks/programs/templates/programs/program_detail.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 |   <div class="wrapper">
 5 |     <nav id="sidebar">
 6 |       <ul class="list-unstyled components">
 7 |         <p style="font-size: 18px">Organisations</p>
 8 |         {% for organisation in organisations %}
 9 |           <li>
10 |             <a href="{% url 'organisations:detail' pk=organisation.pk %}?{{ query_string }}">{{ organisation }}</a>
11 |           </li>
12 |         {% endfor %}
13 |       </ul>
14 |     </nav>
15 |     <nav id="content" class="charts-body">
16 |       <div class="row">
17 |         <div class="col-6">
18 |           <h1>{{ object }}</h1>
19 |           <p>Organisations: {{ program.organisation_set.count }}</p>
20 |         </div>
21 |         <div class="col-6">
22 |           <form method="GET" id="graph-date-form">
23 |             <div class="form-row">
24 |               <div class="fieldWrapper" style="padding: 15px;">
25 |                 {{ form.start_date.errors }}
26 |                 {{ form.start_date.label_tag }} {{ form.start_date }}
27 |                 {% if form.start_date.help_text %}
28 |                   <p class="help">{{ form.start_date.help_text|safe }}</p>
29 |                 {% endif %}
30 |               </div>
31 |               <div class="fieldWrapper" style="padding: 15px;">
32 |                 {{ form.end_date.errors }}
33 |                 {{ form.end_date.label_tag }} {{ form.end_date }}
34 |                 {% if form.end_date.help_text %}
35 |                   <p class="help">{{ form.end_date.help_text|safe }}</p>
36 |                 {% endif %}
37 |               </div>
38 |             </div>
39 |             <div class="form-row">
40 |               <p class="help" style="padding-left: 15px;">
41 |                 Note: Date filters always consider full month.
42 |               </p>
43 |             </div>
44 |             <div class="form-row">
45 |               {% if program.any_orgs_user_list %}
46 |                 <div class="fieldWrapper" style="padding: 15px;">
47 |                   {{ form.limit_to_user_list.errors }}
48 |                   {{ form.limit_to_user_list.label_tag }} {{ form.limit_to_user_list }}
49 |                   {% if form.limit_to_user_list.help_text %}
50 |                     <p class="help">{{ form.limit_to_user_list.help_text|safe }}</p>
51 |                   {% endif %}
52 |                 </div>
53 |               {% endif %}
54 |               <div class="col">
55 |                   <button type="submit" class="btn btn-primary" style="margin: 5px;">Submit</button>
56 |               </div>
57 |             </div>
58 |           </form>
59 |         </div>
60 |       </div>
61 |       {% include "programs/program_charts_include.html" %}
62 |     </nav>
63 |   </div>
64 | {% endblock %}
65 | 


--------------------------------------------------------------------------------
/extlinks/programs/templates/programs/program_list.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 |     <div class="body">
 5 |         <h1>Programs</h1>
 6 |         <div class="row row-flex">
 7 |             {% for program in object_list %}
 8 |                 <div class="card">
 9 |                     <div class="card-body">
10 |                         <h5 class="card-title">
11 |                             <a href="{% url 'programs:detail' program.pk %}" style="color: #000000">{{ program }}</a>
12 |                         </h5>
13 |                         <h6 class="card-subtitle mb-2 text-muted">Organisations: {{ program.organisation_count }}</h6>
14 |                         <p class="card-text">{{ program.description }}</p>
15 |                         <a href="{% url 'programs:detail' program.pk %}" class="btn btn-outline-dark">Overview</a>
16 |                     </div>
17 |                 </div>
18 |             {% empty %}
19 |                 No programs.
20 |             {% endfor %}
21 |         </div>
22 |     </div>
23 | {% endblock %}
24 | 


--------------------------------------------------------------------------------
/extlinks/programs/urls.py:
--------------------------------------------------------------------------------
 1 | from django.urls import path
 2 | 
 3 | from extlinks.common.views import CSVOrgTotals
 4 | from extlinks.common.urls import urlpatterns as shared_urls
 5 | from .views import (
 6 |     ProgramListView,
 7 |     ProgramDetailView,
 8 |     get_editor_count,
 9 |     get_project_count,
10 |     get_links_count,
11 |     get_top_organisations,
12 |     get_top_projects,
13 |     get_top_users,
14 | )
15 | 
16 | urlpatterns = [
17 |     path("", ProgramListView.as_view(), name="list"),
18 |     path("<int:pk>", ProgramDetailView.as_view(), name="detail"),
19 |     path("editor_count/", get_editor_count, name="editor_count"),
20 |     path("project_count/", get_project_count, name="project_count"),
21 |     path("links_count/", get_links_count, name="links_count"),
22 |     path("top_organisations/", get_top_organisations, name="top_organisations"),
23 |     path("top_projects/", get_top_projects, name="top_projects"),
24 |     path("top_users/", get_top_users, name="top_users"),
25 |     # CSV downloads
26 |     path("<int:pk>/csv/org_totals", CSVOrgTotals.as_view(), name="csv_org_totals"),
27 | ]
28 | 
29 | urlpatterns += shared_urls
30 | 


--------------------------------------------------------------------------------
/extlinks/settings/base.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Django settings for extlinks project.
  3 | """
  4 | 
  5 | import os
  6 | from pathlib import Path
  7 | 
  8 | 
  9 | SECRET_KEY = os.environ["SECRET_KEY"]
 10 | # Usually we'd define this relative to the settings file, but we're always
 11 | # starting from /app in Docker.
 12 | BASE_DIR = "/app"
 13 | 
 14 | ALLOWED_HOSTS = ["127.0.0.1", "localhost", "0.0.0.0"]
 15 | 
 16 | # Application definition
 17 | 
 18 | INSTALLED_APPS = [
 19 |     "django.contrib.admin",
 20 |     "django.contrib.auth",
 21 |     "django.contrib.contenttypes",
 22 |     "django.contrib.sessions",
 23 |     "django.contrib.messages",
 24 |     "django.contrib.staticfiles",
 25 |     "extlinks.common",
 26 |     "extlinks.healthcheck",
 27 |     "extlinks.links",
 28 |     "extlinks.organisations",
 29 |     "extlinks.programs",
 30 |     "extlinks.aggregates",
 31 |     "django_extensions",
 32 | ]
 33 | 
 34 | MIDDLEWARE = [
 35 |     "django.middleware.security.SecurityMiddleware",
 36 |     "django.contrib.sessions.middleware.SessionMiddleware",
 37 |     "django.middleware.common.CommonMiddleware",
 38 |     "django.middleware.csrf.CsrfViewMiddleware",
 39 |     "django.contrib.auth.middleware.AuthenticationMiddleware",
 40 |     "django.contrib.messages.middleware.MessageMiddleware",
 41 |     "django.middleware.clickjacking.XFrameOptionsMiddleware",
 42 | ]
 43 | 
 44 | ROOT_URLCONF = "extlinks.urls"
 45 | 
 46 | TEMPLATES = [
 47 |     {
 48 |         "BACKEND": "django.template.backends.django.DjangoTemplates",
 49 |         "DIRS": [os.path.join(BASE_DIR, "extlinks", "templates")],
 50 |         "APP_DIRS": True,
 51 |         "OPTIONS": {
 52 |             "context_processors": [
 53 |                 "django.template.context_processors.debug",
 54 |                 "django.template.context_processors.request",
 55 |                 "django.contrib.auth.context_processors.auth",
 56 |                 "django.contrib.messages.context_processors.messages",
 57 |             ],
 58 |         },
 59 |     },
 60 | ]
 61 | 
 62 | WSGI_APPLICATION = "extlinks.wsgi.application"
 63 | 
 64 | # Database
 65 | # https://docs.djangoproject.com/en/4.2/ref/settings/#databases
 66 | 
 67 | DATABASES = {
 68 |     "default": {
 69 |         "ENGINE": "django.db.backends.mysql",
 70 |         "NAME": os.environ["MYSQL_DATABASE"],
 71 |         "USER": "root",
 72 |         "PASSWORD": os.environ["MYSQL_ROOT_PASSWORD"],
 73 |         "HOST": "db",
 74 |         "PORT": "3306",
 75 |         "OPTIONS": {"charset": "utf8mb4"},
 76 |         "CONN_MAX_AGE": None,
 77 |         "CONN_HEALTH_CHECKS": True,
 78 |     }
 79 | }
 80 | 
 81 | # Password validation
 82 | # https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
 83 | 
 84 | AUTH_PASSWORD_VALIDATORS = [
 85 |     {
 86 |         "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
 87 |     },
 88 |     {
 89 |         "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
 90 |     },
 91 |     {
 92 |         "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
 93 |     },
 94 |     {
 95 |         "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
 96 |     },
 97 |     {
 98 |         "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
 99 |     },
100 |     {
101 |         "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
102 |     },
103 |     {
104 |         "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
105 |     },
106 | ]
107 | 
108 | # Internationalization
109 | # https://docs.djangoproject.com/en/4.2/topics/i18n/
110 | 
111 | LANGUAGE_CODE = "en-us"
112 | 
113 | TIME_ZONE = "UTC"
114 | 
115 | USE_I18N = True
116 | 
117 | USE_L10N = True
118 | 
119 | USE_TZ = True
120 | 
121 | # Cache
122 | 
123 | CACHES = {
124 |     "default": {
125 |         "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
126 |         "LOCATION": "cache:11211",
127 |         "TIMEOUT": 600,
128 |         "OPTIONS": {
129 |             "no_delay": True,
130 |             "ignore_exc": True,
131 |             "max_pool_size": 4,
132 |             "use_pooling": True,
133 |         },
134 |     }
135 | }
136 | 
137 | # Static files (CSS, JavaScript, Images)
138 | # https://docs.djangoproject.com/en/4.2/howto/static-files/
139 | 
140 | STATIC_URL = "/static/"
141 | STATIC_ROOT = os.path.join(BASE_DIR, "static")
142 | 
143 | # EMAIL CONFIGURATION
144 | # ------------------------------------------------------------------------------
145 | EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"
146 | EMAIL_HOST = os.environ.get("DJANGO_EMAIL_HOST", "localhost")
147 | EMAIL_PORT = 25
148 | EMAIL_HOST_USER = ""
149 | EMAIL_HOST_PASSWORD = ""
150 | EMAIL_USE_TLS = False
151 | 
152 | DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
153 | 


--------------------------------------------------------------------------------
/extlinks/settings/helpers.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Any
 3 | 
 4 | 
 5 | def sentry_before_send(event: dict, hint: dict):
 6 |     """
 7 |     Callback for sentry's client-side event filtering.
 8 |     We're using it to mask sensitive data.
 9 |     https://docs.sentry.io/platforms/python/configuration/filtering/#filtering-error-events
10 |     Parameters
11 |     ----------
12 |     event : dict
13 |         Sentry event dictionary object
14 |     hint : dict
15 |         Source data dictionary used to create the event.
16 |         https://docs.sentry.io/platforms/python/configuration/filtering/#using-hints
17 |     Returns
18 |     -------
19 |     dict
20 |         The modified event.
21 |     """
22 |     # We catch any exception, because if we don't, the event is dropped.
23 |     # We want to keep passing them on so we can continually improve our scrubbing
24 |     # while still sending events.
25 |     # noinspection PyBroadException
26 |     try:
27 |         event = _scrub_event(event)
28 |     except:
29 |         pass
30 | 
31 |     return event
32 | 
33 | 
34 | def _mask_pattern(dirty: str):
35 |     """
36 |     Masks out known sensitive data from string.
37 |     Parameters
38 |     ----------
39 |     dirty : str
40 |         Input that may contain sensitive information.
41 |     Returns
42 |     -------
43 |     str
44 |         Output with any known sensitive information masked out.
45 |     """
46 |     # DB credentials as found in called processes.
47 |     call_proc_db_creds = re.compile(r"--(user|password)=[^', ]+([', ])")
48 |     clean = call_proc_db_creds.sub(r"--\1=*****\2", dirty)
49 | 
50 |     return clean
51 | 
52 | 
53 | def _scrub_event(event_data: Any):
54 |     """
55 |     Recursively traverses sentry event data returns a scrubbed version.
56 |     Parameters
57 |     ----------
58 |     event_data : Any
59 |         Input that may contain sensitive information.
60 |     Returns
61 |     -------
62 |     Any
63 |         Output with any known sensitive information masked out.
64 |     """
65 |     # Basically cribbed from stackoverflow:
66 |     # https://stackoverflow.com/a/38970181
67 |     # Get dictionary items
68 |     if isinstance(event_data, dict):
69 |         items = event_data.items()
70 |     # Enumerate list/tuple items
71 |     elif isinstance(event_data, (list, tuple)):
72 |         items = enumerate(event_data)
73 |     # Mask sensitive patterns from stringlike elements
74 |     else:
75 |         return _mask_pattern(str(event_data))
76 | 
77 |     for key, value in items:
78 |         # When we can id sensitive data by the key, do a simple replacement.
79 |         if key == "user" or key == "password" or key == "passwd":
80 |             event_data[key] = "*****"
81 |         # Otherwise, continue recursion.
82 |         else:
83 |             event_data[key] = _scrub_event(value)
84 | 
85 |     return event_data
86 | 


--------------------------------------------------------------------------------
/extlinks/settings/local.py:
--------------------------------------------------------------------------------
 1 | from .base import *
 2 | from .logging import *
 3 | from os import getenv
 4 | import sys
 5 | 
 6 | DEBUG = True
 7 | 
 8 | SERVER_EMAIL = "Wikilink Local <wikilink.local@localhost.localdomain>"
 9 | DEFAULT_FROM_EMAIL = SERVER_EMAIL
10 | 
11 | 
12 | # Django Debug Toolbar config
13 | # ------------------------------------------------------------------------------
14 | 
15 | # Sometimes, developers do not want the debug toolbar on their local environments,
16 | # so we can disable it by not passing a REQUIREMENTS_FILE variable when building
17 | # the docker containers
18 | reqs = getenv("REQUIREMENTS_FILE", "django.txt")
19 | if reqs == "local.txt":
20 |     TESTING = "test" in sys.argv
21 |     if not TESTING:
22 |         INSTALLED_APPS += [
23 |             "debug_toolbar",
24 |         ]
25 | 
26 |         MIDDLEWARE += [
27 |             "debug_toolbar.middleware.DebugToolbarMiddleware",
28 |         ]
29 | 
30 |     INTERNAL_IPS = ["127.0.0.1", "localhost", "0.0.0.0"]
31 | 
32 |     def show_toolbar(request):
33 |         return True
34 | 
35 |     DEBUG_TOOLBAR_CONFIG = {
36 |         "SHOW_TOOLBAR_CALLBACK": show_toolbar,
37 |     }
38 |     # Dummy Cache
39 |     CACHES = {
40 |         "default": {
41 |             "BACKEND": "django.core.cache.backends.dummy.DummyCache",
42 |         }
43 |     }
44 | 


--------------------------------------------------------------------------------
/extlinks/settings/logging.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging.config
 3 | 
 4 | # LOGGING CONFIGURATION
 5 | # ------------------------------------------------------------------------------
 6 | # We're replacing the default logging config to get better control of the
 7 | # mail_admins behavior.
 8 | # Logging is in another file since Django 3.1 because of https://code.djangoproject.com/ticket/32016
 9 | 
10 | LOGGING_CONFIG = None
11 | 
12 | logging.config.dictConfig(
13 |     {
14 |         "version": 1,
15 |         "disable_existing_loggers": False,
16 |         "filters": {
17 |             "require_debug_false": {"()": "django.utils.log.RequireDebugFalse"},
18 |             "require_debug_true": {"()": "django.utils.log.RequireDebugTrue"},
19 |         },
20 |         "formatters": {
21 |             "django.server": {
22 |                 "()": "django.utils.log.ServerFormatter",
23 |                 "format": "[%(server_time)s] %(message)s",
24 |             }
25 |         },
26 |         "handlers": {
27 |             "nodebug_console": {
28 |                 "level": "WARNING",
29 |                 "filters": ["require_debug_false"],
30 |                 "class": "logging.StreamHandler",
31 |             },
32 |             "debug_console": {
33 |                 "level": "INFO",
34 |                 "filters": ["require_debug_true"],
35 |                 "class": "logging.StreamHandler",
36 |             },
37 |             "django.server": {
38 |                 "level": "INFO",
39 |                 "class": "logging.StreamHandler",
40 |                 "formatter": "django.server",
41 |             },
42 |         },
43 |         "loggers": {
44 |             "django": {
45 |                 "handlers": ["nodebug_console", "debug_console"],
46 |                 "level": os.environ.get("DJANGO_LOG_LEVEL", "INFO"),
47 |             },
48 |             "django.server": {
49 |                 "handlers": ["django.server"],
50 |                 "level": os.environ.get("DJANGO_LOG_LEVEL", "INFO"),
51 |                 "propagate": False,
52 |             },
53 |             "Wikilink": {
54 |                 "handlers": ["nodebug_console", "debug_console"],
55 |                 "level": os.environ.get("DJANGO_LOG_LEVEL", "INFO"),
56 |             },
57 |         },
58 |     }
59 | )
60 | 


--------------------------------------------------------------------------------
/extlinks/settings/production.py:
--------------------------------------------------------------------------------
 1 | import sentry_sdk
 2 | from sentry_sdk.integrations.django import DjangoIntegration
 3 | from extlinks.settings.helpers import sentry_before_send
 4 | 
 5 | from .base import *
 6 | from .logging import *
 7 | 
 8 | DEBUG = False
 9 | 
10 | ALLOWED_HOSTS = ["wikilink.wmflabs.org"]
11 | 
12 | # Redirect HTTP to HTTPS
13 | # SECURE_PROXY_SSL_HEADER is required because we're behind a proxy
14 | SECURE_SSL_REDIRECT = True
15 | SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https")
16 | 
17 | DEFAULT_FROM_EMAIL = "Wikilink Production <noreply@wikilink.wmflabs.org>"
18 | 
19 | sentry_sdk.init(
20 |     dsn="https://cdabef0803434e3c97cb2c15f9a7da37@glitchtip-wikilink.wmflabs.org/1",
21 |     integrations=[DjangoIntegration()],
22 |     before_send=sentry_before_send,
23 | )
24 | 


--------------------------------------------------------------------------------
/extlinks/templates/base.html:
--------------------------------------------------------------------------------
 1 | {% load static %}
 2 | 
 3 | <!DOCTYPE html>
 4 | <html lang="en">
 5 | <head>
 6 |     <title>
 7 |         Wikimedia External Links Tool
 8 |     </title>
 9 | 
10 |     <link rel="icon" type="image/png" href="{% static "favicon.ico" %}">
11 |     <link rel="stylesheet" href="https://tools-static.wmflabs.org/cdnjs/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
12 |     <link rel="stylesheet" href="https://tools-static.wmflabs.org/cdnjs/ajax/libs/Chart.js/2.8.0/Chart.css" />
13 |     <link rel="stylesheet" href="{% static "css/local.css" %}" type="text/css">
14 | </head>
15 | <body>
16 | 
17 | <nav class="navbar navbar-expand-lg navbar-light shadow-sm">
18 |     <a class="navbar-brand" href="{% url 'homepage' %}">Wikilink</a>
19 |     <div class="navbar-nav">
20 |         <a class="nav-item nav-link" href="{% url 'programs:list' %}">Programs</a>
21 |         <a class="nav-item nav-link" href="{% url 'organisations:list' %}">Organisations</a>
22 |         <a class="nav-item nav-link" href="{% url 'documentation' %}">What is this?</a>
23 |     </div>
24 | </nav>
25 | 
26 | <script type="text/javascript" src="https://tools-static.wmflabs.org/cdnjs/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
27 | <script type="text/javascript" src="https://tools-static.wmflabs.org/cdnjs/ajax/libs/Chart.js/2.8.0/Chart.js"></script>
28 | <script type="text/javascript" src="https://tools-static.wmflabs.org/cdnjs/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
29 | <script type="text/javascript" src="https://tools-static.wmflabs.org/cdnjs/ajax/libs/twitter-bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
30 | <div class="main-content">
31 |     {% block content %}
32 |     {% endblock %}
33 | </div>
34 | 
35 | <hr/>
36 | <div class="footer">
37 |     A <a href="https://meta.wikimedia.org/wiki/The_Wikipedia_Library">Wikipedia Library</a> project -
38 |     <a href="https://github.com/WikipediaLibrary/externallinks">Github</a> -
39 |     <a href="https://phabricator.wikimedia.org/project/board/4082/">Phabricator</a> -
40 |     <a href="https://meta.wikimedia.org/wiki/Wikilink_tool">Meta</a>
41 | </div>
42 | 
43 | </body>
44 | </html>
45 | 


--------------------------------------------------------------------------------
/extlinks/templates/documentation.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 | <div class="body">
 5 |     <h1>Wikilink</h1>
 6 |     <p>
 7 |         The Wikilink tool helps program organisers and organisations track external links on Wikimedia projects. While
 8 |         MediaWiki has the ability to <a href="https://en.wikipedia.org/wiki/Special:LinkSearch">search existing
 9 |         links</a>, at the time of writing there is no way to easily monitor link additions and removals over time. The
10 |         tool was built primarily for The Wikipedia Library's use case. Publishers donate access to Wikipedia editors,
11 |         and while it was possible to monitor the total number of links over time, there was no simple way to investigate
12 |         that data further - to find out where links were being added, who was adding them, or in the case of a drop
13 |         in link numbers, why those links were removed.
14 |     </p>
15 |     <hr style="width:30%">
16 |     <h2>Using the tool</h2>
17 |     <p>
18 |         There are two primary views into the data - the 'program' level and 'organisation' level.
19 |     </p>
20 |     <h3>Programs</h3>
21 |     <p>
22 |         Programs are collections of organisations. Program pages provide a high level overview of the link additions
23 |         and removals for many organisations in one place. If you have partnerships with multiple organisations,
24 |         the program pages can provide data about their data in aggregate for reporting purposes.
25 |     </p>
26 |     <h3>Organisations</h3>
27 |     <p>
28 |         Organisation pages provide data relevant to an individual organisation. Organisations can have multiple
29 |         collections of tracked URLs - these could be different websites or simply different URL patterns. Results
30 |         for each collection are presented individually. Additionally, each collection can have multiple URLs. This is
31 |         useful primarily in the case that a website has moved; both URLs can continue to be tracked in the same place.
32 |     </p>
33 |     <hr style="width:30%">
34 |     <h2>Data collection</h2>
35 |     <p>
36 |         Two sets of data are collected: Link events and totals
37 |     </p>
38 |     <h2>Link events</h2>
39 |     <p>
40 |         A <a href="https://github.com/Samwalton9/externallinks/blob/master/extlinks/links/management/commands/linkevents_collect.py">
41 |         script</a> is always monitoring the
42 |         <a href="https://stream.wikimedia.org/?doc#!/Streams/get_v2_stream_page_links_change">page-links-change</a>
43 |         event stream; when a link tracked by Wikilink is added or removed, the data is stored in Wikilink's database.
44 |     </p>
45 |     <p>
46 |         The event stream reports link additions and removals from all Wikimedia projects and languages, and tracks
47 |         events from all namespaces. If a link is changed, it will register both an addition (the new URL) and a removal
48 |         (the old URL). Editing the same URL multiple times in one edit will only send a single event.
49 |     </p>
50 |     <p>
51 |         Please be aware there is currently <a href="https://phabricator.wikimedia.org/T216504">a known bug</a> with the
52 |         event stream whereby some additional events are being sent related to template transclusions.
53 |     </p>
54 |     <h2>Link totals</h2>
55 |     <p>
56 |         The tool also tracks the total number of links to each tracked URL on a weekly basis. These totals are
57 |         retrieved from the <a href="https://www.mediawiki.org/wiki/Manual:Externallinks_table">externallinks</a> table.
58 |         Currently, these totals only consider Wikipedia projects, however they do cover every language. Unlike with the
59 |         event stream, queries have to be made against each project's database individually, and it is therefore
60 |         prohibitive to collect total data for every Wikimedia project.
61 |     </p>
62 | </div>
63 | {% endblock %}
64 | 


--------------------------------------------------------------------------------
/extlinks/templates/homepage.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 | <div class="body">
 5 |     <div class="container-fluid">
 6 |         <div class="row justify-content-center">
 7 |             <div class="intro">
 8 |                 Wikilink
 9 |             </div>
10 |         </div>
11 |     </div>
12 | 
13 |     <p>
14 |         The Wikilink tool collates and presents data on the addition, removal, and total number of links to websites on
15 |         Wikimedia projects. It tracks the
16 |         <a href="https://stream.wikimedia.org/?doc#!/Streams/get_v2_stream_page_links_change">page-links-change</a>
17 |         event stream for link additions and removals for specified URL Patterns, and queries the
18 |         <a href="https://www.mediawiki.org/wiki/Manual:Externallinks_table">externallinks table</a> to retrieve totals.
19 |     </p>
20 |     <p>
21 |         The tool currently only supports <a href="https://meta.wikimedia.org/wiki/The_Wikipedia_Library">The Wikipedia
22 |         Library</a> program, however support for other programs and partnerships is planned.
23 |     </p>
24 | </div>
25 | 
26 | {% endblock %}
27 | 


--------------------------------------------------------------------------------
/extlinks/tests.py:
--------------------------------------------------------------------------------
 1 | from django.test import TestCase, RequestFactory
 2 | from django.urls import reverse
 3 | 
 4 | from .views import Homepage
 5 | 
 6 | 
 7 | class HomepageTest(TestCase):
 8 |     def test_homepage_view(self):
 9 |         """
10 |         Can we simply load the homepage successfully?
11 |         """
12 |         factory = RequestFactory()
13 | 
14 |         request = factory.get(reverse("homepage"))
15 |         response = Homepage.as_view()(request)
16 | 
17 |         self.assertEqual(response.status_code, 200)
18 | 


--------------------------------------------------------------------------------
/extlinks/urls.py:
--------------------------------------------------------------------------------
 1 | from os import getenv
 2 | from django.contrib import admin
 3 | from django.urls import include, path
 4 | from django.conf import settings
 5 | 
 6 | from extlinks.healthcheck.urls import urlpatterns as healthcheck_urls
 7 | from extlinks.programs.urls import urlpatterns as programs_urls
 8 | from extlinks.organisations.urls import urlpatterns as organisations_urls
 9 | 
10 | from .views import Homepage, Documentation
11 | 
12 | urlpatterns = [
13 |     path("admin/", admin.site.urls),
14 |     path("", Homepage.as_view(), name="homepage"),
15 |     path("docs", Documentation.as_view(), name="documentation"),
16 |     path(
17 |         "healthcheck/",
18 |         include((healthcheck_urls, "healthcheck"), namespace="healthcheck"),
19 |     ),
20 |     path("programs/", include((programs_urls, "programs"), namespace="programs")),
21 |     path(
22 |         "organisations/",
23 |         include((organisations_urls, "organisations"), namespace="organisations"),
24 |     ),
25 | ]
26 | 
27 | reqs = getenv("REQUIREMENTS_FILE", "django.txt")
28 | if settings.DEBUG and reqs == "local.txt":
29 |     if not settings.TESTING:
30 |         import debug_toolbar
31 | 
32 |         urlpatterns += [
33 |             path("__debug__/", include(debug_toolbar.urls)),
34 |         ]
35 | 


--------------------------------------------------------------------------------
/extlinks/views.py:
--------------------------------------------------------------------------------
 1 | from django.views.generic import TemplateView
 2 | 
 3 | 
 4 | class Homepage(TemplateView):
 5 |     template_name = "homepage.html"
 6 | 
 7 | 
 8 | class Documentation(TemplateView):
 9 |     template_name = "documentation.html"
10 | 


--------------------------------------------------------------------------------
/extlinks/wsgi.py:
--------------------------------------------------------------------------------
 1 | """
 2 | WSGI config for extlinks project.
 3 | 
 4 | It exposes the WSGI callable as a module-level variable named ``application``.
 5 | """
 6 | 
 7 | import os
 8 | 
 9 | from django.core.wsgi import get_wsgi_application
10 | 
11 | application = get_wsgi_application()
12 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Django's command-line utility for administrative tasks."""
 3 | import os
 4 | import sys
 5 | from dotenv import load_dotenv
 6 | 
 7 | 
 8 | def main():
 9 |     try:
10 |         from django.core.management import execute_from_command_line
11 |     except ImportError as exc:
12 |         raise ImportError(
13 |             "Couldn't import Django. Are you sure it's installed and "
14 |             "available on your PYTHONPATH environment variable? Did you "
15 |             "forget to activate a virtual environment?"
16 |         ) from exc
17 |     load_dotenv(".env")
18 |     execute_from_command_line(sys.argv)
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     main()
23 | 


--------------------------------------------------------------------------------
/nginx.conf:
--------------------------------------------------------------------------------
  1 | map $http_x_forwarded_proto $web_proxy_scheme {
  2 |   default $scheme;
  3 |   https https;
  4 | }
  5 | 
  6 | map $http_user_agent $limit_bots {
  7 |   default "";
  8 |   ~*(GoogleBot|bingbot|YandexBot|mj12bot|Apache-HttpClient|Adsbot|Barkrowler|FacebookBot|dotbot|Googlebot|Bytespider|SemrushBot|AhrefsBot|Amazonbot|GPTBot|DotBot) $binary_remote_addr;
  9 | }
 10 | 
 11 | ## Testing the request method
 12 | # Only GET and HEAD are caching safe.
 13 | map $request_method $no_cache_method {
 14 |   default 1;
 15 |   HEAD 0;
 16 |   GET 0;
 17 | }
 18 | 
 19 | ## Testing for Cache-Control header
 20 | # Only checking for no-cache because chrome annoyingly sets max-age=0 when hitting enter in the address bar.
 21 | map $http_cache_control $no_cache_control {
 22 |   default 0;
 23 |   no-cache 1;
 24 | }
 25 | 
 26 | ## Testing for the session cookie being present
 27 | map $http_cookie $no_cache_session {
 28 |   default 0;
 29 |   ~sessionid 1; # Django session cookie
 30 | }
 31 | 
 32 | ## proxy caching settings.
 33 | proxy_cache_path /var/lib/nginx/cache levels=1:2 keys_zone=cache:8m max_size=10g inactive=10m;
 34 | proxy_cache_key "$scheme$proxy_host$uri$is_args$args$http_accept_language";
 35 | proxy_cache_lock on;
 36 | proxy_cache_use_stale error timeout invalid_header updating http_500 http_502 http_503 http_504;
 37 | 
 38 | # remote address is a joke here since we don't have x-forwarded-for
 39 | limit_req_zone $limit_bots zone=bots:10m rate=1r/s;
 40 | limit_req_zone $binary_remote_addr zone=one:10m rate=500r/s;
 41 | 
 42 | upstream django_server {
 43 |  server externallinks:8000 fail_timeout=0;
 44 | }
 45 | 
 46 | server {
 47 |   listen 80 deferred;
 48 |   client_max_body_size 4G;
 49 |   server_name wikilink.wmflabs.org;
 50 |   keepalive_timeout 5;
 51 | 
 52 |   # Definied explicitly to avoid caching
 53 |   location /healthcheck/link_event {
 54 |     # Rate limit
 55 |     limit_req zone=bots burst=2 nodelay;
 56 |     limit_req zone=one burst=1000 nodelay;
 57 |     limit_req_status 429;
 58 |     # Proxy
 59 |     proxy_set_header X-Forwarded-Proto $web_proxy_scheme;
 60 |     proxy_set_header Host $http_host;
 61 |     proxy_redirect off;
 62 |     proxy_pass http://django_server;
 63 |   }
 64 | 
 65 |   location = /robots.txt {
 66 |     add_header Content-Type text/plain;
 67 |     alias /app/robots.txt;
 68 |   }
 69 | 
 70 |   location / {
 71 |     root /app/;
 72 |     expires 30d;
 73 | 
 74 |     if ($http_user_agent ~* (GoogleBot|bingbot|YandexBot|mj12bot|Apache-HttpClient|Adsbot|Barkrowler|FacebookBot|dotbot|Bytespider|SemrushBot|AhrefsBot|Amazonbot|GPTBot) ) {
 75 |         return 403;
 76 |     }
 77 |     location /admin/links/ {
 78 |         try_files $uri @django-admin-slow;
 79 |     }
 80 |     # checks for static file, if not found proxy to app
 81 |     try_files $uri @django;
 82 |   }
 83 |   location @django {
 84 |     # Cache
 85 |     proxy_cache_valid 200 301 302 401 403 404 1d;
 86 |     proxy_cache_bypass $http_pragma $no_cache_method $no_cache_control $no_cache_session;
 87 |     proxy_cache_revalidate on;
 88 |     proxy_cache cache;
 89 |     add_header X-Cache-Status $upstream_cache_status;
 90 |     # Rate limit
 91 |     limit_req zone=bots burst=2 nodelay;
 92 |     limit_req zone=one burst=1000 nodelay;
 93 |     limit_req_status 429;
 94 |     # Proxy
 95 |     proxy_set_header X-Forwarded-Proto $web_proxy_scheme;
 96 |     proxy_set_header Host $http_host;
 97 |     proxy_redirect off;
 98 |     proxy_pass http://django_server;
 99 |   }
100 |   location @django-admin-slow {
101 |     # https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_send_timeout
102 |     proxy_connect_timeout 120s;
103 |     proxy_send_timeout 120s;
104 |     proxy_read_timeout 120s;
105 |     # https://nginx.org/en/docs/http/ngx_http_core_module.html#send_timeout
106 |     send_timeout 120s;
107 |     keepalive_timeout 120s;
108 |     # Cache
109 |     proxy_cache_valid 200 301 302 401 403 404 1d;
110 |     proxy_cache_bypass $http_pragma $no_cache_method $no_cache_control $no_cache_session;
111 |     proxy_cache_revalidate on;
112 |     proxy_cache cache;
113 |     add_header X-Cache-Status $upstream_cache_status;
114 |     # Rate limit
115 |     limit_req zone=bots burst=2 nodelay;
116 |     limit_req zone=one burst=1000 nodelay;
117 |     limit_req_status 429;
118 |     # Proxy
119 |     proxy_set_header X-Forwarded-Proto $web_proxy_scheme;
120 |     proxy_set_header Host $http_host;
121 |     proxy_redirect off;
122 |     proxy_pass http://django_server;
123 |   }
124 | }
125 | 


--------------------------------------------------------------------------------
/requirements/django.txt:
--------------------------------------------------------------------------------
 1 | Django>=4.2.7,<5.0.0
 2 | django_extensions==3.2.3
 3 | factory_boy>=3.0.1
 4 | faker==26.0.0
 5 | filelock==3.18.0
 6 | mysqlclient==2.2.4
 7 | pymemcache==4.0.0
 8 | python-dotenv==1.1.0
 9 | sentry-sdk==2.10.0
10 | sseclient==0.0.27
11 | time_machine==2.14.2
12 | python-swiftclient>=4.6.0,<5.0.0
13 | keystoneauth1>=5.9.2,<6.0.0
14 | coverage==7.8.0
15 | 


--------------------------------------------------------------------------------
/requirements/local.txt:
--------------------------------------------------------------------------------
1 | -r django.txt
2 | django-debug-toolbar==4.4.6
3 | pudb==2024.1.3
4 | 


--------------------------------------------------------------------------------
/robots.txt:
--------------------------------------------------------------------------------
1 | User-agent: *
2 | Disallow: /
3 | 


--------------------------------------------------------------------------------
/static/css/local.css:
--------------------------------------------------------------------------------
  1 | h1 {
  2 |     margin-bottom: 20px;
  3 | }
  4 | 
  5 | hr {
  6 |     margin-top: 2%;
  7 |     width: 70%;
  8 | }
  9 | 
 10 | .body {
 11 |     padding-left: 10%;
 12 |     padding-right: 10%;
 13 |     padding-top: 2%;
 14 | }
 15 | 
 16 | .charts-body {
 17 |     padding: 2%;
 18 |     width: 100%;
 19 | 
 20 | }
 21 | 
 22 | .navbar {
 23 |     background-color: #fbfbfb;
 24 | }
 25 | 
 26 | .navbar-light .navbar-nav .nav-link {
 27 |     color: rgba(0, 0, 0, 0.7);
 28 | }
 29 | 
 30 | .intro {
 31 |     font-size:30pt;
 32 |     padding-bottom:25px;
 33 | }
 34 | 
 35 | .footer {
 36 |     font-size: 14px;
 37 |     text-align: center;
 38 |     padding-bottom:5%;
 39 | }
 40 | 
 41 | .wrapper {
 42 |     display: flex;
 43 |     width: 100%;
 44 |     align-items: stretch;
 45 | }
 46 | 
 47 | .stat-box {
 48 |     border-style: solid;
 49 |     border-width: 2px;
 50 |     border-top-width: 0;
 51 |     border-bottom-width: 0;
 52 |     border-color: rgba(197, 197, 197);
 53 |     border-radius: 25px;
 54 |     padding: 25px 30px;
 55 | }
 56 | 
 57 | .stat-table-header {
 58 |     border-bottom-style: solid;
 59 |     border-bottom-color: lightgrey;
 60 |     border-bottom-width: 1px;
 61 | }
 62 | 
 63 | .tr {
 64 |     line-height: 25px;
 65 | }
 66 | 
 67 | .tabcontent {
 68 |     display: none;
 69 | }
 70 | 
 71 | #sidebar {
 72 |     min-width: 250px;
 73 |     max-width: 250px;
 74 |     min-height: 100vh;
 75 |     padding: 20px;
 76 |     padding-top: 40px;
 77 |     background-color: #fbfbfb;
 78 | }
 79 | 
 80 | .sidebar-sub-entry {
 81 |     text-indent: 2em;
 82 |     font-style: italic;
 83 |     font-size: 14px;
 84 | }
 85 | 
 86 | .card {
 87 |     margin: 1%;
 88 |     width: 20em;
 89 | }
 90 | 
 91 | .tablinks {
 92 |     border: none;
 93 |     min-height: 50px;
 94 |     margin: 0;
 95 |     float: left;
 96 |     background-color: #f2f2f4;
 97 |     transition: 0.2s;
 98 |     border-top-style: solid;
 99 |     border-top-color: #cdcdcd;
100 |     border-top-width: 2px;
101 | }
102 | 
103 | .tab {
104 |     border-bottom-color: #ebebeb;
105 |     border-bottom-style: solid;
106 |     border-bottom-width: 2px;
107 |     padding-left: 0;
108 |     padding-right: 30px;
109 |     margin-bottom: 10px;
110 | }
111 | 
112 | .tab button:hover {
113 |     background-color: #e5e5e5;
114 | }
115 | 
116 | .tab button:focus {
117 |     outline: 0;
118 | }
119 | 
120 | .active {
121 |     background-color: #d6d6d6;
122 |     border-top-style: solid;
123 |     border-top-color: #9a9a9a;
124 |     border-top-width: 4px;
125 | }
126 | 


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WikipediaLibrary/externallinks/f86118b12123616b6c3f1462f7397e1caf6b68c3/static/favicon.ico


--------------------------------------------------------------------------------
/template.env:
--------------------------------------------------------------------------------
 1 | ENV=local
 2 | DJANGO_SETTINGS_MODULE=extlinks.settings.local
 3 | # To enable admin error emails, change to django.core.mail.backends.smtp.EmailBackend
 4 | DJANGO_EMAIL_ADMINS_BACKEND=django.core.mail.backends.console.EmailBackend
 5 | DJANGO_EMAIL_HOST=localhost
 6 | SECRET_KEY=et=o&42)q3r8ztu&b^sjs2+%vv3^mg%3amzcia8^)fq*w6#oj3
 7 | MYSQL_DATABASE=extlinks_db
 8 | MYSQL_ROOT_PASSWORD=links
 9 | REPLICA_DB_USER=user
10 | REPLICA_DB_PASSWORD=password
11 | TWL_API_TOKEN=token
12 | # When building, these define how the local images are tagged
13 | # When pulling, these define which images are pulled
14 | EVENTSTREAM_TAG=latest
15 | EXTERNALLINKS_TAG=latest
16 | # Change to something like /data/project/prod for real servers.
17 | HOST_BACKUP_DIR=./backup
18 | # Swift Object Store (for storing archives), `local` will just ignore Swift usage
19 | SWIFT_APPLICATION_CREDENTIAL_ID=local
20 | SWIFT_APPLICATION_CREDENTIAL_SECRET=local
21 | SWIFT_CONTAINER_NAME=archive-linkevents
22 | # See the readme file for setting up a local swift store
23 | # In production, it should use the known URL https://openstack.eqiad1.wikimediacloud.org:25000/v3
24 | OPENSTACK_AUTH_URL=http://externallinks-swift:5001/v3
25 | LINKEVENTS_ARCHIVE_OBJECT_STORAGE_ONLY=false
26 | 


--------------------------------------------------------------------------------
/wiki-list.csv:
--------------------------------------------------------------------------------
  1 | en
  2 | ceb
  3 | sv
  4 | de
  5 | fr
  6 | nl
  7 | ru
  8 | it
  9 | es
 10 | pl
 11 | war
 12 | vi
 13 | ja
 14 | zh
 15 | pt
 16 | uk
 17 | ar
 18 | fa
 19 | sr
 20 | ca
 21 | no
 22 | id
 23 | fi
 24 | ko
 25 | hu
 26 | sh
 27 | cs
 28 | ro
 29 | eu
 30 | tr
 31 | ms
 32 | eo
 33 | bg
 34 | hy
 35 | da
 36 | he
 37 | sk
 38 | zh_min_nan
 39 | kk
 40 | min
 41 | ce
 42 | hr
 43 | lt
 44 | et
 45 | be
 46 | sl
 47 | el
 48 | gl
 49 | nn
 50 | az
 51 | ur
 52 | simple
 53 | azb
 54 | th
 55 | hi
 56 | uz
 57 | la
 58 | ka
 59 | vo
 60 | ta
 61 | cy
 62 | mk
 63 | ast
 64 | tg
 65 | lv
 66 | mg
 67 | tt
 68 | oc
 69 | af
 70 | bs
 71 | ky
 72 | sq
 73 | tl
 74 | zh_yue
 75 | new
 76 | te
 77 | bn
 78 | br
 79 | pms
 80 | ml
 81 | lb
 82 | jv
 83 | ht
 84 | sco
 85 | mr
 86 | sw
 87 | ga
 88 | nds
 89 | su
 90 | ba
 91 | pnb
 92 | is
 93 | my
 94 | fy
 95 | cv
 96 | lmo
 97 | an
 98 | ne
 99 | yo
100 | pa
101 | gu
102 | io
103 | bar
104 | scn
105 | ku
106 | als
107 | bpy
108 | kn
109 | ckb
110 | ia
111 | qu
112 | arz
113 | wuu
114 | mn
115 | bat_smg
116 | si
117 | or
118 | wa
119 | gd
120 | am
121 | yi
122 | cdo
123 | nap
124 | bug
125 | hsb
126 | mai
127 | map_bms
128 | mzn
129 | fo
130 | xmf
131 | li
132 | ilo
133 | eml
134 | sah
135 | vec
136 | os
137 | sd
138 | sa
139 | diq
140 | mrj
141 | ps
142 | mhr
143 | hif
144 | zh_classical
145 | roa_tara
146 | bcl
147 | ace
148 | hak
149 | frr
150 | pam
151 | szl
152 | nso
153 | nv
154 | se
155 | km
156 | mi
157 | rue
158 | nah
159 | bh
160 | nds_nl
161 | vls
162 | crh
163 | gan
164 | sc
165 | vep
166 | bo
167 | glk
168 | myv
169 | co
170 | as
171 | tk
172 | fiu_vro
173 | so
174 | kv
175 | lrc
176 | csb
177 | gv
178 | udm
179 | zea
180 | ay
181 | ie
182 | pcd
183 | sn
184 | nrm
185 | ug
186 | stq
187 | lez
188 | kw
189 | lad
190 | mwl
191 | gom
192 | ab
193 | gn
194 | haw
195 | rm
196 | ha
197 | lij
198 | kab
199 | koi
200 | lfn
201 | lo
202 | mt
203 | fur
204 | frp
205 | dsb
206 | ln
207 | ang
208 | ext
209 | olo
210 | dty
211 | cbk_zam
212 | dv
213 | ksh
214 | gag
215 | pi
216 | pag
217 | pfl
218 | bjn
219 | av
220 | bxr
221 | xal
222 | gor
223 | krc
224 | za
225 | pap
226 | kaa
227 | pdc
228 | rw
229 | tyv
230 | to
231 | kl
232 | nov
233 | jam
234 | arc
235 | kbp
236 | kbd
237 | tpi
238 | tet
239 | ig
240 | ki
241 | na
242 | jbo
243 | wo
244 | roa_rup
245 | lbe
246 | bi
247 | ty
248 | kg
249 | mdf
250 | lg
251 | zu
252 | srn
253 | tcy
254 | inh
255 | atj
256 | chr
257 | ltg
258 | sat
259 | sm
260 | xh
261 | om
262 | pih
263 | cu
264 | rmy
265 | tw
266 | bm
267 | tn
268 | chy
269 | rn
270 | got
271 | ts
272 | tum
273 | ak
274 | st
275 | ny
276 | ch
277 | ss
278 | pnt
279 | fj
280 | iu
281 | ady
282 | ee
283 | ks
284 | ve
285 | ik
286 | sg
287 | ff
288 | dz
289 | ti
290 | cr
291 | din
292 | ng
293 | cho
294 | kj
295 | mh
296 | ho
297 | ii
298 | aa
299 | mus
300 | hz
301 | kr
302 | shn
303 | hyw
304 | 


--------------------------------------------------------------------------------