├── src ├── static │ ├── moon.png │ ├── favicon.ico │ ├── placeholder.png │ ├── user_profile.png │ ├── favicons │ │ ├── favicon.png │ │ ├── favicon-128.png │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── favicon-96x96.png │ │ ├── favicon-196x196.png │ │ ├── apple-touch-icon-57x57.png │ │ ├── apple-touch-icon-60x60.png │ │ ├── apple-touch-icon-72x72.png │ │ ├── apple-touch-icon-76x76.png │ │ ├── apple-touch-icon-114x114.png │ │ ├── apple-touch-icon-120x120.png │ │ ├── apple-touch-icon-144x144.png │ │ └── apple-touch-icon-152x152.png │ ├── Twitter_Logo_Blue.svg │ ├── tweet.css │ └── sourcecatcher.css ├── wsgi.py ├── gunicorn.config.py ├── templates │ ├── dc_app_image.html │ ├── theme_toggle.html │ ├── error.html │ ├── twitter_users.html │ ├── sourcecatcher.html │ ├── dc_app.html │ ├── input_forms.html │ ├── navbar.html │ ├── head.html │ ├── match_results.html │ └── scripts.html ├── sc_exceptions.py ├── gen_phashes.py ├── find_similar.py ├── find_match.py ├── sc_helpers.py ├── image_search.py ├── feature_match.py ├── web_server.py └── bot.py ├── .containerignore ├── systemd ├── sourcecatcher-update.timer ├── sourcecatcher.service ├── sourcecatcher-update.service └── nitter.service ├── requirements.txt ├── .gitignore ├── scripts ├── prune_backups.sh ├── initial.sh ├── backup.sh └── update.sh ├── .github └── workflows │ └── build.yml ├── nitter └── nitter.conf ├── Containerfile ├── README.md └── LICENSE /src/static/moon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/moon.png -------------------------------------------------------------------------------- /src/wsgi.py: -------------------------------------------------------------------------------- 1 | from web_server import app 2 | 3 | if __name__ == "__main__": 4 | app.run() 5 | -------------------------------------------------------------------------------- /src/static/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicon.ico -------------------------------------------------------------------------------- /.containerignore: -------------------------------------------------------------------------------- 1 | * 2 | 3 | !src/ 4 | !scripts/ 5 | !systemd/ 6 | !nitter/ 7 | 8 | !requirements.txt 9 | -------------------------------------------------------------------------------- /src/static/placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/placeholder.png -------------------------------------------------------------------------------- /src/static/user_profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/user_profile.png -------------------------------------------------------------------------------- /src/static/favicons/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/favicon.png -------------------------------------------------------------------------------- /src/static/favicons/favicon-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/favicon-128.png -------------------------------------------------------------------------------- /src/static/favicons/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/favicon-16x16.png -------------------------------------------------------------------------------- 
/src/static/favicons/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/favicon-32x32.png -------------------------------------------------------------------------------- /src/static/favicons/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/favicon-96x96.png -------------------------------------------------------------------------------- /src/static/favicons/favicon-196x196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/favicon-196x196.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-57x57.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-72x72.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-114x114.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-144x144.png -------------------------------------------------------------------------------- /src/static/favicons/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/evanc577/sourcecatcher/HEAD/src/static/favicons/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /src/gunicorn.config.py: -------------------------------------------------------------------------------- 1 | from gevent import monkey; monkey.patch_all() 2 | print("Successfully applied monkey patch") 3 | 4 | worker_class = 'gevent' 5 | preload_app = True 6 | -------------------------------------------------------------------------------- /src/templates/dc_app_image.html: -------------------------------------------------------------------------------- 1 | {% extends "sourcecatcher.html" %} 2 | 3 | {% block 
results %} 4 |
5 | 6 |
7 | {% endblock results %} 8 | -------------------------------------------------------------------------------- /systemd/sourcecatcher-update.timer: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Update Sourcecatcher database 3 | 4 | [Timer] 5 | OnCalendar=*-*-* 12:00:00 6 | RandomizedDelaySec=900 7 | 8 | [Install] 9 | WantedBy=timers.target 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | annoy==1.16.3 3 | Flask 4 | gunicorn[gevent] 5 | ImageHash 6 | piexif 7 | Pillow 8 | PyYAML 9 | Werkzeug 10 | beautifulsoup4 11 | tldextract 12 | opencv-python 13 | opencv-contrib-python 14 | joblib 15 | requests-cache 16 | redis 17 | youtube_dl 18 | bsddb3 19 | cachetools 20 | MarkupSafe 21 | python-dateutil 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # images in root 2 | /*.png 3 | /*.jpg 4 | /*.jpeg 5 | 6 | # python virtual environment 7 | /sourcecatcher_venv/ 8 | /__pycache__/ 9 | 10 | # config files 11 | *.yaml 12 | 13 | # misc directories 14 | /live/ 15 | /working/ 16 | /backups/ 17 | /uploads/ 18 | 19 | # database files 20 | *.sqlite 21 | *.ann 22 | *.db 23 | *.pck 24 | -------------------------------------------------------------------------------- /systemd/sourcecatcher.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Sourcecatcher web server 3 | After=network.target redis 4 | 5 | [Service] 6 | WorkingDirectory=/sourcecatcher/src/ 7 | ExecStart=/usr/local/bin/gunicorn -c gunicorn.config.py -w 9 -b 0.0.0.0:80 -m 007 wsgi:app 8 | Restart=always 9 | RestartSec=1s 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | 
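The `sourcecatcher-update.timer` unit above fires daily at 12:00 with `RandomizedDelaySec=900`, meaning systemd adds a uniformly random delay of up to 900 seconds to each trigger. A minimal sketch of the resulting firing window (illustrative only, not part of the repo):

```python
import random
from datetime import datetime, timedelta

# OnCalendar=*-*-* 12:00:00 plus RandomizedDelaySec=900:
# systemd picks a uniform delay in [0, 900] seconds after the base time.
base = datetime(2024, 1, 1, 12, 0, 0)  # any day at 12:00:00
delay = timedelta(seconds=random.uniform(0, 900))
fire_time = base + delay

# Every firing lands inside the 12:00:00-12:15:00 window.
assert base <= fire_time <= base + timedelta(minutes=15)
```

Because the delay is re-drawn on every trigger, running many instances spreads the database-update load across the 15-minute window instead of hitting Nitter all at once.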
-------------------------------------------------------------------------------- /systemd/sourcecatcher-update.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Update Sourcecatcher database 3 | After=network.target 4 | 5 | [Service] 6 | WorkingDirectory=/sourcecatcher 7 | ExecStart=/bin/bash /sourcecatcher/scripts/update.sh 8 | ExecStopPost=/usr/bin/find /sourcecatcher/images/ -type f -mtime +3 -delete 9 | CPUWeight=20 10 | IOWeight=20 11 | MemoryHigh=4G 12 | MemoryMax=6G 13 | -------------------------------------------------------------------------------- /systemd/nitter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Nitter 3 | After=network.target redis 4 | Requires=redis 5 | 6 | [Service] 7 | User=nitter 8 | Group=nitter 9 | WorkingDirectory=/nitter 10 | ExecStartPre=/bin/sh -c 'until redis-cli ping; do sleep 1; done' 11 | ExecStart=/usr/local/bin/nitter 12 | Restart=always 13 | RestartSec=1s 14 | 15 | [Install] 16 | WantedBy=multi-user.target 17 | -------------------------------------------------------------------------------- /scripts/prune_backups.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | idx=0 4 | 5 | if [[ -z "${SOURCECATCHER_NUM_BACKUPS}" ]]; then 6 | NUM_BACKUPS="10" 7 | else 8 | NUM_BACKUPS="${SOURCECATCHER_NUM_BACKUPS}" 9 | fi 10 | 11 | for backup in $(find backups/ -mindepth 1 -maxdepth 1 | sort -r); do 12 | if [ $idx -lt ${NUM_BACKUPS} ]; then 13 | ((++idx)) 14 | else 15 | echo "Removing backup $backup" 16 | rm -rf "$backup" 17 | fi 18 | done 19 | -------------------------------------------------------------------------------- /src/templates/theme_toggle.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 |
6 | 11 | -------------------------------------------------------------------------------- /src/templates/error.html: -------------------------------------------------------------------------------- 1 | {% extends "sourcecatcher.html" %} 2 | 3 | {% block error_msg %} 4 |
5 | {{ error_msg|safe }} 6 | {% if error_link != None %} 7 |
8 | {{ error_link|e }} 9 | {% endif %} 10 | {% if error_reasons != None %} 11 | 16 | {% endif %} 17 |
18 | {% endblock error_msg %} 19 | -------------------------------------------------------------------------------- /scripts/initial.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -e 4 | 5 | read -p "This will reset the current database. Are you sure you want to continue? (y/n) " -n 1 -r 6 | echo # (optional) move to a new line 7 | 8 | 9 | if [[ $REPLY =~ ^[Yy]$ ]] 10 | then 11 | LIVE_DIR=/sourcecatcher/live/ 12 | WORKING_DIR=/sourcecatcher/working/ 13 | mkdir -p $LIVE_DIR 14 | mkdir -p $WORKING_DIR 15 | 16 | rm -f $WORKING_DIR/* 17 | 18 | python /sourcecatcher/src/bot.py 19 | python /sourcecatcher/src/gen_phashes.py 20 | 21 | cp $WORKING_DIR/twitter_scraper.db $LIVE_DIR 22 | cp $WORKING_DIR/phash_index.ann $LIVE_DIR 23 | fi 24 | -------------------------------------------------------------------------------- /src/static/Twitter_Logo_Blue.svg: -------------------------------------------------------------------------------- 1 | Twitter_Logo_Blue -------------------------------------------------------------------------------- /scripts/backup.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -e 4 | 5 | if [ -z "$BACKUP_SERVER" ]; then 6 | echo "BACKUP_SERVER unset" 7 | exit 1 8 | fi 9 | if [ -z "$BACKUP_SERVER_USER" ]; then 10 | echo "BACKUP_SERVER_USER unset" 11 | exit 1 12 | fi 13 | if [ -z "$BACKUP_SERVER_PASS" ]; then 14 | echo "BACKUP_SERVER_PASS unset" 15 | exit 1 16 | fi 17 | 18 | LOCK=/tmp/sourcecatcher.lock 19 | function cleanup { 20 | rm -rf $LOCK 21 | } 22 | trap cleanup EXIT 23 | echo "acquiring lock" 24 | while ! 
mkdir $LOCK 2> /dev/null; do 25 | sleep 1 26 | done 27 | echo "acquired lock" 28 | 29 | BACKUP_FILE=$(date --iso-8601=seconds).tar.gz 30 | LIVE_DIR=live 31 | 32 | tar -cv config.yaml $LIVE_DIR/twitter_scraper.db $LIVE_DIR/phash_index.ann 2>/dev/null \ 33 | | pigz \ 34 | | curl -T - -u "$BACKUP_SERVER_USER:$BACKUP_SERVER_PASS" -H "X-FILE: $BACKUP_FILE" $BACKUP_SERVER 35 | 36 | echo "Backed up to $BACKUP_SERVER" 37 | -------------------------------------------------------------------------------- /scripts/update.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -e 4 | 5 | LOCK=/tmp/sourcecatcher.lock 6 | function cleanup { 7 | rm -rf $LOCK 8 | } 9 | trap cleanup EXIT 10 | echo "acquiring lock" 11 | while ! mkdir $LOCK 2> /dev/null; do 12 | sleep 1 13 | done 14 | echo "acquired lock" 15 | 16 | LIVE_DIR=/sourcecatcher/live/ 17 | WORKING_DIR=/sourcecatcher/working/ 18 | mkdir -p $LIVE_DIR 19 | mkdir -p $WORKING_DIR 20 | 21 | rm -rf $WORKING_DIR/* 22 | 23 | echo "copying to working directory" 24 | cp $LIVE_DIR/twitter_scraper.db $WORKING_DIR 25 | cp $LIVE_DIR/phash_index.ann $WORKING_DIR 26 | cp $LIVE_DIR/discord.db $WORKING_DIR 27 | 28 | echo "starting ingest" 29 | python /sourcecatcher/src/bot.py 30 | echo "starting phash" 31 | python /sourcecatcher/src/gen_phashes.py 32 | 33 | echo "moving to live directory" 34 | mv -f $WORKING_DIR/twitter_scraper.db $LIVE_DIR 35 | mv -f $WORKING_DIR/phash_index.ann $LIVE_DIR 36 | mv -f $WORKING_DIR/discord.db $LIVE_DIR 37 | 38 | echo "update complete" 39 | -------------------------------------------------------------------------------- /src/templates/twitter_users.html: -------------------------------------------------------------------------------- 1 | {% extends "sourcecatcher.html" %} 2 | 3 | {% block results %} 4 |
5 |

All indexed Twitter users

6 |

Currently indexing {{ user_count|e }} users

7 |

8 | Users list maintained by 9 | /u/ipwnmice 10 | with help from 11 | Dreamcatcher Discord 12 |

13 |
14 |
15 | 26 |
27 | {% endblock results %} 28 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Images to GHCR 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - "**.md" 7 | branches: 8 | - master 9 | tags: 10 | - 'v*' 11 | release: 12 | types: [published] 13 | workflow_dispatch: 14 | 15 | jobs: 16 | build-push-image: 17 | strategy: 18 | matrix: 19 | include: 20 | - runner: ubuntu-24.04 21 | platform: linux/amd64 22 | tag: latest 23 | - runner: ubuntu-24.04-arm 24 | platform: linux/arm64 25 | tag: latest-arm 26 | runs-on: ${{ matrix.runner }} 27 | steps: 28 | - name: "Checkout GitHub Action" 29 | uses: actions/checkout@main 30 | 31 | - name: "Login to GitHub Container Registry" 32 | uses: docker/login-action@v1 33 | with: 34 | registry: ghcr.io 35 | username: ${{github.actor}} 36 | password: ${{secrets.GITHUB_TOKEN}} 37 | 38 | - name: "Build OCI Image" 39 | run: | 40 | sudo apt-get update 41 | sudo apt-get install -y podman 42 | podman build . --platform ${{ matrix.platform }} -t ghcr.io/evanc577/sourcecatcher:${{ matrix.tag }} 43 | podman push ghcr.io/evanc577/sourcecatcher:${{ matrix.tag }} 44 | -------------------------------------------------------------------------------- /src/templates/sourcecatcher.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% include 'head.html' %} 4 | 5 |
6 |
7 | Drag image here to search 8 |
9 |
10 |
11 | 14 |
15 |
16 |

Dreamcatcher photo source finder

17 |
18 |
19 |

{{ news|safe }}

20 |
21 | 22 | {% block forms %} 23 | {% include 'input_forms.html' %} 24 | {% endblock forms %} 25 | 26 | {% block error_msg %} 27 | {% endblock error_msg %} 28 | 29 | {% block results %} 30 | {% endblock results %} 31 | 32 |
33 |
34 | 47 | {% include 'scripts.html' %} 48 | 49 | 50 | -------------------------------------------------------------------------------- /nitter/nitter.conf: -------------------------------------------------------------------------------- 1 | [Server] 2 | hostname = "nitter.net" # for generating links, change this to your own domain/ip 3 | title = "nitter" 4 | address = "0.0.0.0" 5 | port = 8080 6 | https = false # disable to enable cookies when not using https 7 | httpMaxConnections = 100 8 | staticDir = "/nitter/public" 9 | 10 | [Cache] 11 | listMinutes = 240 # how long to cache list info (not the tweets, so keep it high) 12 | rssMinutes = 10 # how long to cache rss queries 13 | redisHost = "localhost" # Change to "nitter-redis" if using docker-compose 14 | redisPort = 6379 15 | redisPassword = "" 16 | redisConnections = 20 # minimum open connections in pool 17 | redisMaxConnections = 30 18 | # new connections are opened when none are available, but if the pool size 19 | # goes above this, they're closed when released. 
don't worry about this unless 20 | # you receive tons of requests per second 21 | 22 | [Config] 23 | hmacKey = "secretkey" # random key for cryptographic signing of video urls 24 | base64Media = false # use base64 encoding for proxied media urls 25 | enableRSS = true # set this to false to disable RSS feeds 26 | enableDebug = false # enable request logs and debug endpoints (/.sessions) 27 | proxy = "" # http/https url, SOCKS proxies are not supported 28 | proxyAuth = "" 29 | 30 | # Change default preferences here, see src/prefs_impl.nim for a complete list 31 | [Preferences] 32 | theme = "Nitter" 33 | replaceTwitter = "" 34 | replaceYouTube = "" 35 | replaceReddit = "" 36 | proxyVideos = true 37 | hlsPlayback = false 38 | infiniteScroll = false 39 | -------------------------------------------------------------------------------- /src/templates/dc_app.html: -------------------------------------------------------------------------------- 1 | {% extends "sourcecatcher.html" %} 2 | 3 | {% block results %} 4 |
5 |
6 | 7 |
8 | {{ app_poster|e }} 9 |
10 |
11 |

{{ app_text|e }}

12 | {% if app_video %} 13 | Download video 14 | {% endif %} 15 |
16 |
17 | {% if app_video %} 18 | 19 | 20 | 36 | {% endif %} 37 | {% if app_images %} 38 | {% for f in app_images %} 39 | 40 | {% endfor %} 41 | {% endif %} 42 |
43 | {% endblock results %} 44 | -------------------------------------------------------------------------------- /src/templates/input_forms.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 | 5 | 11 |
12 |
13 |

OR

14 |
15 | 16 | 17 | 39 | 44 |
45 |
46 | -------------------------------------------------------------------------------- /src/templates/navbar.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 37 |
38 | -------------------------------------------------------------------------------- /Containerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Executable builders 3 | # 4 | 5 | FROM almalinux:9 as rust-builder 6 | RUN dnf group install -y 'Development Tools' 7 | RUN bash -c 'curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y' 8 | ENV PATH=/root/.cargo/bin:$PATH 9 | 10 | 11 | FROM rust-builder as nitter-scraper-builder 12 | RUN cargo install --git https://github.com/evanc577/nitter-scraper.git 13 | 14 | 15 | FROM rust-builder as discord-scraper-builder 16 | RUN dnf install -y openssl-devel 17 | ENV RUSTFLAGS="--cfg tokio_unstable" 18 | RUN cargo install --git https://github.com/evanc577/sourcecatcher-discord-scraper.git 19 | 20 | 21 | FROM ubuntu:latest as nitter-builder 22 | RUN apt-get update 23 | RUN apt-get install -y libsass-dev gcc git libc-dev nim 24 | WORKDIR /src 25 | RUN git clone https://github.com/zedeus/nitter 26 | WORKDIR /src/nitter 27 | RUN nimble install -y --depsOnly 28 | RUN nimble build -d:danger -d:lto -d:strip --mm:refc && nimble scss && nimble md 29 | 30 | 31 | # 32 | # Main Sourcecatcher image 33 | # 34 | FROM almalinux:9-minimal 35 | LABEL org.opencontainers.image.source="https://github.com/evanc577/sourcecatcher" 36 | 37 | WORKDIR /sourcecatcher 38 | RUN microdnf install -y python3 python3-devel python3-pip libdb-devel gcc gcc-c++ redis findutils 39 | COPY requirements.txt /sourcecatcher 40 | RUN pip install -r requirements.txt 41 | 42 | RUN useradd -c "Nitter user" -d /nitter -s /bin/sh nitter 43 | 44 | COPY systemd/* /etc/systemd/system 45 | RUN systemctl enable redis nitter sourcecatcher.service sourcecatcher-update.timer 46 | 47 | COPY src/ /sourcecatcher/src/ 48 | COPY scripts/ /sourcecatcher/scripts/ 49 | RUN mkdir -p /sourcecatcher/images 50 | 51 | COPY --from=nitter-scraper-builder /root/.cargo/bin/nitter-scraper /usr/local/bin 52 | COPY 
--from=discord-scraper-builder /root/.cargo/bin/sourcecatcher-discord-scraper /usr/local/bin 53 | 54 | COPY nitter/nitter.conf /nitter 55 | COPY --from=nitter-builder /src/nitter/nitter /usr/local/bin 56 | COPY --from=nitter-builder /src/nitter/public /nitter/public 57 | RUN chown -R nitter:nitter /nitter 58 | 59 | EXPOSE 80 60 | 61 | CMD ["/sbin/init"] # Start systemd as PID 1 62 | -------------------------------------------------------------------------------- /src/templates/head.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Sourcecatcher{% if page_title %} | {{ page_title|e }}{% endif %} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/sc_exceptions.py: -------------------------------------------------------------------------------- 1 | class SCError(Exception): 2 | pass 3 | 4 | class EntityTooLarge(SCError): 5 | """Raised when submitted file is too large""" 6 | 7 | def __str__(self): 8 | return "The submitted file was too large (max 15MB)" 9 | 10 | class InvalidDCAppLink(SCError): 11 | """Raised when an invalid Dreamcatcher app link is submitted""" 12 | 13 | def __str__(self): 14 | return "Invalid Dreamcatcher app url" 15 | 16 | class FullSizeDCAppImage(SCError): 17 | """Raised when a direct link to a Dreamcatcher app image is already full size""" 18 | 19 | def __str__(self): 20 | return "Invalid Dreamcatcher app url, or the image is already full size" 21 | 22 | class NoMatchesFound(SCError): 23 | """Raised when no matching images are found""" 24 | 25 | def __str__(self): 26 | return "No matches found. 
Possible reasons:" 27 | 28 | def reasons(self): 29 | reasons = [ 30 | "The image was heavily altered or cropped", 31 | "Twitter is not the source of this image", 32 | "Sourcecatcher is not following the source Twitter user", 33 | ] 34 | return reasons 35 | 36 | class InvalidLink(SCError): 37 | """Raised when an invalid link is used""" 38 | 39 | def __str__(self): 40 | return "Could not find any data at this link" 41 | 42 | class InvalidImage(SCError): 43 | """Raised when an image cannot be opened""" 44 | 45 | def __str__(self): 46 | return "Could not open image (is it actually an image?)" 47 | 48 | class DCAppError(SCError): 49 | 50 | def __init__(self, reason): 51 | self.reason = reason 52 | 53 | def __str__(self): 54 | return f"Could not connect to DC app website ({self.reason})" 55 | 56 | class TWError(SCError): 57 | 58 | def __init__(self, message, user=None, tweet_id=None): 59 | self.message = message 60 | self.link = None 61 | if user is not None and tweet_id is not None: 62 | self.link = f"https://twitter.com/{user}/status/{tweet_id}" 63 | 64 | def __str__(self): 65 | return f"A matching image was found but the tweet no longer exists ({self.message})" 66 | 67 | class TWRateError(SCError): 68 | 69 | def __str__(self): 70 | return "Sourcecatcher reached the Twitter API rate limit, try again later" 71 | 72 | class VideoDownloadError(SCError): 73 | 74 | def __str__(self): 75 | return "Could not download file" 76 | 77 | class AnimatedGIFError(SCError): 78 | 79 | def __str__(self): 80 | return "Searching animated GIFs is not supported" 81 | -------------------------------------------------------------------------------- /src/templates/match_results.html: -------------------------------------------------------------------------------- 1 | {% extends "sourcecatcher.html" %} 2 | 3 | {% block results %} 4 |
5 |

Results

6 |
7 | {% if warning_msg %} 8 |
9 | {{ warning_msg|safe }} 10 |
11 | {% endif %} 12 |
13 | {% for tweet in tweets %} 14 | {% if tweet.custom %} 15 |
16 | 17 |
Similarity: {{ tweet.score }}
18 | {% if tweet.is_backup %} 19 |
This tweet is no longer available
20 | {% endif %} 21 | 22 |
23 | {% if tweet.is_backup %} 24 | {{ tweet.screen_name|e }} avatar 25 | {% else %} 26 | {{ tweet.screen_name|e }} avatar 27 | {% endif %} 28 |
29 | 30 | {{ tweet.identity_name|e }} 31 | 32 | 33 | @{{ tweet.screen_name|e }} 34 | 35 |
36 |
37 | {% if tweet.ts %} 38 |
39 | 51 |
52 | {% endif %} 53 |
54 |

{{ tweet.text_html|e }}

55 |
56 |
57 | {% for image in tweet.images %} 58 | 59 | Twitter image 60 | 61 | {% endfor %} 62 |
63 |
64 | {% else %} 65 | {{ tweet.embed_tweet|safe }} 66 | {% endif %} 67 | {% endfor %} 68 |
69 | {% endblock results %} 70 | -------------------------------------------------------------------------------- /src/static/tweet.css: -------------------------------------------------------------------------------- 1 | .author > .avatar { 2 | grid-column: 1; 3 | border-radius: 50%; 4 | height: 36px; 5 | width: 36px; 6 | margin: 20px 10px 20px 20px; 7 | } 8 | 9 | .author { 10 | float: none; 11 | display: -ms-inline-grid; 12 | display: inline-grid; 13 | height: 36px; 14 | } 15 | 16 | .backup_tweet { 17 | margin: 20px 20px 0 20px; 18 | }.image_container > img { 19 | position: absolute; 20 | width: 100%; 21 | height: 100%; 22 | object-fit: cover; 23 | top: 0; bottom: 0; left: 0; right: 0; 24 | transition: 0.1s; 25 | } 26 | 27 | .image_container > img:hover { 28 | transform: scale(1.05); 29 | } 30 | 31 | .image_container { 32 | position: relative; 33 | width: 40%; 34 | margin: 3px; 35 | padding-top: 30%; 36 | flex: 1 0 34%; 37 | z-index: 1; 38 | overflow: hidden; 39 | } 40 | 41 | .image_container.num_media1, 42 | .image_container.num_media2 { 43 | padding-top: 60%; 44 | } 45 | 46 | .image_container:hover { 47 | box-shadow: 0 0 1px 3px var(--dc-yellow); 48 | } 49 | 50 | .name_container > span.identity_name { 51 | color: var(--fg-color); 52 | font-weight: 700; 53 | font-size: 1.1em; 54 | } 55 | 56 | .name_container > span.screen_name { 57 | color: var(--fg2-color); 58 | font-size: 0.9em; 59 | } 60 | 61 | .name_container { 62 | grid-column: 2; 63 | display: flex; 64 | flex-direction: column; 65 | justify-content: center; 66 | } 67 | 68 | .tweet_embed > .datetime { 69 | font-size: 0.9em; 70 | margin-left: 20px; 71 | margin-right: 20px; 72 | margin-top: 10px; 73 | color: var(--fg2-color); 74 | } 75 | 76 | .tweet_embed > .match_score { 77 | margin: 20px 20px 0px 20px; 78 | font-size: 1em; 79 | color: var(--fg2-color); 80 | } 81 | 82 | .tweet_embed > .tweet_images { 83 | display: flex; 84 | flex-wrap: wrap; 85 | justify-content: center; 86 | align-items: center; 87 | 
margin-top: 5px; 88 | margin-bottom: 20px; 89 | padding-left: 20px; 90 | padding-right: 20px; 91 | } 92 | 93 | .tweet_embed > .tweet_text { 94 | float: none; 95 | font-size: 1em; 96 | color: var(--fg-color); 97 | margin: 20px; 98 | } 99 | 100 | .tweet_embed { 101 | text-decoration: none; 102 | position: relative; 103 | display: block; 104 | width: 500px; 105 | max-width: 100%; 106 | min-width: 250px; 107 | margin: 5px; 108 | 109 | border-radius: 5px; 110 | box-shadow: 0px 0px 5px var(--box-shadow-color); 111 | color: var(--fg-color); 112 | background-color: var(--tweet-bg-color); 113 | } 114 | 115 | .tweet_embed:hover { 116 | background-color: var(--tweet-hover-bg-color); 117 | box-shadow: 0px 0px 20px var(--box-shadow-color); 118 | } 119 | 120 | .tweet_embed:visited { 121 | color: inherit; 122 | } 123 | 124 | .tweet_embed_link::after { 125 | content: ''; 126 | position: absolute; 127 | top: 0; 128 | left: 0; 129 | bottom: 0; 130 | right: 0; 131 | } 132 | 133 | .twitter_logo { 134 | display: inline-block; 135 | position: absolute; 136 | margin-top: 10px; 137 | margin-right: 10px; 138 | top: 0; 139 | right: 0; 140 | width: 36px; 141 | height: 36px; 142 | } 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [sourcecatcher.com](https://www.sourcecatcher.com) 2 | A reverse image search tool for InSomnia 3 | 4 | See the [Reddit release thread](https://www.reddit.com/r/dreamcatcher/comments/c923qp/sourcecatchercom_a_reverse_image_search_tool_for/) for more information about Sourcecatcher 5 | 6 | --- 7 | 8 | ## Setup 9 | 10 | Sourcecatcher is published as an OCI container. 11 | 12 | ### Directory structure 13 | 14 | ``` 15 | $ tree 16 | . 
17 | ├── config 18 | │   ├── nitter 19 | │   │   └── sessions.jsonl 20 | │   └── sourcecatcher 21 | │   ├── config-discord.toml 22 | │   └── config.yaml 23 | └── live 24 | ├── discord.db 25 | ├── phash_index.ann 26 | └── twitter_scraper.db 27 | ``` 28 | 29 | See the [Config files](#config-files) section for configuration file setup. 30 | The `live` directory contains Sourcecatcher's databases; it should be persisted to a host directory (see the next section). 31 | 32 | ### Quadlet setup 33 | 34 | Create a quadlet generator file. Remember to adjust the container network and volume mounts for your setup. 35 | ``` 36 | $ cat ~/.config/containers/systemd/sourcecatcher.container 37 | [Unit] 38 | Description=Sourcecatcher reverse image search service 39 | After=network.target 40 | 41 | [Container] 42 | ContainerName=sourcecatcher 43 | Image=ghcr.io/evanc577/sourcecatcher:latest 44 | AutoUpdate=registry 45 | Network=bridge 46 | PublishPort=9000:80 47 | Volume=/home/sourcecatcher/config/sourcecatcher/:/sourcecatcher/config/:Z,ro 48 | Volume=/home/sourcecatcher/config/nitter/sessions.jsonl:/nitter/sessions.jsonl:Z,ro 49 | Volume=/home/sourcecatcher/live/:/sourcecatcher/live/:Z 50 | 51 | [Install] 52 | WantedBy=multi-user.target default.target 53 | ``` 54 | Start the container: 55 | ```console 56 | $ systemctl --user daemon-reload 57 | $ systemctl --user start sourcecatcher.service 58 | ``` 59 | 60 | ### Config files 61 | 62 | #### `config.yaml` 63 | 64 | `config.yaml` contains runtime information needed by Sourcecatcher. 
65 | 66 | ```yaml 67 | # No need to change these for the OCI container 68 | media_dir: "/sourcecatcher/images/" 69 | nitter_instance: "http://0.0.0.0:8080" 70 | 71 | # Image hashing options 72 | cpus: 4 73 | recalculate_kmeans: False 74 | 75 | # Set to true to enable scraping Discord server channels for Twitter links 76 | scrape_discord: true 77 | 78 | # These users will show up first in search results 79 | priority_users: 80 | - "hf_dreamcatcher" 81 | - "jp_dreamcatcher" 82 | - "7_DREAMERS" 83 | - "2Moori" 84 | 85 | # Set of users to scrape via Nitter 86 | users: 87 | - "hf_dreamcatcher" 88 | - "7_DREAMERS" 89 | - "2Moori" 90 | ``` 91 | 92 | #### `config-discord.toml` 93 | 94 | ``` 95 | database_file = "working/discord.db" 96 | discord_token = "your-discord-api-token" 97 | 98 | # List of Discord channel IDs to scrape 99 | watched_channels = [ 100 | "253293425460248580", 101 | "253293450030481418", 102 | ] 103 | ``` 104 | 105 | #### `sessions.jsonl` 106 | 107 | Twitter user accounts used for running a local Nitter instance. 108 | See upstream [Nitter](https://github.com/zedeus/nitter) documentation for how to generate this file.
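For context on the `live` databases above: `phash_index.ann` is an Annoy index of 64-bit perceptual hashes, and `src/find_match.py` ranks candidate images by the Hamming distance between hashes (keeping results at distance 8 or less). A minimal sketch of that distance measure; the `hamming_distance` helper and the sample values here are illustrative only, not part of the codebase:

```python
# Sketch of the phash matching idea: each image is reduced to a 64-bit
# perceptual hash, and two images are considered a match when only a few
# bits of their hashes differ.

def hamming_distance(h1: int, h2: int) -> int:
    """Number of differing bits between two 64-bit hashes."""
    return bin(h1 ^ h2).count("1")

a = 0xD1C2B3A495867708   # hypothetical phash of an image
b = a ^ 0b101            # same hash with two bits flipped

print(hamming_distance(a, a))  # 0: identical images
print(hamming_distance(a, b))  # 2: near-duplicate, well under the threshold
```

Near-duplicates (recompressed or lightly edited copies) keep most hash bits intact, which is why a small fixed distance threshold works for reverse image search.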
109 | -------------------------------------------------------------------------------- /src/gen_phashes.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool, TimeoutError, cpu_count 2 | from pathlib import Path 3 | from PIL import Image 4 | from annoy import AnnoyIndex 5 | from sc_helpers import * 6 | import imagehash 7 | import os 8 | import yaml 9 | import sqlite3 10 | import numpy as np 11 | import sys 12 | 13 | 14 | def calc_phash(files): 15 | """calculate the phash of an image""" 16 | i = files[0] # annoy index 17 | filename = os.path.join(files[1][0], files[1][1]) 18 | 19 | phash = imagehash.phash(Image.open(filename)) 20 | # print('file #{:08d}, phash: {}, filename: {}'.format(i, str(phash), filename)) 21 | 22 | basename = os.path.basename(filename) 23 | dirname = os.path.dirname(filename) 24 | 25 | return basename, dirname, i, str(phash) 26 | 27 | 28 | def gen_phash(): 29 | """calculate the phashes of all images and insert them into a searchable database""" 30 | 31 | # parse config.yaml 32 | try: 33 | path = config_file_path() 34 | with open(path) as f: 35 | config = yaml.safe_load(f) 36 | except IOError: 37 | print("error loading config file") 38 | sys.exit(1) 39 | 40 | index = AnnoyIndex(64, metric='hamming') 41 | 42 | # set up database 43 | conn = sqlite3.connect(os.path.join(base_path(), 'working/twitter_scraper.db')) 44 | c = conn.cursor() 45 | c.execute('CREATE TABLE IF NOT EXISTS hashes (filename text, path text, idx int32, hash text, UNIQUE (idx))') 46 | 47 | # find previously hashed files 48 | c.execute('SELECT path, filename FROM hashes') 49 | done_hashes = set(c.fetchall()) 50 | print('current hashed files: {}'.format(len(done_hashes))) 51 | 52 | # get next starting index 53 | c.execute('SELECT idx FROM hashes ORDER BY idx DESC LIMIT 1') 54 | cur_max_id = c.fetchone() 55 | if cur_max_id is None: 56 | next_id = 0 57 | else: 58 | next_id = cur_max_id[0] + 1 59 | 60 | try: 61
| num_cpus = config['cpus'] 62 | except KeyError: 63 | num_cpus = cpu_count() 64 | 65 | # calculate phash of new images 66 | c.execute('SELECT path, filename FROM info') 67 | files = set(c.fetchall()) - done_hashes 68 | print('files to hash: {}'.format(len(files))) 69 | files = enumerate(files, next_id) 70 | with Pool(processes=num_cpus) as pool: 71 | for r in pool.imap(calc_phash, files, chunksize=64): 72 | try: 73 | c.execute('INSERT INTO hashes VALUES (?,?,?,?)', (r[0], r[1], r[2], r[3])) 74 | except sqlite3.IntegrityError: 75 | pass 76 | print("finished hashing files") 77 | 78 | # insert hashes into annoy 79 | c.execute('SELECT idx,hash from hashes') 80 | hashes = c.fetchall() 81 | for h in hashes: 82 | # calculate hash array 83 | h_int = int(h[1], 16) 84 | h_arr = [None] * 64 85 | for i in range(64): 86 | h_arr[63 - i] = h_int & (1 << i) != 0 87 | 88 | # insert hash into annoy 89 | index.add_item(h[0], h_arr) 90 | 91 | conn.commit() 92 | 93 | index.build(50) 94 | index.save(os.path.join(base_path(), 'working/phash_index.ann')) 95 | 96 | 97 | if __name__ == '__main__': 98 | gen_phash() 99 | -------------------------------------------------------------------------------- /src/find_similar.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import sys 5 | import pickle 6 | from annoy import AnnoyIndex 7 | import sqlite3 8 | from find_match import download_content 9 | from sc_helpers import * 10 | import joblib 11 | import time 12 | 13 | 14 | def image_detect_and_compute(img_name, location='file'): 15 | """Detect and compute interest points and their descriptors.""" 16 | detector = cv2.ORB_create() 17 | computer = cv2.xfeatures2d.FREAK_create() 18 | 19 | # load image 20 | if location == 'file': 21 | img = cv2.imread(img_name) 22 | elif location == 'url': 23 | content = np.asarray(download_content(img_name)) 24 | img = cv2.imdecode(content, cv2.IMREAD_UNCHANGED) 25 | 26 | # 
compute descriptors 27 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 28 | kp = detector.detect(img, None) 29 | kp = sorted(kp, key=lambda x: -x.response)[:2048] 30 | kp, des = computer.compute(img, kp) 31 | 32 | # calculate histogram 33 | indices = kmeans.predict(des) 34 | hist = np.zeros(kmeans.cluster_centers_.shape[0], dtype=np.float32) 35 | for i in indices: 36 | hist[i] = hist[i] + 1 37 | 38 | return hist 39 | 40 | 41 | def find_similar(img_path, location='file'): 42 | print(img_path) 43 | global kmeans 44 | 45 | # load files 46 | annoy_map = joblib.load(os.path.join(base_path(), 'live/BOW_annoy_map.pkl')) 47 | kmeans = joblib.load(os.path.join(base_path(), 'live/kmeans.pkl')) 48 | 49 | index = AnnoyIndex(kmeans.n_clusters, 'angular') 50 | index.load(os.path.join(base_path(), 'live/BOW_index.ann')) 51 | 52 | conn = sqlite3.connect(os.path.join(base_path(), 'live/twitter_scraper.db')) 53 | c = conn.cursor() 54 | 55 | # compute histogram 56 | start_time = time.time() 57 | try: 58 | hist = image_detect_and_compute(img_path, location=location) 59 | except cv2.error: 60 | return [] 61 | 62 | 63 | # find most similar images 64 | n = 12 65 | n_trees = index.get_n_trees() 66 | ann_start_time = time.time() 67 | annoy_results = index.get_nns_by_vector(hist, n, include_distances=True, search_k=-1) 68 | ann_end_time = time.time() 69 | 70 | # process results 71 | results = [] 72 | max_score = -1 73 | for i,idx in enumerate(annoy_results[0]): 74 | # discard bad results 75 | if annoy_results[1][i] > 1.0: 76 | break 77 | 78 | score = int(100 * (1 - annoy_results[1][i])) 79 | if i == 0: 80 | max_score = score 81 | elif max_score - score > 10: 82 | break 83 | 84 | # get tweet info 85 | path = annoy_map[idx] 86 | basename = os.path.basename(path) 87 | dirname = os.path.dirname(path) 88 | c.execute('SELECT id FROM info WHERE filename=(?) 
AND path=(?)', (basename, dirname)) 89 | tweet_id = c.fetchone()[0] 90 | tup = (score, tweet_id, basename,) 91 | results.append(tup) 92 | 93 | end_time = time.time() 94 | 95 | print(results) 96 | print(f"total search time (cbir): {end_time - start_time:06f} seconds") 97 | print(f"annoy search time (cbir): {ann_end_time - ann_start_time:06f} seconds") 98 | 99 | return results 100 | 101 | if __name__ == "__main__": 102 | find_similar(sys.argv[2], location=sys.argv[1]) 103 | -------------------------------------------------------------------------------- /src/find_match.py: -------------------------------------------------------------------------------- 1 | from sc_exceptions import * 2 | from pathlib import Path 3 | import PIL 4 | from PIL import Image 5 | from io import BytesIO 6 | from annoy import AnnoyIndex 7 | from sc_helpers import * 8 | import imagehash 9 | import argparse 10 | import sys 11 | import requests 12 | import sqlite3 13 | import os 14 | import time 15 | import functools 16 | 17 | Image.warnings.simplefilter('error', Image.DecompressionBombWarning) 18 | 19 | def find(location, path): 20 | """find the closest images to an image 21 | 22 | Given a path or a url to an image, returns the closest matches 23 | (phash hamming distance) 24 | 25 | Arguments: 26 | location: 'url' or 'file' 27 | path: the actual url or path to the image 28 | """ 29 | 30 | # load database and annoy index 31 | index = AnnoyIndex(64, metric='hamming') 32 | index.load(os.path.join(base_path(), 'live/phash_index.ann')) 33 | conn = sqlite3.connect(os.path.join(base_path(), 'live/twitter_scraper.db')) 34 | c = conn.cursor() 35 | 36 | # load the requested image 37 | img = load_image(location, path) 38 | 39 | 40 | start_time = time.time() 41 | 42 | # get the image's phash 43 | phash = imagehash.phash(img) 44 | phash_arr = phash.hash.flatten() 45 | 46 | # find the closest matches 47 | n = 16 48 | n_trees = index.get_n_trees() 49 | ann_start_time = time.time() 50 | annoy_results =
index.get_nns_by_vector(phash_arr, n, include_distances=True, search_k=100*n*n_trees) 51 | ann_end_time = time.time() 52 | 53 | # look up the location of the match and its tweet info 54 | results = [] 55 | for idx, score in map(list, zip(*annoy_results)): 56 | # only keep close enough matches 57 | if score > 8: 58 | break 59 | 60 | # find respective image in database 61 | c.execute('SELECT path, filename FROM hashes WHERE idx=(?)', (idx,)) 62 | dirname, basename = c.fetchone() 63 | c.execute('SELECT id FROM info WHERE filename=(?) AND path=(?)', (basename, dirname)) 64 | tweet_id = c.fetchone() 65 | tweet_id = tweet_id[0] 66 | results.append((score, tweet_id, basename)) 67 | 68 | conn.close() 69 | 70 | # sort results 71 | results = sorted(results, key=lambda x: (-x[0], x[1])) 72 | 73 | end_time = time.time() 74 | 75 | print(results) 76 | print(f"total search time (phash): {end_time - start_time:06f} seconds") 77 | print(f"annoy search time (phash): {ann_end_time - ann_start_time:06f} seconds") 78 | 79 | return results 80 | 81 | 82 | def load_image(location, path): 83 | """Load the user requested image""" 84 | 85 | if location == 'url': 86 | content = download_content(path) 87 | 88 | try: 89 | img = Image.open(BytesIO(content)) 90 | except IOError: 91 | raise InvalidImage 92 | else: 93 | try: 94 | img = Image.open(path) 95 | except IOError as e: 96 | raise InvalidImage 97 | 98 | # check if GIF is not animated 99 | try: 100 | if img.is_animated: 101 | raise AnimatedGIFError 102 | except AttributeError: 103 | pass 104 | 105 | return img 106 | 107 | 108 | if __name__ == "__main__": 109 | parser = argparse.ArgumentParser(description="Find a close image match") 110 | parser.add_argument('location', help='location of image', nargs=1, choices=('url', 'file')) 111 | parser.add_argument('path', help='url or path', nargs=1) 112 | args = parser.parse_args() 113 | 114 | find(args.location[0], args.path[0]) 115 | 116 | 
-------------------------------------------------------------------------------- /src/sc_helpers.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from flask import render_template, make_response 3 | from sc_exceptions import * 4 | import os 5 | import requests 6 | import sqlite3 7 | import yaml 8 | 9 | 10 | def render_page(template, code=200, **kwargs): 11 | """Get stats and render template""" 12 | num_photos, mtime = stats() 13 | kwargs['num_photos'] = num_photos 14 | kwargs['mtime'] = mtime 15 | news = get_news() 16 | if news is not None: 17 | kwargs["news"] = news 18 | 19 | resp = make_response(render_template(template, **kwargs), code) 20 | return resp 21 | 22 | 23 | def get_news(): 24 | try: 25 | dirpath = os.path.dirname(os.path.realpath(__file__)) 26 | path = os.path.join(dirpath, 'config.yaml') 27 | with open(path) as f: 28 | config = yaml.safe_load(f) 29 | except IOError: 30 | return None 31 | 32 | try: 33 | return config["news"] 34 | except KeyError: 35 | return None 36 | 37 | 38 | def stats(): 39 | """returns stats for the database""" 40 | conn = sqlite3.connect(os.path.join(base_path(), 'live/twitter_scraper.db')) 41 | c = conn.cursor() 42 | 43 | c.execute('SELECT MAX(_ROWID_) FROM info LIMIT 1') 44 | num_photos = c.fetchone()[0] 45 | 46 | mtime = datetime.utcfromtimestamp(os.path.getmtime(os.path.join(base_path(), 'live/twitter_scraper.db'))) 47 | now = datetime.utcnow() 48 | time_diff = secs_to_str(int((now - mtime).total_seconds())) # total_seconds(), not .seconds, so gaps over a day aren't truncated 49 | 50 | conn.close() 51 | return num_photos, time_diff 52 | 53 | 54 | def secs_to_str(secs): 55 | """converts number of seconds to a human readable string""" 56 | SECS_PER_MIN = 60 57 | SECS_PER_HR = SECS_PER_MIN * 60 58 | SECS_PER_DAY = SECS_PER_HR * 24 59 | 60 | if secs < SECS_PER_MIN: 61 | if secs == 1: 62 | return '1 second' 63 | else: 64 | return '{} seconds'.format(secs) 65 | if secs < SECS_PER_HR: 66 | mins = secs // SECS_PER_MIN 67 | if mins == 1: 68 |
return '1 minute' 69 | else: 70 | return '{} minutes'.format(mins) 71 | if secs < SECS_PER_DAY: 72 | hrs = secs // SECS_PER_HR 73 | if hrs == 1: 74 | return '1 hour' 75 | else: 76 | return '{} hours'.format(hrs) 77 | days = secs // SECS_PER_DAY 78 | if days == 1: 79 | return '1 day' 80 | else: 81 | return '{} days'.format(secs // SECS_PER_DAY) 82 | 83 | 84 | def download_content(url): 85 | MAX_DOWNLOAD = 15 * 1024 * 1024 86 | try: 87 | headers = {"User-Agent": "Sourcecatcher"} 88 | response = requests.get(url, headers=headers, stream=True, timeout=30) 89 | except requests.exceptions.MissingSchema as e: 90 | try: 91 | # try https 92 | response = requests.get("https://" + url, stream=True, timeout=30) 93 | except requests.RequestException as e: 94 | try: 95 | # try http 96 | response = requests.get("http://" + url, stream=True, timeout=30) 97 | except requests.RequestException as e: 98 | raise InvalidLink 99 | except requests.exceptions.RequestException as e: 100 | raise InvalidLink 101 | 102 | if not response.ok: 103 | raise InvalidLink 104 | 105 | size = 0 106 | content = bytearray() 107 | for chunk in response.iter_content(1024): 108 | size += len(chunk) 109 | content += chunk 110 | if size > MAX_DOWNLOAD: 111 | raise EntityTooLarge 112 | 113 | return content 114 | 115 | 116 | def base_path(): 117 | return "/sourcecatcher" 118 | 119 | 120 | def config_file_path(): 121 | return os.path.join(base_path(), "config/config.yaml") 122 | -------------------------------------------------------------------------------- /src/templates/scripts.html: -------------------------------------------------------------------------------- 1 | 151 | -------------------------------------------------------------------------------- /src/image_search.py: -------------------------------------------------------------------------------- 1 | from cachetools import cached, LRUCache 2 | from cachetools.keys import hashkey 3 | from datetime import timedelta, datetime 4 | from find_match import 
find 5 | from sc_exceptions import * 6 | import hashlib 7 | import os 8 | import re 9 | import requests_cache 10 | import sqlite3 11 | import sys 12 | import yaml 13 | from sc_helpers import * 14 | 15 | # parse config.yaml 16 | try: 17 | path = config_file_path() 18 | with open(path) as f: 19 | config = yaml.safe_load(f) 20 | except IOError: 21 | print("error loading config file") 22 | sys.exit(1) 23 | try: 24 | temp = config['priority_users'] 25 | num_prio_users = len(temp) 26 | priority_users = {} 27 | for i in range(num_prio_users): 28 | priority_users[temp[i].casefold()] = i 29 | except (KeyError, TypeError): 30 | num_prio_users = 0 31 | priority_users = {} 32 | 33 | # cache http requests 34 | req_expire_after = timedelta(seconds=600) 35 | cached_req_session = requests_cache.CachedSession('sc_cache', backend='sqlite', expire_after=req_expire_after) 36 | 37 | # image search cache 38 | image_search_cache = LRUCache(maxsize=128) 39 | def image_search_key(location, path): 40 | if location == "file": 41 | with open(path, "rb") as f: 42 | bytes = f.read() # read entire file as bytes 43 | return hashlib.sha256(bytes).hexdigest() 44 | return hashkey(path) 45 | 46 | 47 | @cached(cache=image_search_cache, key=image_search_key) 48 | def image_search(location, path): 49 | tweet_ids = [] 50 | tweets = [] 51 | warning_msg = None 52 | id_score = {} 53 | count = 0 54 | 55 | # try phash search first 56 | found = find(location, path) 57 | for score, tweet_id, basename in found: 58 | if tweet_id in id_score: 59 | continue 60 | 61 | score_percent = calc_score_percent(score) 62 | 63 | tweet_ids.append(str(tweet_id)) 64 | id_score[tweet_id] = score_percent 65 | count += 1 66 | 67 | # show error if no results are found 68 | if count == 0: 69 | raise NoMatchesFound 70 | 71 | # add tweets that have been removed 72 | for tweet_id in tweet_ids: 73 | tweets.append(get_saved_tweet(tweet_id, id_score[int(tweet_id)])) 74 | 75 | # limit each twitter user to 3 tweets 76 | user_count = {} 77 | 
temp = [] 78 | for tweet in tweets: 79 | if tweet['screen_name'].casefold() not in user_count: 80 | user_count[tweet['screen_name'].casefold()] = 0 81 | if user_count[tweet['screen_name'].casefold()] >= 3: 82 | continue 83 | user_count[tweet['screen_name'].casefold()] += 1 84 | temp.append(tweet) 85 | tweets = temp 86 | 87 | # show error if no tweets are found 88 | if len(tweets) == 0: 89 | raise NoMatchesFound 90 | 91 | # sort tweets by score then by id (date) 92 | tweets.sort(key=lambda tweet: (priority(tweet['screen_name']), -min(90, tweet['score']), tweet['tweet_id'])) 93 | 94 | kwargs = { 95 | 'tweets': tweets, 96 | 'warning_msg': warning_msg, 97 | 'page_title': 'Search', 98 | } 99 | 100 | if location == 'url': 101 | kwargs['url'] = path 102 | 103 | return kwargs 104 | 105 | def priority(user): 106 | """ 107 | Return the priority of the given Twitter user, based on the config file 108 | """ 109 | 110 | if user.casefold() in priority_users: 111 | return priority_users[user.casefold()] 112 | 113 | return num_prio_users 114 | 115 | 116 | def get_saved_tweet(tweet_id, score): 117 | """ 118 | Create tweet embed from saved data 119 | """ 120 | conn = sqlite3.connect(os.path.join(base_path(), 'live/twitter_scraper.db')) 121 | c = conn.cursor() 122 | 123 | tweet = {} 124 | tweet['custom'] = True 125 | tweet['is_backup'] = False 126 | tweet['score'] = score 127 | tweet['tweet_id'] = int(tweet_id) 128 | 129 | # calculate timestamp from id 130 | ts = id2ts(tweet_id) 131 | tweet['ts'] = datetime.utcfromtimestamp(ts).isoformat() + "+00:00" 132 | 133 | # Set text 134 | c.execute('SELECT * FROM tweet_text where id=(?)', (tweet_id,)) 135 | _, text = c.fetchone() 136 | tweet['text_html'] = re.sub(r"https://t\.co/\w+$", "", text) 137 | 138 | c.execute('SELECT * FROM info where id=(?)', (tweet_id,)) 139 | info = c.fetchall() 140 | 141 | # Set screen_name 142 | tweet['screen_name'] = info[0][2] 143 | 144 | # Add images 145 | tweet["images"] =
[f"https://pbs.twimg.com/media/{x[0]}" for x in info] 146 | tweet["num_media"] = len(info) 147 | 148 | return tweet 149 | 150 | def calc_score_percent(score): 151 | """calculate the percentage score, where 100 is best and 0 is worst""" 152 | if score > 32: 153 | return 0 154 | 155 | return int(100 - 100 * score / 32) 156 | 157 | def id2ts(tweet_id): 158 | return ((int(tweet_id)>>22) + 1288834974657) / 1000 159 | -------------------------------------------------------------------------------- /src/feature_match.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool, TimeoutError, cpu_count 2 | import numpy as np 3 | import cv2 4 | import gc 5 | import sys 6 | import os 7 | import sqlite3 8 | import pickle 9 | from sklearn.cluster import MiniBatchKMeans 10 | import joblib 11 | from annoy import AnnoyIndex 12 | import yaml 13 | import bsddb3 14 | from sc_helpers import * 15 | 16 | detector = cv2.ORB_create() 17 | computer = cv2.xfeatures2d.FREAK_create() 18 | 19 | # Feature extractor 20 | def extract_features(f, des_length=2048): 21 | """Extract features and descriptors from images""" 22 | try: 23 | idx = f[0] 24 | path = f[1] 25 | print(f'features: idx={idx:08d} path={path}') 26 | 27 | img = cv2.imread(path) 28 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 29 | kp = detector.detect(img, None) 30 | kp = sorted(kp, key=lambda x: -x.response) 31 | kp, des = computer.compute(img, kp) 32 | 33 | des = des[0:des_length] 34 | 35 | return idx, path, des 36 | 37 | except Exception as e: 38 | print(e) 39 | return e 40 | 41 | def compute_histograms(idx, path, descriptors): 42 | """Compute histograms for bag of (visual) words""" 43 | try: 44 | des = deserialize(descriptors[path.encode()]) 45 | print(f'histograms: idx={idx:08d} path={path}') 46 | indices = kmeans.predict(des) 47 | hist = np.zeros(kmeans.cluster_centers_.shape[0], dtype=np.float32) 48 | for i in indices: 49 | hist[i] = hist[i] + 1 50 | 51 | return 
idx, path, hist 52 | except Exception as e: 53 | print(path) 54 | print(e) 55 | return e 56 | 57 | 58 | def deserialize(s): 59 | return np.frombuffer(s, dtype="uint8").reshape((-1, 64)) 60 | 61 | 62 | def gen_cbir(): 63 | """Generate structures needed for content-based image retrieval""" 64 | 65 | global kmeans 66 | 67 | # parse config.yaml 68 | print("parsing config") 69 | try: 70 | path = config_file_path() 71 | with open(path) as f: 72 | config = yaml.safe_load(f) 73 | except IOError: 74 | print("error loading config file") 75 | sys.exit(1) 76 | try: 77 | num_cpus = config['cpus'] 78 | except KeyError: 79 | num_cpus = cpu_count() 80 | 81 | # connect to sqlite database 82 | print("connecting to databases") 83 | conn = sqlite3.connect(os.path.join(base_path(), 'working/twitter_scraper.db')) 84 | c = conn.cursor() 85 | 86 | # load descriptors 87 | descriptors = bsddb3.db.DB() 88 | if os.path.exists(os.path.join(base_path(), "working/descriptors.bdb")): 89 | descriptors.open(os.path.join(base_path(), "working/descriptors.bdb")) 90 | else: 91 | descriptors.open(os.path.join(base_path(), "working/descriptors.bdb"), dbtype=bsddb3.db.DB_BTREE, flags=bsddb3.db.DB_CREATE) 92 | 93 | # calculate descriptors of new images 94 | print("determine files to compute") 95 | c.execute('SELECT path, filename FROM info') 96 | files = c.fetchall() 97 | files = [os.path.join(a,b) for a,b in files] 98 | compute_files = set() 99 | for i,f in enumerate(files): 100 | if descriptors.get(f.encode()) is None: 101 | compute_files.add(f) 102 | if i % 10000 == 0: 103 | print(i) 104 | print('files to compute: {}'.format(len(compute_files))) 105 | files = enumerate(compute_files) 106 | 107 | # extract features from new images 108 | print("computing descriptors") 109 | new_descriptors = {} 110 | with Pool(processes=num_cpus) as pool: 111 | for r in pool.imap(extract_features, files, chunksize=64): 112 | if not isinstance(r, Exception): 113 | des = deserialize(r[2]) 114 | descriptors[r[1].encode()] = 
des 115 | new_descriptors[r[1]] = des 116 | 117 | # create clusters 118 | try: 119 | kmeans = joblib.load(os.path.join(base_path(), 'working/kmeans.pkl')) 120 | n_clusters = kmeans.cluster_centers_.shape[0] 121 | except: 122 | n_clusters = 512 123 | kmeans = MiniBatchKMeans(n_clusters=n_clusters, batch_size=2048) 124 | 125 | # calculate kmeans 126 | print("calculating kmeans") 127 | cur = None 128 | for i,des in enumerate(new_descriptors.items()): 129 | if des[1] is not None: 130 | print(f'calculating kmeans, image: {i:08d}') 131 | if des[1].shape[0] < n_clusters: 132 | if cur is None: 133 | cur = des[1] 134 | else: 135 | cur = np.concatenate((cur, des[1]), axis=0) 136 | if cur is not None and cur.shape[0] > n_clusters: 137 | kmeans = kmeans.partial_fit(np.float32(cur)) 138 | cur = None 139 | else: 140 | if cur is not None: 141 | cur = np.concatenate((cur, des[1]), axis=0) 142 | kmeans = kmeans.partial_fit(np.float32(cur)) 143 | cur = None 144 | else: 145 | kmeans = kmeans.partial_fit(np.float32(des[1])) 146 | if cur is not None: 147 | kmeans = kmeans.partial_fit(np.float32(cur)) 148 | 149 | del new_descriptors 150 | gc.collect() 151 | 152 | # save kmeans 153 | print("saving kmeans") 154 | joblib.dump(kmeans, os.path.join(base_path(), 'working/kmeans.pkl')) 155 | 156 | # set up structures for annoy index 157 | print("setting up annoy structures") 158 | c.execute('SELECT path, filename FROM info') 159 | all_images = c.fetchall() 160 | files = [] 161 | for f in all_images: 162 | fullpath = os.path.join(f[0], f[1]) 163 | if descriptors.get(fullpath.encode()) is not None: 164 | files.append(fullpath) 165 | BOW_annoy_map = {} 166 | for i,f in enumerate(files): 167 | BOW_annoy_map[i] = f 168 | 169 | index = AnnoyIndex(n_clusters, 'angular') 170 | index.on_disk_build(os.path.join(base_path(), 'working/BOW_index.ann')) 171 | 172 | # add histograms to annoy index 173 | print("computing histograms") 174 | for i,f in enumerate(files): 175 | r = compute_histograms(i, f, 
descriptors) 176 | if not isinstance(r, Exception): 177 | index.add_item(r[0], r[2]) 178 | 179 | # build index 180 | print("building index") 181 | index.build(50) 182 | 183 | descriptors.sync() 184 | descriptors.close() 185 | 186 | # save index map 187 | print("saving annoy map") 188 | joblib.dump(BOW_annoy_map, os.path.join(base_path(), 'working/BOW_annoy_map.pkl')) 189 | 190 | if __name__ == '__main__': 191 | gen_cbir() 192 | -------------------------------------------------------------------------------- /src/web_server.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime, timezone 2 | from flask import Flask, make_response, request, jsonify, send_from_directory 3 | from image_search import id2ts, image_search, image_search_cache 4 | from sc_exceptions import * 5 | from sc_helpers import * 6 | from werkzeug.exceptions import HTTPException 7 | import hashlib 8 | import os 9 | import random 10 | import redis 11 | import requests_cache 12 | import sqlite3 13 | import sys 14 | import tldextract 15 | import traceback 16 | import urllib 17 | import yaml 18 | 19 | UPLOAD_FOLDER = 'uploads' 20 | try: 21 | os.mkdir(UPLOAD_FOLDER) 22 | except FileExistsError: 23 | pass 24 | ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} 25 | 26 | app = Flask(__name__) 27 | app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER 28 | app.config['MAX_CONTENT_LENGTH'] = 15 * 1024 * 1024 29 | app.jinja_env.trim_blocks = True 30 | app.jinja_env.lstrip_blocks = True 31 | 32 | redis_db = redis.Redis(host='localhost', port=6379, db=0) 33 | 34 | def urlescape(url): 35 | return urllib.parse.quote(url, safe='') 36 | 37 | # calculate sha256 hash for file 38 | def sha256(filename): 39 | filename = os.getcwd() + filename 40 | with open(filename, "rb") as f: 41 | data = f.read() # read entire file as bytes 42 | return hashlib.sha256(data).hexdigest() 43 | 44 | app.jinja_env.globals.update(sha256=sha256) 45 |
app.jinja_env.globals.update(urlescape=urlescape) 46 | 47 | # parse config.yaml 48 | try: 49 | path = config_file_path() 50 | with open(path) as f: 51 | config = yaml.safe_load(f) 52 | except IOError: 53 | print("error loading config file") 54 | sys.exit(1) 55 | try: 56 | users = config['users'] 57 | media_dir = config['media_dir'] 58 | except KeyError: 59 | print("could not parse users file") 60 | sys.exit(1) 61 | 62 | req_expire_after = timedelta(seconds=600) 63 | cached_req_session = requests_cache.CachedSession('sc_cache', backend='sqlite', expire_after=req_expire_after) 64 | 65 | @app.after_request 66 | def add_header(response): 67 | if response.mimetype == 'text/html': 68 | response.cache_control.public = True 69 | response.cache_control.max_age = 300 # 5 minutes 70 | response.cache_control.must_revalidate = True 71 | pass 72 | else: 73 | response.cache_control.public = True 74 | response.cache_control.max_age = 2678400 # 31 days 75 | response.cache_control.must_revalidate = False 76 | return response 77 | 78 | 79 | @app.errorhandler(HTTPException) 80 | def handle_exception(e): 81 | """Generic http error handler""" 82 | if request.full_path == '/' or request.full_path == '/?': 83 | return render_page('sourcecatcher.html') 84 | 85 | print(e) 86 | 87 | error_msg = f'
{e.code} {e.name}<br>
{e.description}' 88 | kwargs = { 89 | 'embed': None, 90 | 'app': False, 91 | 'app_direct_image': False, 92 | 'results': True, 93 | 'error_msg': error_msg, 94 | 'page_title': 'Error', 95 | } 96 | return render_page('error.html', code=e.code, **kwargs) 97 | 98 | @app.errorhandler(413) 99 | def entity_too_large(e): 100 | """Error page if uploaded file is too large""" 101 | kwargs = { 102 | 'app': False, 103 | 'app_direct_image': False, 104 | 'results': True, 105 | 'error_msg': str(EntityTooLarge()), 106 | 'page_title': 'Error', 107 | } 108 | return render_page('error.html', code=413, **kwargs) 109 | 110 | 111 | @app.route('/upload', methods=['POST']) 112 | def upload(): 113 | f = request.files['file'] 114 | filename = '{:016x}'.format(random.randint(0, 1<<128)) 115 | path = os.path.join(app.config['UPLOAD_FOLDER'], filename) 116 | f.save(path) 117 | html = find_and_render('file', path) 118 | 119 | # remove old files 120 | uploads = ['{}/{}'.format(app.config['UPLOAD_FOLDER'], n) for n in os.listdir(app.config['UPLOAD_FOLDER'])] 121 | for file in uploads: 122 | os.remove(file) 123 | 124 | return html 125 | 126 | 127 | @app.route('/') 128 | def root(): 129 | return render_page('sourcecatcher.html') 130 | 131 | 132 | @app.route('/link') 133 | def link(): 134 | url = request.args.get('url') 135 | return find_and_render('url', url) 136 | 137 | 138 | @app.route('/twitter_users') 139 | def twitter_users(): 140 | """Show list of indexed twitter users""" 141 | conn = sqlite3.connect(os.path.join(base_path(), 'live/twitter_scraper.db')) 142 | c = conn.cursor() 143 | 144 | # get all users 145 | c.execute('SELECT user FROM users') 146 | users = c.fetchall() 147 | 148 | # get deleted users 149 | try: 150 | c.execute('SELECT user FROM deleted_users') 151 | deleted_users = [x[0] for x in c.fetchall()] 152 | except sqlite3.OperationalError: # deleted_users table may not exist yet 153 | deleted_users = [] 154 | 155 | c.close() 156 | 157 | # combine users and deleted users 158 | users = [(tup[0], tup[0] not in deleted_users) for tup in sorted(users)] 159
| 160 | user_count = len(users) 161 | kwargs = { 162 | 'users': users, 163 | 'user_count': user_count, 164 | 'page_title': 'Indexed Twitter Users', 165 | } 166 | 167 | return render_page('twitter_users.html', **kwargs) 168 | 169 | 170 | @app.route("/twitter_users.csv") 171 | def twitter_users_csv(): 172 | conn = sqlite3.connect(os.path.join(base_path(), 'live/twitter_scraper.db')) 173 | c = conn.cursor() 174 | 175 | c.execute("""SELECT users.user, users.last_id, (deleted_users.user is NULL) 176 | FROM users 177 | LEFT JOIN deleted_users ON users.user=deleted_users.user 178 | ORDER BY LOWER(users.user)""") 179 | data = c.fetchall() 180 | c.close() 181 | 182 | csv = "" 183 | for (user, last_id, is_active) in data: 184 | active_str = "active" if is_active else "inactive" 185 | time = datetime.fromtimestamp(id2ts(last_id), tz=timezone.utc).isoformat() 186 | csv += f"{user},https://twitter.com/{user}/,{active_str},{time}\n" 187 | response = make_response(csv) 188 | response.mimetype = "text/plain" 189 | return response 190 | 191 | 192 | def find_and_render(location, path): 193 | """Try to find a matching image and render the results webpage""" 194 | error_msg = None 195 | error_reasons = None 196 | error_link = None 197 | warning_msg = None 198 | code = 200 199 | 200 | try: 201 | # return error if url is for DC app 202 | if location == 'url': 203 | extract = tldextract.extract(path) 204 | if extract.subdomain == 'dreamcatcher' and \ 205 | extract.domain == 'candlemystar' and \ 206 | extract.suffix == 'com': 207 | raise SCError('DC App has closed and is no longer supported') 208 | 209 | # clear image_search() lru cache if database was updated 210 | db_mtime = os.path.getmtime(os.path.join(base_path(), 'live/twitter_scraper.db')) 211 | if redis_db.get("db_mtime") != bytes(str(db_mtime), "utf-8"): 212 | print("clearing image_search() cache") 213 | image_search_cache.clear() 214 | redis_db.set("db_mtime", str(db_mtime)) 215 | 216 | # find matching results 217 | ret_kwargs = 
image_search(location, path) 218 | 219 | return render_page('match_results.html', **ret_kwargs) 220 | 221 | except TWError as e: 222 | error_msg = str(e) 223 | error_link = e.link 224 | print(e) 225 | 226 | except NoMatchesFound as e: 227 | error_msg = str(e) 228 | error_reasons = e.reasons() 229 | code = 404 230 | print(e) 231 | 232 | except SCError as e: 233 | error_msg = str(e) 234 | code = 400 235 | print(e) 236 | 237 | except Exception as e: 238 | error_msg = "An unknown error occurred" 239 | traceback.print_exc() 240 | code = 500 241 | print(e) 242 | 243 | kwargs = { 244 | 'error_msg': error_msg, 245 | 'error_reasons': error_reasons, 246 | 'error_link': error_link, 247 | 'warning_msg': warning_msg, 248 | 'page_title': 'Error', 249 | 'code': code, 250 | } 251 | 252 | if location == 'url': 253 | kwargs['url'] = path 254 | 255 | # did not find any matches 256 | return render_page('error.html', **kwargs) 257 | -------------------------------------------------------------------------------- /src/bot.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from datetime import datetime 3 | from multiprocessing.pool import ThreadPool 4 | from PIL import Image 5 | from sc_helpers import * 6 | from threading import Lock 7 | import dateutil.parser as dt_parser 8 | import json 9 | import os 10 | import piexif 11 | import requests 12 | import shutil 13 | import sqlite3 14 | import subprocess 15 | import sys 16 | import yaml 17 | 18 | def mkdir(time_str): 19 | """create a directory for a given time 20 | 21 | Example: 22 | Given 'Sat Dec 14 04:35:55 +0000 2013', creates media_dir/2013/12/ 23 | 24 | Arguments: 25 | time_str: time string returned by twitter api 26 | """ 27 | date = dt_parser.parse(time_str) 28 | year = '{:04d}'.format(date.year) 29 | month = '{:02d}'.format(date.month) 30 | 31 | path = os.path.join(media_dir, year) 32 | path = os.path.join(path, month) 33 | if not os.path.exists(path): 
34 | try: 35 | os.makedirs(path) 36 | except FileExistsError: 37 | pass 38 | 39 | return path, date 40 | 41 | def download_media(url, path): 42 | """downloads media to path""" 43 | filename = url.split('/')[-1] 44 | path = os.path.join(path, filename) 45 | 46 | if (os.path.exists(path)): 47 | print('\talready downloaded {}'.format(path)) 48 | return filename 49 | 50 | for _ in range(10): 51 | try: 52 | with requests.get(url, stream=True, timeout=10) as r: 53 | if r.status_code != 200: 54 | return None 55 | with open(path, 'wb') as f: 56 | print('\tdownloading to {}'.format(path)) 57 | shutil.copyfileobj(r.raw, f) 58 | except requests.exceptions.Timeout: 59 | continue 60 | break 61 | 62 | return filename 63 | 64 | def write_exif_date(path, filename, date): 65 | """write exif date to an image""" 66 | # EXIF DateTimeOriginal requires zero-padded hours ("YYYY:MM:DD HH:MM:SS"); 67 | # %H is portable, unlike the glibc-only space-padded %k 68 | date_str = date.strftime('%Y:%m:%d %H:%M:%S') 69 | fullpath = os.path.join(path, filename) 70 | 71 | exif_ifd = {piexif.ExifIFD.DateTimeOriginal: date_str} 72 | exif_dict = {'Exif': exif_ifd} 73 | exif_bytes = piexif.dump(exif_dict) 74 | im = Image.open(fullpath) 75 | im.save(fullpath, exif=exif_bytes) 76 | 77 | 78 | def download_tweet_media(tweet): 79 | """try to download images linked in tweet""" 80 | if "images" in tweet: 81 | images = tweet["images"] 82 | elif "photos" in tweet: 83 | images = [image["url"] for image in tweet["photos"]] 84 | else: 85 | images = [] 86 | 87 | for image in images: 88 | print('{}/{}:'.format(tweet['user']['screen_name'], tweet['id_str'])) 89 | # tweet contains pictures 90 | path, date = mkdir(tweet['created_at']) 91 | filename = download_media(image, path) 92 | if filename is None: 93 | return 94 | try: 95 | write_exif_date(path, filename, date) 96 | except OSError: 97 | try: 98 | os.remove(os.path.join(path, filename)) 99 | except OSError: 100 | pass 101 | filename = download_media(image, path) 102 | if filename is None: 103 | return 104 | 105 | # add info 106 | with lock: 107 | try: 108 | c.execute('INSERT INTO info VALUES (?,?,?,?)', 109 |
(filename, path, tweet['user']['screen_name'], tweet['id_str'])) 108 | except sqlite3.IntegrityError: 109 | pass 110 | 111 | try: 112 | if 'full_text' in tweet: 113 | text_field = 'full_text' 114 | else: 115 | text_field = 'text' 116 | 117 | with lock: 118 | c.execute('INSERT INTO tweet_text VALUES (?,?)', (tweet['id_str'], tweet[text_field])) 119 | 120 | # add hashtags 121 | # with lock: 122 | # for hashtag in tweet['entities']['hashtags']: 123 | # c.execute('INSERT INTO hashtags VALUES (?,?)', (hashtag['text'], tweet['id_str'])) 124 | except sqlite3.IntegrityError: 125 | pass 126 | 127 | with lock: 128 | conn.commit() 129 | 130 | def download_tweet(tweet): 131 | # skip if tweet is actually a retweet 132 | # if tweet["retweet"]: 133 | # return tweet 134 | 135 | # download tweet media 136 | download_tweet_media(tweet) 137 | 138 | return tweet 139 | 140 | 141 | def create_users_list(config): 142 | # Also read a list of additional users from another file 143 | users = OrderedDict([(user, None) for user in config["users"]]) 144 | if "additional_users_files" in config: 145 | for additional_users_file in config["additional_users_files"]: 146 | with open(additional_users_file) as f: 147 | for additional_user in f: 148 | user = additional_user.strip() 149 | if len(user) > 0: 150 | users[user] = None 151 | return [user for (user, _) in users.items()] 152 | 153 | 154 | if __name__ == "__main__": 155 | # parse config.yaml 156 | try: 157 | path = config_file_path() 158 | with open(path) as f: 159 | config = yaml.safe_load(f) 160 | except IOError: 161 | print("error loading config file") 162 | sys.exit(1) 163 | 164 | users = create_users_list(config) 165 | media_dir = config['media_dir'] 166 | 167 | lock = Lock() 168 | 169 | conn = sqlite3.connect(os.path.join(base_path(), 'working/twitter_scraper.db'), check_same_thread=False) 170 | c = conn.cursor() 171 | with lock: 172 | c.execute('CREATE TABLE IF NOT EXISTS users (user text, last_id int64, UNIQUE (user))') 173 | 
c.execute('CREATE TABLE IF NOT EXISTS info (filename text, path text, user text, id int64, UNIQUE (filename, path))') 174 | c.execute('CREATE INDEX IF NOT EXISTS id ON info(id)') 175 | c.execute('CREATE TABLE IF NOT EXISTS tweet_text (id int64, text text, UNIQUE (id))') 176 | c.execute('CREATE TABLE IF NOT EXISTS hashtags (hashtag text, id int64, UNIQUE (hashtag, id))') 177 | c.execute('CREATE TABLE IF NOT EXISTS deleted_users (user text, UNIQUE (user))') 178 | 179 | 180 | # 181 | # DISCORD SCRAPER 182 | # 183 | 184 | # Set up discord scraper 185 | if "scrape_discord" in config and config["scrape_discord"]: 186 | process_args = ["sourcecatcher-discord-scraper", os.path.join(base_path(), "config/config-discord.toml") ] 187 | process = subprocess.Popen(process_args, stdout=subprocess.PIPE) 188 | assert process.stdout is not None 189 | 190 | # Download and process tweets 191 | with ThreadPool(20) as pool: 192 | for tweet in pool.imap(download_tweet, map(json.loads, process.stdout)): 193 | pass 194 | 195 | # Prune discord-scraper process 196 | process.wait(10) 197 | if process.returncode != 0: 198 | print(f"discord-scraper non-zero exit code: {process.returncode}") 199 | sys.exit(1) 200 | 201 | 202 | # 203 | # NITTER SCRAPER 204 | # 205 | nitter_instance = config["nitter_instance"] 206 | if nitter_instance is not None: 207 | for i, user in enumerate(users): 208 | user = user.lower() 209 | # find the last read tweet 210 | with lock: 211 | c.execute('SELECT last_id FROM users WHERE user=?', (user,)) 212 | last_id = c.fetchone() 213 | first_id = None 214 | 215 | # Set up nitter-scraper arguments 216 | process_args = ["nitter-scraper", nitter_instance, "--skip-retweets", "--reorder-pinned" ] 217 | if last_id is not None: 218 | last_id = last_id[0] 219 | # Set min-id arg if we have seen this user before 220 | process_args.extend(["--min-id", str(last_id + 1)]) 221 | else: 222 | last_id = 0 223 | process_args.extend([ f"user-media", user ]) 224 | print(f'nitter-scraper 
checking {user} for new tweets from id {last_id}') 225 | print(f"{process_args}") 226 | 227 | process = subprocess.Popen(process_args, stdout=subprocess.PIPE) 228 | assert process.stdout is not None 229 | 230 | # Download and process tweets 231 | with ThreadPool(20) as pool: 232 | for tweet in pool.imap(download_tweet, map(json.loads, process.stdout)): 233 | assert str(tweet["id"]) == tweet["id_str"] 234 | last_id = max(last_id, int(tweet["id_str"])) 235 | 236 | # update last tweet read 237 | with lock: 238 | try: 239 | c.execute('INSERT INTO users VALUES (?,?)', (user, last_id)) 240 | except sqlite3.IntegrityError: 241 | c.execute('UPDATE users SET last_id=(?) WHERE user=(?)', (last_id ,user)) 242 | conn.commit() 243 | 244 | # Prune nitter-scraper process 245 | process.wait(10) 246 | if process.returncode == 10: 247 | # This account doesn't exist 248 | pass 249 | elif process.returncode != 0: 250 | print(f"nitter-scraper non-zero exit code: {process.returncode} (non-fatal)") 251 | else: 252 | # OK 253 | pass 254 | -------------------------------------------------------------------------------- /src/static/sourcecatcher.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --bg-color: #fff; 3 | --fg-color: #000; 4 | --footer-bg-color: #e0e0e0; 5 | --dc-yellow: #F2C763; 6 | --error-color: #C0000C; 7 | --error-bg-color: #FFD2D2; 8 | --error-code: #A0000C; 9 | --warning-color: #8F6000; 10 | --warning-bg-color: #FEEFB3; 11 | --link-color: #0000ee; 12 | --link-visited-color: #551a8b; 13 | --link_input-bg-color: #fcfcfc; 14 | --search-border-color: lightgray; 15 | --btn-border-color: lightgray; 16 | --fg2-color: #697882; 17 | --box-shadow-color: rgba(0,0,0,0.2); 18 | --tweet-hover-bg-color: #f8f8f8; 19 | --theme-switch-bg-color: #474842; 20 | --nav-hover-color: var(--dc-yellow); 21 | --moon-color: #F5F3CE; 22 | --sun-color: #FDB813; 23 | --tweet-bg-color: var(--bg-color); 24 | } 25 | 26 | [data-theme="dark"] { 27 | 
--bg-color: #272822; 28 | --fg-color: #e1e1da; 29 | --footer-bg-color: #111111; 30 | --link-color: #66d9ef; 31 | --link-visited-color: #ae81ff; 32 | --link_input-bg-color: #474842; 33 | --search-border-color: #222222; 34 | --btn-border-color: darkgray; 35 | --fg2-color: #92a0aa; 36 | --box-shadow-color: rgba(0,0,0,0.4); 37 | --tweet-hover-bg-color: #373832; 38 | --tweet-bg-color: #2f302a; 39 | } 40 | 41 | header { 42 | position: absolute; 43 | bottom: 0; 44 | width: 100%; 45 | height: 50px; 46 | line-height: 50px; 47 | text-align: center; 48 | background-color: rgba(0,0,0,0.4); 49 | } 50 | .brand { 51 | width: auto; 52 | height: 100%; 53 | float: left; 54 | margin: 0 10px; 55 | } 56 | .brand a{ 57 | font-size: 50px; 58 | font-weight: bold; 59 | color: var(--dc-yellow); 60 | text-decoration: none; 61 | } 62 | .brand a:visited { 63 | color: var(--dc-yellow); 64 | } 65 | .brand a:hover { 66 | text-decoration: none; 67 | } 68 | .brand a::after { 69 | content: "Sourcecatcher"; 70 | } 71 | .menu { 72 | height: 100%; 73 | float: right; 74 | } 75 | .menu ul { 76 | width: 100%; 77 | height: inherit; 78 | margin: 0; 79 | padding: 0; 80 | display: flex; 81 | justify-content: flex-end; 82 | list-style: none; 83 | } 84 | .menu ul li a { 85 | padding: 0 15px; 86 | height: inherit; 87 | color: #fff; 88 | display: inline-block; 89 | outline: none; 90 | } 91 | .menu ul li a:focus, 92 | .menu ul li a:hover { 93 | background-color: var(--nav-hover-color); 94 | text-decoration: none; 95 | color: var(--bg-color); 96 | } 97 | .menu ul li a:focus svg, 98 | .menu ul li a:hover svg { 99 | fill: var(--bg-color); 100 | } 101 | .menu ul li a svg { 102 | fill: #e1e1da; 103 | } 104 | #menuToggle { 105 | display: none; 106 | width: 1px; 107 | height: 1px; 108 | overflow: hidden; 109 | clip: rect(0,0,0,0); 110 | padding: 0; 111 | position: absolute !important; 112 | white-space: nowrap; 113 | border: 0; 114 | opacity: 0; 115 | } 116 | .menu-icon { 117 | display: none; 118 | } 119 | 
.menu-icon:hover { 120 | cursor: pointer; 121 | } 122 | @media screen and (max-width: 1000px) { 123 | .menu * { 124 | position: relative; 125 | z-index: 5; 126 | } 127 | .menu { 128 | width: 100%; 129 | height: auto; 130 | margin-bottom: 20px; 131 | } 132 | .menu ul { 133 | display: block; 134 | max-height: 0; 135 | overflow: hidden; 136 | transition: max-height 0.2s ease-out; 137 | } 138 | .menu ul li a svg { 139 | fill: var(--fg-color); 140 | } 141 | .menu ul li, 142 | .menu ul li a { 143 | text-align: left; 144 | width: 100%; 145 | height: 50px; 146 | background-color: var(--footer-bg-color); 147 | color: var(--fg-color); 148 | } 149 | .menu ul li a:focus li, 150 | .menu ul li a:hover li { 151 | background-color: var(--nav-hover-color); 152 | color: var(--bg-color); 153 | } 154 | .menu ul li a:focus svg, 155 | .menu ul li a:hover svg { 156 | fill: var(--bg-color); 157 | } 158 | .menu-icon { 159 | position: absolute; 160 | top: 0; 161 | right: 0; 162 | display: block; 163 | } 164 | .menu-icon * { 165 | display: block; 166 | } 167 | #menuToggle { 168 | display: block; 169 | } 170 | #menuToggle:not(:checked) ~ ul { 171 | display: none; 172 | } 173 | #menuToggle:checked ~ ul { 174 | display: block; 175 | max-height: 350px; 176 | } 177 | #menuToggle:focus + label { 178 | outline: 1px dotted lightgray; 179 | outline: 1px auto -webkit-focus-ring-color; 180 | } 181 | } 182 | 183 | .hamburger { 184 | margin: 10px; 185 | fill: #e1e1da; 186 | width: 30px; 187 | height: 30px; 188 | } 189 | 190 | .preload * { 191 | -webkit-transition: none !important; 192 | -moz-transition: none !important; 193 | -o-transition: none !important; 194 | transition: none !important; 195 | } 196 | 197 | .theme-switch { 198 | position: relative; 199 | display: inline-block; 200 | width: 60px; 201 | height: 34px; 202 | margin: 8px 10px; 203 | transform: scale(0.75); 204 | } 205 | 206 | #theme-switch-toggle > input[name="state-theme"] { 207 | width: 1px; 208 | height: 1px; 209 | overflow: hidden; 210
| clip: rect(0,0,0,0); 211 | padding: 0; 212 | position: absolute !important; 213 | white-space: nowrap; 214 | border: 0; 215 | opacity: 0; 216 | } 217 | 218 | .slider { 219 | position: absolute; 220 | cursor: pointer; 221 | top: 0; 222 | left: 0; 223 | right: 0; 224 | bottom: 0; 225 | border-radius: 34px; 226 | background-color: var(--theme-switch-bg-color); 227 | } 228 | 229 | .slider-icon { 230 | position: absolute; 231 | content: ""; 232 | height: 26px; 233 | width: 26px; 234 | left: 17px; 235 | right: 17px; 236 | bottom: 4px; 237 | border-radius: 50%; 238 | -webkit-transition: .4s; 239 | transition: .4s; 240 | } 241 | 242 | .slider-icon[data-theme="auto"] { 243 | box-shadow: inset -7px -3px var(--moon-color); 244 | background-color: var(--sun-color); 245 | } 246 | 247 | .slider-icon[data-theme="light"] { 248 | -webkit-transform: translateX(-13px); 249 | -ms-transform: translateX(-13px); 250 | transform: translateX(-13px); 251 | background-color: var(--sun-color); 252 | } 253 | 254 | .slider-icon[data-theme="dark"] { 255 | -webkit-transform: translateX(13px); 256 | -ms-transform: translateX(13px); 257 | transform: translateX(13px); 258 | box-shadow: inset -7px -3px var(--moon-color); 259 | background-color: var(--theme-switch-bg-color); 260 | } 261 | 262 | html, body { 263 | margin: 0; 264 | padding: 0; 265 | height: 100%; 266 | font-family: sans-serif; 267 | background-color: var(--bg-color); 268 | color: var(--fg-color); 269 | } 270 | 271 | body { 272 | margin: 0; 273 | padding: 0; 274 | height: 100%; 275 | display: flex; 276 | flex-direction: column; 277 | } 278 | 279 | #dnd-modal { 280 | position: fixed; 281 | left: 0; 282 | top: 0; 283 | width: 100%; 284 | height: 100%; 285 | background-color: rgba(0, 0, 0, 0.8); 286 | display: none; 287 | z-index: 2; 288 | } 289 | 290 | #dnd-modal-content { 291 | position: fixed; 292 | top: 50%; 293 | left: 50%; 294 | width: 500px; 295 | max-width: 95%; 296 | max-height: 95%; 297 | -webkit-transform: translate(-50%, 
-50%); 298 | transform: translate(-50%, -50%); 299 | background-color: var(--bg-color); 300 | border-radius: 5px; 301 | padding: 30px 10px; 302 | text-align: center; 303 | font-size: 1.5em; 304 | } 305 | 306 | .banner { 307 | width: 100%; 308 | height: 150px; 309 | position: relative; 310 | background-color: #101820; 311 | background-image: url("moon.png"); 312 | background-repeat: no-repeat; 313 | background-position: center; 314 | margin: 0 auto; 315 | } 316 | 317 | #content { 318 | flex: 1 0 auto; 319 | } 320 | 321 | #footer_wrapper { 322 | overflow: hidden; 323 | flex-shrink: 0; 324 | } 325 | 326 | #footer { 327 | background-color: var(--footer-bg-color); 328 | display: flex; 329 | flex-wrap: wrap; 330 | justify-content: space-between; 331 | margin: 0 -5px; 332 | } 333 | 334 | .footer_text { 335 | margin: 5px 15px; 336 | } 337 | 338 | h2 { 339 | text-align: center; 340 | margin: 0 auto 20px auto; 341 | } 342 | 343 | h3 { 344 | text-align: center; 345 | font-weight: normal; 346 | } 347 | 348 | a { 349 | text-decoration: none; 350 | color: var(--link-color); 351 | } 352 | 353 | a:visited { 354 | color: var(--link-visited-color); 355 | } 356 | 357 | a:hover { 358 | text-decoration: underline; 359 | } 360 | 361 | .main { 362 | padding: 20px; 363 | margin: auto; 364 | width: 95%; 365 | margin: 0 auto; 366 | } 367 | 368 | .description { 369 | font-size: 1.25em; 370 | max-width: 600px; 371 | text-align: center; 372 | margin: 0 auto; 373 | } 374 | 375 | .news { 376 | font-size: 1em; 377 | max-width: 600px; 378 | text-align: justify; 379 | margin: 0 auto; 380 | } 381 | 382 | .inputs { 383 | max-width: 600px; 384 | margin: 2em auto; 385 | display: flex; 386 | flex-direction: column; 387 | } 388 | 389 | .search_wrap { 390 | margin: 10px auto; 391 | width: 500px; 392 | max-width: 100%; 393 | } 394 | 395 | form { 396 | text-align: center; 397 | display: flex; 398 | justify-content: center; 399 | width: 100%; 400 | } 401 | 402 | #file_upload_form { 403 | flex-wrap: wrap; 404 
| margin: 10px auto; 405 | } 406 | 407 | .video_download, 408 | [type="file"] + label { 409 | border: 1px solid var(--btn-border-color); 410 | border-radius: 5px; 411 | cursor: pointer; 412 | display: none; 413 | font-size: inherit; 414 | margin-bottom: 1rem; 415 | outline: none; 416 | padding: 10px 20px; 417 | position: relative; 418 | vertical-align: middle; 419 | } 420 | 421 | .video_download:focus, 422 | .video_download:hover, 423 | [type="file"]:focus + label, 424 | [type="file"] + label:hover { 425 | background-color: var(--dc-yellow); 426 | color: var(--bg-color); 427 | text-decoration: none; 428 | } 429 | 430 | #link_input { 431 | width: 100%; 432 | padding: 10px; 433 | float: left; 434 | border-top-left-radius: 5px; 435 | border-bottom-left-radius: 5px; 436 | border-left: 1px solid var(--search-border-color); 437 | border-top: 1px solid var(--search-border-color); 438 | border-bottom: 1px solid var(--search-border-color); 439 | border-right: none; 440 | color: var(--fg-color); 441 | background-color: var(--link_input-bg-color); 442 | } 443 | 444 | #link_submit_btn { 445 | border-top: 1px solid var(--search-border-color); 446 | border-top-right-radius: 5px; 447 | border-bottom-right-radius: 5px; 448 | border-right: 1px solid var(--search-border-color); 449 | border-bottom: 1px solid var(--search-border-color); 450 | float: left; 451 | color: white; 452 | border-left: none; 453 | cursor: pointer; 454 | background-color: var(--link_input-bg-color); 455 | } 456 | 457 | #link_submit_btn.highlight_search, 458 | #link_submit_btn:focus, 459 | #link_submit_btn:hover { 460 | background-color: var(--dc-yellow); 461 | outline: none; 462 | } 463 | 464 | .highlight_search svg *, 465 | #link_submit_btn:focus svg *, 466 | #link_submit_btn:hover svg * { 467 | stroke: var(--bg-color); 468 | } 469 | 470 | #search_icon { 471 | width: 1.5em; 472 | } 473 | 474 | input[type=text] { 475 | box-sizing: border-box; 476 | } 477 | 478 | #or { 479 | text-align: center; 480 | margin: 0; 
481 | } 482 | 483 | .results_header { 484 | margin-top: 2em; 485 | margin: 0 auto 20px auto; 486 | max-width: 600px; 487 | } 488 | 489 | .results { 490 | margin: 0 auto 2em auto; 491 | display: flex; 492 | flex-wrap: wrap; 493 | justify-content: center; 494 | align-items: flex-start; 495 | } 496 | 497 | .result { 498 | border-style: solid; 499 | border-width: 1px; 500 | } 501 | 502 | .result_link { 503 | display: block; 504 | } 505 | 506 | *[id^='twitter-widget-'] { 507 | margin: 5px; 508 | text-align: left; 509 | } 510 | 511 | .dc_app_header { 512 | margin: auto; 513 | width: 500px; 514 | max-width: 100%; 515 | } 516 | 517 | .dc_app_user { 518 | display: flex; 519 | } 520 | 521 | .dc_app_profile_pic { 522 | border-radius: 50%; 523 | height: 3em; 524 | width: 3em; 525 | object-fit: cover; 526 | box-shadow: 0px 0px 5px var(--box-shadow-color); 527 | } 528 | 529 | .dc_app_username { 530 | font-weight: bold; 531 | font-size: 1.25em; 532 | margin: auto 0 auto 0.5em; 533 | } 534 | 535 | .video_download { 536 | text-align: center; 537 | margin: 10px auto; 538 | display: block; 539 | width: max-content; 540 | } 541 | 542 | .video_download { 543 | color: var(--fg-color); 544 | } 545 | 546 | .video_download:visited { 547 | color: var(--fg-color); 548 | } 549 | 550 | .video_download:hover { 551 | color: var(--bg-color); 552 | } 553 | 554 | 555 | #video { 556 | display: block; 557 | width: 500px; 558 | max-width: 100%; 559 | max-height: 95vh; 560 | margin: auto; 561 | box-shadow: 0px 0px 5px var(--box-shadow-color); 562 | } 563 | 564 | img.app_img { 565 | width: 500px; 566 | max-width: 100%; 567 | min-height: 20px; 568 | margin: 5px; 569 | box-shadow: 0px 0px 5px var(--box-shadow-color); 570 | } 571 | 572 | p.message { 573 | font-size: 0.75em; 574 | } 575 | 576 | .error { 577 | text-align: center; 578 | color: var(--error-color); 579 | background-color: var(--error-bg-color); 580 | margin: 20px auto; 581 | padding: 10px; 582 | border-radius: 5px; 583 | max-width: 600px; 584 | 
} 585 | 586 | .error_reasons { 587 | text-align: left; 588 | width: -webkit-fill-available; 589 | width: -webkit-fit-content; 590 | width: -moz-fit-content; 591 | width: fit-content; 592 | margin: 1em auto; 593 | } 594 | 595 | .warning { 596 | text-align: center; 597 | color: var(--warning-color); 598 | background-color: var(--warning-bg-color); 599 | margin: 20px auto; 600 | padding: 10px; 601 | border-radius: 5px; 602 | max-width: 600px; 603 | } 604 | 605 | .error_code { 606 | color: var(--error-code); 607 | font-size: 1.5em; 608 | } 609 | 610 | .twitter_user > p { 611 | text-align: center; 612 | } 613 | 614 | ul.users_list { 615 | -webkit-column-width: 10em; 616 | -moz-column-width: 10em; 617 | column-width: 10em; 618 | column-count: 8; 619 | list-style-type: none; 620 | padding: 0; 621 | margin: 20px auto; 622 | } 623 | 624 | .deleted_user:after { 625 | content: "\274C"; 626 | } 627 | 628 | @media screen and (max-width: 400px) { 629 | .brand a::after { 630 | content: "SC"; 631 | } 632 | } 633 | @media screen and (max-width: 800px) { 634 | .main { 635 | width: 95%; 636 | padding: 0; 637 | } 638 | 639 | input { 640 | font-size: 1em; 641 | } 642 | 643 | button { 644 | font-size: 1em; 645 | } 646 | 647 | input[type=text] { 648 | max-width: 100%; 649 | } 650 | 651 | input[type=file] { 652 | max-width: 100%; 653 | -webkit-box-sizing: border-box; 654 | -moz-box-sizing: border-box; 655 | box-sizing: border-box; 656 | } 657 | 658 | } 659 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 
7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 
41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 
79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 
110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 
143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 
174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. 
This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year>  <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>. 662 | --------------------------------------------------------------------------------