├── PaperSorter
├── __version__.py
├── data
│ └── __init__.py
├── __init__.py
├── static
│ ├── favicon.ico
│ ├── icons
│ │ ├── favicon-16x16.png
│ │ ├── favicon-32x32.png
│ │ ├── favicon-96x96.png
│ │ ├── android-icon-48x48.png
│ │ ├── android-icon-72x72.png
│ │ ├── android-icon-96x96.png
│ │ ├── apple-icon-180x180.png
│ │ ├── android-icon-144x144.png
│ │ └── android-icon-192x192.png
│ ├── css
│ │ ├── pages
│ │ │ ├── feedback.css
│ │ │ ├── feed_struct.css
│ │ │ └── paper_detail_similar.css
│ │ └── main.css
│ └── manifest.json
├── providers
│ ├── __init__.py
│ ├── openai_client.py
│ ├── base.py
│ └── factory.py
├── tasks
│ ├── __init__.py
│ └── serve.py
├── services
│ ├── __init__.py
│ └── summarization.py
├── db
│ └── __init__.py
├── templates
│ ├── settings_base.html
│ ├── error.html
│ ├── email
│ │ ├── paper_card.html
│ │ └── newsletter.txt
│ ├── 403.html
│ ├── partials
│ │ └── similar_section.html
│ ├── feedback_error.html
│ ├── feedback_success.html
│ └── settings.html
├── web
│ ├── __init__.py
│ ├── jobs
│ │ └── __init__.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── scholarly_article.py
│ │ └── semantic_scholar.py
│ ├── auth
│ │ ├── __init__.py
│ │ ├── decorators.py
│ │ └── models.py
│ ├── api
│ │ └── __init__.py
│ ├── utils
│ │ └── __init__.py
│ └── wsgi.py
├── cli
│ ├── __init__.py
│ ├── types.py
│ ├── context.py
│ ├── parser.py
│ └── base.py
├── notification
│ ├── __init__.py
│ ├── base.py
│ └── factory.py
├── __main__.py
├── log.py
├── utils
│ └── template_filters.py
└── config.py
├── docker
├── scripts
│ ├── wsgi.py
│ ├── entrypoint.sh
│ └── scheduler-entrypoint.sh
├── postgres
│ └── init.sql
├── cron
│ └── crontab
├── caddy
│ ├── Caddyfile
│ └── Caddyfile.prod
└── config.docker.yml
├── requirements.txt
├── MANIFEST.in
├── CHANGELOG.md
├── docs
├── requirements.txt
├── getting-started
│ └── index.rst
├── Makefile
├── user-guide
│ └── index.rst
├── admin-guide
│ └── index.rst
├── cli-reference
│ └── index.rst
├── tutorials
│ └── index.rst
├── api
│ ├── index.rst
│ └── modules.rst
├── changelog.md
├── README.md
├── reference
│ └── index.rst
├── conf.py
├── development
│ ├── database.rst
│ └── index.rst
└── index.rst
├── migrations
└── add-predicted-preferences-score-index.sql
├── examples
├── crontab.example
├── papersorter.service
├── cron-broadcast.sh
├── cron-update.sh
├── README.md
└── cron-combined.sh
├── .dockerignore
├── LICENSE.txt
├── setup.py
├── Dockerfile.scheduler
├── tests
└── db
│ └── test_manager.py
├── .gitignore
├── Dockerfile
├── .github
└── workflows
│ ├── docs.yml
│ ├── claude.yml
│ └── claude-code-review.yml
├── docker-compose.prod.yml
├── AGENTS.md
├── papersorter-cli
├── pyproject.toml
└── .env.example
/PaperSorter/__version__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.9.0'
2 |
--------------------------------------------------------------------------------
/PaperSorter/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Database schema data module
2 |
--------------------------------------------------------------------------------
/PaperSorter/__init__.py:
--------------------------------------------------------------------------------
1 | from .__version__ import __version__
2 |
3 | __all__ = ['__version__']
4 |
--------------------------------------------------------------------------------
/PaperSorter/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/favicon.ico
--------------------------------------------------------------------------------
/PaperSorter/static/icons/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/favicon-16x16.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/favicon-32x32.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/favicon-96x96.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/android-icon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-48x48.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/android-icon-72x72.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-72x72.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/android-icon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-96x96.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/apple-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/apple-icon-180x180.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/android-icon-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-144x144.png
--------------------------------------------------------------------------------
/PaperSorter/static/icons/android-icon-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-192x192.png
--------------------------------------------------------------------------------
/PaperSorter/providers/__init__.py:
--------------------------------------------------------------------------------
1 | """Feed providers for PaperSorter."""
2 |
3 | from .base import FeedProvider, FeedItem
4 | from .rss import RSSProvider
5 |
6 | __all__ = ["FeedProvider", "FeedItem", "RSSProvider"]
7 |
8 |
--------------------------------------------------------------------------------
/docker/scripts/wsgi.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """WSGI entry point for gunicorn with Docker."""
3 | from PaperSorter.web.app import create_app
4 |
5 | # Create the application with the config path
6 | app = create_app("/app/config.yml")
--------------------------------------------------------------------------------
/PaperSorter/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | "init",
3 | "update",
4 | "train",
5 | "predict",
6 | "broadcast",
7 | "serve",
8 | "test",
9 | "import",
10 | "labeling",
11 | "models",
12 | "embeddings",
13 | ]
14 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | feedparser>=6.0
2 | numpy>=1.20
3 | openai>=1.30
4 | pandas>=2.0
5 | psycopg2-binary>=2.9
6 | pgvector>=0.2.0
7 | PyYAML>=6.0
8 | requests>=2.7.0
9 | scikit-learn>=1.4
10 | scipy>=1.10
11 | xgboost>2.0
12 | Flask>=2.0
13 | Flask-Login>=0.6.0
14 | Authlib>=1.2.0
15 | markdown2>=2.4.0
16 |
--------------------------------------------------------------------------------
/PaperSorter/services/__init__.py:
--------------------------------------------------------------------------------
1 | """Service-layer helpers for PaperSorter."""
2 |
3 | from .feed_prediction import ( # noqa: F401
4 | FeedPredictionService,
5 | FeedPredictor,
6 | refresh_embeddings_and_predictions,
7 | )
8 |
9 | __all__ = [
10 | "FeedPredictionService",
11 | "FeedPredictor",
12 | "refresh_embeddings_and_predictions",
13 | ]
14 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE.txt
3 | include requirements.txt
4 | include pyproject.toml
5 | recursive-include PaperSorter/templates *.html *.txt
6 | recursive-include PaperSorter/static *.css *.js *.ico *.json *.png .gitkeep
7 | recursive-include PaperSorter/data *.py
8 | recursive-include examples *
9 | global-exclude __pycache__
10 | global-exclude *.py[co]
11 | global-exclude .DS_Store
12 | prune tests
13 | prune notebook
14 | prune tools
15 | prune old
16 | prune qbio
17 | prune qtest
18 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## Release 0.2 - 2024-06-05
2 |
3 | - Implemented `init` command for database setup and bulk load. (ec34a60)
4 | - Improved Excel feedback formatting for better usability. (97f382c)
5 | - Fixed error when sending notifications for articles with long titles. (f5a7b50)
6 | - Cleaned up outputs for clearer log files. (c5d5de1)
7 | - Disabled URL unfurling in Slack messages by default. (f5a7b50)
8 | - Enabled custom model names in Slack messages. (82c4e28)
9 | - Removed a divider from Slack messages. (d56b1de)
10 |
11 | ## Release 0.1 - 2024-06-01
12 |
13 | - Initial release.
14 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # Documentation dependencies for PaperSorter
2 | # Core Sphinx
3 | sphinx>=7.0.0
4 | sphinx-rtd-theme>=2.0.0
5 |
6 | # Markdown support
7 | myst-parser>=2.0.0
8 | linkify-it-py>=2.0.0 # Required for myst-parser linkify extension
9 |
10 | # Extensions for better documentation
11 | sphinx-autodoc-typehints>=1.25.0
12 | sphinx-click>=5.0.0
13 | sphinx-copybutton>=0.5.0
14 | sphinx-tabs>=3.4.0
15 |
16 | # Development tools
17 | sphinx-autobuild>=2021.3.14
18 | doc8>=1.1.0
19 |
20 | # For API documentation
21 | autodoc>=0.5.0
22 |
23 | # For better code highlighting
24 | pygments>=2.17.0
--------------------------------------------------------------------------------
/PaperSorter/static/css/pages/feedback.css:
--------------------------------------------------------------------------------
1 | /* Shared feedback page button styles */
2 |
3 | .btn {
4 | padding: 12px 24px;
5 | border: none;
6 | border-radius: 6px;
7 | font-size: 16px;
8 | font-weight: bold;
9 | cursor: pointer;
10 | text-decoration: none;
11 | display: inline-block;
12 | transition: all var(--transition-base);
13 | }
14 |
15 | .btn-primary {
16 | background: var(--color-primary);
17 | color: var(--text-white);
18 | }
19 |
20 | .btn-primary:hover {
21 | background: var(--btn-primary-hover);
22 | transform: translateY(-1px);
23 | box-shadow: 0 2px 5px var(--shadow-xl);
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/docker/scripts/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Web container entrypoint script
3 |
4 | set -e
5 |
6 | echo "Starting PaperSorter web service..."
7 | echo "Data directory: $PAPERSORTER_DATADIR"
8 | echo "Config file: $PAPERSORTER_CONFIG"
9 |
10 | # Wait for database to be ready
11 | echo "Waiting for database..."
12 | until pg_isready -h postgres -U ${POSTGRES_USER:-papersorter}; do
13 | echo "Database is unavailable - sleeping"
14 | sleep 2
15 | done
16 | echo "Database is ready!"
17 |
18 | # Create data directories if they don't exist
19 | mkdir -p /data/logs /data/models /data/posters
20 |
21 | # Execute the command
22 | exec "$@"
--------------------------------------------------------------------------------
/PaperSorter/db/__init__.py:
--------------------------------------------------------------------------------
1 | """Database access helpers for PaperSorter."""
2 |
3 | from .manager import (
4 | Connection,
5 | Cursor,
6 | DatabaseManager,
7 | DatabaseSession,
8 | OperationalError,
9 | PooledConnection,
10 | PoolConfig,
11 | RealDictCursor,
12 | execute_batch,
13 | errors,
14 | sql,
15 | )
16 |
17 | __all__ = [
18 | "Connection",
19 | "Cursor",
20 | "DatabaseManager",
21 | "DatabaseSession",
22 | "OperationalError",
23 | "PooledConnection",
24 | "PoolConfig",
25 | "RealDictCursor",
26 | "execute_batch",
27 | "errors",
28 | "sql",
29 | ]
30 |
--------------------------------------------------------------------------------
/migrations/add-predicted-preferences-score-index.sql:
--------------------------------------------------------------------------------
1 | -- Migration: add covering index for predicted_preferences by model/score/feed
2 | -- This speeds up feed listing when filtering by prediction score.
3 | --
4 | -- How to apply (cannot run inside a transaction because of CONCURRENTLY):
5 | -- psql -d your_database -f migrations/add-predicted-preferences-score-index.sql
6 | --
7 | -- Recommended: run during low traffic; CREATE INDEX CONCURRENTLY takes a bit longer
8 | -- but avoids long write locks on the table.
9 |
10 | SET search_path TO papersorter;
11 |
12 | CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_predpref_model_score_feed
13 | ON papersorter.predicted_preferences (model_id, score DESC, feed_id);
14 |
--------------------------------------------------------------------------------
/docker/scripts/scheduler-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Scheduler entrypoint script
3 |
4 | set -e
5 |
6 | echo "Starting PaperSorter scheduler..."
7 | echo "Data directory: $PAPERSORTER_DATADIR"
8 | echo "Config file: $PAPERSORTER_CONFIG"
9 |
10 | # Wait for database to be ready
11 | echo "Waiting for database..."
12 | until pg_isready -h postgres -U ${POSTGRES_USER:-papersorter}; do
13 | echo "Database is unavailable - sleeping"
14 | sleep 2
15 | done
16 | echo "Database is ready!"
17 |
18 | # Create log directory if it doesn't exist
19 | mkdir -p /data/logs
20 |
21 | # Ensure cron environment has access to environment variables
22 | printenv | grep -E '^(DATABASE_URL|PAPERSORTER_|OPENAI_|EMBEDDING_|PATH)' > /etc/environment
23 |
24 | # Start cron in foreground
25 | echo "Starting cron daemon..."
26 | cron -f
--------------------------------------------------------------------------------
/PaperSorter/templates/settings_base.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block header_actions %}
4 |
8 | {{ super() }}
9 | {% endblock %}
10 |
11 | {% block styles %}
12 |
13 |
17 | {% endblock %}
18 |
19 | {% block content %}
20 |
21 | {% block settings_content %}{% endblock %}
22 |
23 |
24 | {% block modals %}{% endblock %}
25 | {% endblock %}
26 |
27 | {% block scripts %}
28 | {% block page_scripts %}{% endblock %}
29 | {% endblock %}
30 |
--------------------------------------------------------------------------------
/PaperSorter/static/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "PaperSorter",
3 | "short_name": "PaperSorter",
4 | "description": "Academic paper recommendation system",
5 | "start_url": "/",
6 | "display": "standalone",
7 | "background_color": "#ffffff",
8 | "theme_color": "#1976d2",
9 | "icons": [
10 | {
11 | "src": "/static/icons/android-icon-48x48.png",
12 | "sizes": "48x48",
13 | "type": "image/png"
14 | },
15 | {
16 | "src": "/static/icons/android-icon-72x72.png",
17 | "sizes": "72x72",
18 | "type": "image/png"
19 | },
20 | {
21 | "src": "/static/icons/android-icon-96x96.png",
22 | "sizes": "96x96",
23 | "type": "image/png"
24 | },
25 | {
26 | "src": "/static/icons/android-icon-144x144.png",
27 | "sizes": "144x144",
28 | "type": "image/png"
29 | },
30 | {
31 | "src": "/static/icons/android-icon-192x192.png",
32 | "sizes": "192x192",
33 | "type": "image/png"
34 | }
35 | ]
36 | }
--------------------------------------------------------------------------------
/examples/crontab.example:
--------------------------------------------------------------------------------
1 | # Example crontab entries for PaperSorter
2 | # Choose ONE of the options below and add its lines to your crontab with: crontab -e
3 | # (the options overlap, so do not install all of them). Adjust paths and schedules according to your needs
4 |
5 | # Option 1: Recommended - Separate update and broadcast tasks
6 | # Update every 3 hours
7 | 0 */3 * * * /path/to/papersorter/examples/cron-update.sh
8 |
9 | # Broadcast every hour (channels have individual hour restrictions configured in the web interface)
10 | 0 * * * * /path/to/papersorter/examples/cron-broadcast.sh
11 |
12 | # Option 2: Combined task (update + broadcast)
13 | # Run every 3 hours
14 | 0 */3 * * * /path/to/papersorter/examples/cron-combined.sh
15 |
16 | # Option 3: More frequent updates
17 | # Update every hour
18 | 0 * * * * /path/to/papersorter/examples/cron-update.sh
19 |
20 | # Broadcast every hour (respects per-channel broadcast hours)
21 | 0 * * * * /path/to/papersorter/examples/cron-broadcast.sh
22 |
23 | # Note: Make sure the scripts are executable:
24 | # chmod +x /path/to/papersorter/examples/*.sh
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Git files
2 | .git
3 | .gitignore
4 |
5 | # Python cache
6 | __pycache__
7 | *.pyc
8 | *.pyo
9 | *.pyd
10 | .Python
11 | *.so
12 | *.egg
13 | *.egg-info
14 | dist
15 | build
16 |
17 | # Virtual environments
18 | venv/
19 | env/
20 | ENV/
21 |
22 | # IDE files
23 | .vscode/
24 | .idea/
25 | *.swp
26 | *.swo
27 | *~
28 | .DS_Store
29 |
30 | # Documentation build
31 | docs/_build/
32 | docs/build/
33 |
34 | # Test and coverage
35 | .coverage
36 | .pytest_cache/
37 | .tox/
38 | htmlcov/
39 |
40 | # Local data files
41 | *.db
42 | *.pkl
43 | *.npz
44 | *.log
45 | *.sqlite
46 | *.sqlite3
47 |
48 | # Docker files (don't copy these into the image)
49 | Dockerfile*
50 | docker-compose*.yml
51 | .dockerignore
52 |
53 | # Environment files
54 | .env
55 | .env.*
56 |
57 | # Local development directories
58 | qbio/
59 | pubmedsync/
60 | notebook/
61 | tmp/
62 | error.png
63 |
64 | # Backup files
65 | *.bak
66 | *.backup
67 | *~
68 |
69 | # Large data directories
70 | embeddings.db/
71 | models/
72 | logs/
73 | posters/
74 |
75 | # Build artifacts
76 | build/
77 | dist/
78 | *.egg-info/
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024-2025 Seoul National University
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docker/postgres/init.sql:
--------------------------------------------------------------------------------
1 | -- PostgreSQL initialization script for PaperSorter
2 | -- This script runs when the database container is first created
3 |
4 | -- Create pgvector extension if not exists
5 | CREATE EXTENSION IF NOT EXISTS vector;
6 |
7 | -- Create additional extensions that might be useful
8 | CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
9 | CREATE EXTENSION IF NOT EXISTS "pg_trgm"; -- For text search
10 |
11 | -- Set default configuration for better performance
12 | ALTER SYSTEM SET shared_buffers = '256MB';
13 | ALTER SYSTEM SET effective_cache_size = '1GB';
14 | ALTER SYSTEM SET maintenance_work_mem = '64MB';
15 | ALTER SYSTEM SET checkpoint_completion_target = '0.9';
16 | ALTER SYSTEM SET wal_buffers = '16MB';
17 | ALTER SYSTEM SET default_statistics_target = '100';
18 | ALTER SYSTEM SET random_page_cost = '1.1';
19 | ALTER SYSTEM SET effective_io_concurrency = '200';
20 | ALTER SYSTEM SET work_mem = '4MB';
21 | ALTER SYSTEM SET min_wal_size = '1GB';
22 | ALTER SYSTEM SET max_wal_size = '4GB';
23 |
24 | -- No tables or indexes are created by this script; indexes for better performance are created after the tables are initialized
25 | -- Note: The actual schema will be created by the 'papersorter init' command
--------------------------------------------------------------------------------
/docker/cron/crontab:
--------------------------------------------------------------------------------
1 | # PaperSorter Cron Schedule
2 | # This file is installed in the scheduler container
3 |
4 | # Environment setup
5 | SHELL=/bin/bash
6 | PATH=/usr/local/bin:/usr/bin:/bin
7 |
8 | # Load environment variables
9 | # Note: The scheduler-entrypoint.sh script writes env vars to /etc/environment
10 |
11 | # Update task - Fetch new papers and generate embeddings
12 | # Run every 3 hours
13 | 0 */3 * * * . /etc/environment && cd /app && papersorter update --config /app/config.yml --log-file /data/logs/cron-update.log --quiet >> /data/logs/cron.log 2>&1
14 |
15 | # Broadcast task - Send notifications for high-scoring papers
16 | # Run every hour (respects per-channel broadcast hours configured in the web interface)
17 | 0 * * * * . /etc/environment && cd /app && papersorter broadcast --config /app/config.yml --log-file /data/logs/cron-broadcast.log --quiet >> /data/logs/cron.log 2>&1
18 |
19 | # Clean up old logs - Remove logs older than 30 days
20 | # Run daily at 3 AM
21 | 0 3 * * * find /data/logs -name "*.log*" -mtime +30 -delete >> /data/logs/cron.log 2>&1
22 |
23 | # Health check - Write timestamp to verify cron is running
24 | */5 * * * * echo "Cron health check: $(date)" > /data/logs/cron-health.txt
25 |
--------------------------------------------------------------------------------
/examples/papersorter.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Gunicorn instance to serve PaperSorter
3 | After=network.target
4 |
5 | [Service]
6 | # Run the service as a non-root user
7 | User=papersorter
8 | Group=papersorter
9 |
10 | # The root directory of your project
11 | WorkingDirectory=/home/papersorter/papersorter
12 |
13 | # Environment variables
14 | # Point to your configuration file
15 | Environment="PAPER_SORTER_CONFIG=/home/papersorter/papersorter/config.yml"
16 | # Optional: Skip authentication for development (specify a user email)
17 | #Environment="PAPER_SORTER_SKIP_AUTH=user@example.com"
18 |
19 | # The command to start the service
20 | # Adjust the path to your Python environment's gunicorn
21 | ExecStart=/home/papersorter/venv/bin/gunicorn \
22 | --workers 2 \
23 | --threads 4 \
24 | --worker-class gthread \
25 | --bind 0.0.0.0:8000 \
26 | --timeout 120 \
27 | --access-logfile /var/log/papersorter/access.log \
28 | --error-logfile /var/log/papersorter/error.log \
29 | PaperSorter.web.wsgi:app
30 |
31 | # Restart the service if it fails
32 | Restart=on-failure
33 | RestartSec=5
34 |
35 | # Send SIGTERM for clean shutdown
36 | KillSignal=SIGTERM
37 |
38 | [Install]
39 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/docs/getting-started/index.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Getting Started
3 | ===============
4 |
5 | Welcome to PaperSorter! This section will help you get up and running with your personal academic paper recommendation system.
6 |
7 | PaperSorter uses machine learning to automatically filter and rank research papers from RSS feeds, helping you stay current with the latest developments in your field without information overload.
8 |
9 | What You'll Learn
10 | =================
11 |
12 | - How to install and configure PaperSorter
13 | - Setting up your first feed sources
14 | - Training your initial recommendation model
15 | - Understanding the basic workflow
16 |
17 | Prerequisites
18 | =============
19 |
20 | - Python 3.9 or later
21 | - PostgreSQL database
22 | - OpenAI-compatible API access for embeddings
23 | - Basic familiarity with command-line tools
24 |
25 | .. toctree::
26 | :maxdepth: 2
27 |
28 | installation
29 | quickstart
30 | first-model
31 |
32 | Next Steps
33 | ==========
34 |
35 | Once you've completed the getting started guide, explore:
36 |
37 | - :doc:`../user-guide/index` for detailed usage instructions
38 | - :doc:`../admin-guide/index` for deployment and maintenance
39 | - :doc:`../tutorials/index` for specific integration examples
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | from setuptools import setup
25 |
26 | setup()
--------------------------------------------------------------------------------
/PaperSorter/templates/error.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}Error - {{ site_name }}{% endblock %}
4 |
5 | {% block main_container %}
6 |
7 |
Error
8 |
9 | {{ error|default("An error occurred") }}
10 |
11 |
12 | ← Go Back
13 |
14 |
15 | {% endblock %}
16 |
17 | {% block styles %}
18 |
38 | {% endblock %}
39 |
--------------------------------------------------------------------------------
/PaperSorter/web/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Web interface package for PaperSorter."""
25 |
26 | from .app import create_app
27 |
28 | __all__ = ["create_app"]
29 |
--------------------------------------------------------------------------------
/PaperSorter/web/jobs/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Background job processing for the web interface."""
25 |
26 | from .poster import process_poster_job
27 |
28 | __all__ = ["process_poster_job"]
29 |
--------------------------------------------------------------------------------
/PaperSorter/web/models/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Data models for the web interface."""
25 |
26 | from .semantic_scholar import SemanticScholarItem
27 |
28 | __all__ = ["SemanticScholarItem"]
29 |
--------------------------------------------------------------------------------
/docker/caddy/Caddyfile:
--------------------------------------------------------------------------------
# Caddyfile for PaperSorter
# Development/default configuration

# The site address is taken from the DOMAIN environment variable and falls
# back to "localhost" when it is not set.
{$DOMAIN:localhost} {
    # Intent (per the original note): disable automatic HTTPS redirect for
    # development.
    # NOTE(review): no directive in this file actually disables automatic
    # HTTPS or its redirects (e.g. an explicit `http://` site address or the
    # `auto_https` global option) — confirm whether one was meant to be here.

    # Reverse proxy to PaperSorter web service
    reverse_proxy web:5001 {
        # Pass the client address and original scheme/host to the upstream
        header_up X-Real-IP {remote_host}
        header_up X-Forwarded-For {remote_host}
        header_up X-Forwarded-Proto {scheme}
        header_up X-Forwarded-Host {host}

        # Health check: poll /health every 30s, giving up after 5s
        health_uri /health
        health_interval 30s
        health_timeout 5s
    }

    # Compression
    encode gzip

    # Security headers (the leading "-" removes the Server response header)
    header {
        X-Content-Type-Options "nosniff"
        X-Frame-Options "SAMEORIGIN"
        X-XSS-Protection "1; mode=block"
        Referrer-Policy "strict-origin-when-cross-origin"
        -Server
    }

    # Logging: access log written to a file with size/count/age-based rolling
    log {
        output file /data/logs/caddy_access.log {
            roll_size 100mb
            roll_keep 5
            roll_keep_for 720h
        }
    }

    # Handle errors: reply with "<status code> <status text>" as the body
    handle_errors {
        respond "{http.error.status_code} {http.error.status_text}" {http.error.status_code}
    }

    # Larger uploads for file imports
    request_body {
        max_size 100MB
    }
}
50 |
--------------------------------------------------------------------------------
/PaperSorter/web/auth/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Authentication module for PaperSorter web interface."""
25 |
26 | from .models import User
27 | from .decorators import admin_required
28 | from .routes import auth_bp
29 |
30 | __all__ = ["User", "admin_required", "auth_bp"]
31 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation

# You can set these variables from the command line.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Custom targets

# clean: remove everything under the build directory.
.PHONY: clean
clean:
	rm -rf $(BUILDDIR)/*
	@echo "Build directory cleaned."

# livehtml: run sphinx-autobuild for the HTML builder, additionally watching
# the ../PaperSorter package sources and ignoring bytecode, backup and
# dot-files.
.PHONY: livehtml
livehtml:
	sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html \
		--watch ../PaperSorter \
		--ignore "*.pyc" \
		--ignore "*~" \
		--ignore ".*"

# serve: build the HTML docs (through the catch-all rule below), then serve
# them with Python's built-in HTTP server on port 8000.
.PHONY: serve
serve: html
	@cd $(BUILDDIR)/html && python -m http.server 8000

# github-pages: clean rebuild of the HTML docs plus a .nojekyll marker file
# in the output directory.
.PHONY: github-pages
github-pages: clean html
	@touch $(BUILDDIR)/html/.nojekyll
	@echo "Documentation ready for GitHub Pages in $(BUILDDIR)/html"

# check: run the Sphinx linkcheck and doctest builders; the success message
# is only reached when both commands exit successfully.
.PHONY: check
check:
	@$(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)/linkcheck" $(SPHINXOPTS) $(O)
	@$(SPHINXBUILD) -b doctest "$(SOURCEDIR)" "$(BUILDDIR)/doctest" $(SPHINXOPTS) $(O)
	@echo "All checks passed!"

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
48 |
--------------------------------------------------------------------------------
/PaperSorter/web/api/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """API blueprints for PaperSorter web interface."""
25 |
26 | from .feeds import feeds_bp
27 | from .settings import settings_bp
28 | from .search import search_bp
29 | from .user import user_bp
30 |
31 | __all__ = ["feeds_bp", "settings_bp", "search_bp", "user_bp"]
32 |
--------------------------------------------------------------------------------
/PaperSorter/cli/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Command-line interface infrastructure for PaperSorter."""
25 |
26 | from .base import BaseCommand, CommandRegistry
27 | from .context import CommandContext
28 | from .parser import create_parser
29 |
30 | __all__ = ['BaseCommand', 'CommandRegistry', 'CommandContext', 'create_parser']
31 |
--------------------------------------------------------------------------------
/Dockerfile.scheduler:
--------------------------------------------------------------------------------
# PaperSorter Scheduler Image (for cron jobs)
FROM python:3.11-slim

# Install system dependencies including cron
# (the apt package lists are removed in the same layer to keep it small)
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    postgresql-client \
    cron \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements.txt pyproject.toml setup.py ./
COPY PaperSorter/__version__.py ./PaperSorter/

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY . .

# Install PaperSorter package (editable install from /app)
RUN pip install --no-cache-dir -e .

# Create non-root user (same UID as main container)
# and the data directories, all owned by that user
RUN useradd -m -u 1000 papersorter && \
    mkdir -p /data/logs /data/models /data/posters && \
    chown -R papersorter:papersorter /app /data

# Copy cron configuration
COPY docker/cron/crontab /etc/cron.d/papersorter-cron

# Set permissions for cron file and install it as the papersorter user's
# crontab.
# NOTE(review): the same file is placed under /etc/cron.d AND loaded with
# `crontab -u`. Those two locations conventionally use different line formats
# (system cron.d entries carry a user column, per-user crontabs do not) —
# confirm docker/cron/crontab is written in the per-user format and that the
# /etc/cron.d copy does not cause the jobs to be parsed a second time.
RUN chmod 0644 /etc/cron.d/papersorter-cron && \
    crontab -u papersorter /etc/cron.d/papersorter-cron

# Copy entrypoint script
COPY docker/scripts/scheduler-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Set environment variables
ENV PAPERSORTER_DATADIR=/data \
    PAPERSORTER_CONFIG=/app/config.yml \
    PYTHONUNBUFFERED=1

# Run cron in foreground (delegated to the entrypoint script).
# No USER instruction is set in this file, so the entrypoint starts as the
# base image's default user rather than as papersorter.
CMD ["/entrypoint.sh"]
--------------------------------------------------------------------------------
/PaperSorter/web/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Utility functions for the web interface."""
25 |
26 | from .database import (
27 | get_default_model_id,
28 | get_user_model_id,
29 | get_unlabeled_item,
30 | get_labeling_stats,
31 | )
32 |
33 | __all__ = [
34 | "get_default_model_id",
35 | "get_user_model_id",
36 | "get_unlabeled_item",
37 | "get_labeling_stats",
38 | ]
39 |
--------------------------------------------------------------------------------
/tests/db/test_manager.py:
--------------------------------------------------------------------------------
1 | from PaperSorter.db import manager
2 |
3 |
class DummyConnection:
    """Minimal stand-in for a database connection used by the pool tests.

    Tracks the attributes the tests inspect: ``closed`` (stored as ``0`` or
    ``1``), ``autocommit``, and how many times ``close()`` was invoked.
    """

    def __init__(self, *, closed=False, autocommit=True):
        self.closed = 1 if closed else 0
        self.autocommit = autocommit
        self.closed_calls = 0

    def close(self):
        """Mark the connection closed and count the call."""
        self.closed_calls += 1
        self.closed = 1
13 |
14 |
class FakeThreadedPool:
    """In-memory replacement for a threaded connection pool.

    Connections are handed out in FIFO order from a copy of the class-level
    ``initial_connections`` list, and every ``putconn`` call is recorded in
    ``put_calls`` as a ``(conn, closed)`` tuple.
    """

    # Connections each new pool instance starts with; tests set this before
    # constructing the pool.
    initial_connections = []

    def __init__(self, minconn, maxconn, **kwargs):
        # The sizing arguments and extra keyword arguments are accepted but
        # not used by the fake.
        self.put_calls = []
        self._queue = [*self.initial_connections]

    def getconn(self):
        """Return the oldest queued connection, or raise if none remain."""
        if self._queue:
            return self._queue.pop(0)
        raise RuntimeError("No connections left in fake pool")

    def putconn(self, conn, close=False):
        """Record the return of *conn*; re-queue it unless *close* is truthy."""
        discard = bool(close)
        self.put_calls.append((conn, discard))
        if close:
            return
        self._queue.append(conn)

    def closeall(self):
        """Drop every queued connection."""
        self._queue.clear()
34 |
35 |
def test_acquire_discards_stale_connections(monkeypatch):
    """``_acquire()`` must skip a closed connection and hand out a live one.

    The fake pool is seeded with a closed connection followed by a healthy
    one. The test checks that the healthy connection is returned with
    ``autocommit`` switched off and that the closed one was given back to the
    pool with ``close=True``.
    """
    closed_conn = DummyConnection(closed=True)
    healthy_conn = DummyConnection(autocommit=True)

    # Seed the fake pool via monkeypatch so the class-level list is restored
    # when the test finishes; a plain class-attribute assignment would leak
    # these connections into any later test that builds a FakeThreadedPool.
    monkeypatch.setattr(
        FakeThreadedPool, "initial_connections", [closed_conn, healthy_conn]
    )
    monkeypatch.setattr(manager, "ThreadedConnectionPool", FakeThreadedPool)

    db_manager = manager.DatabaseManager({}, register_pgvector=False)

    conn = db_manager._acquire()

    assert conn is healthy_conn
    assert healthy_conn.autocommit is False
    assert db_manager._pool.put_calls == [(closed_conn, True)]
50 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | qtest/
24 | tmp/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 | *.db
31 | *.pkl
32 | *.xlsx
33 | *eaDir
34 |
35 | # Temporary files and directories
36 | *.swp
37 | *.swo
38 | *~
39 | .*.swp
40 | background-updates.log*
41 | notebook/
42 | old/
43 | qbio/
44 | run-update.sh
45 | run-broadcast.sh
46 | config.yml
47 | .activate.sh
48 |
49 | # Documentation build output
50 | docs/_build/
51 | docs/build/
52 | docs/.doctrees/
53 | docs/_static/generated/
54 | docs/_autosummary/
55 | *.doctree
56 | docs/api/generated/
57 |
58 | # Sphinx autobuild
59 | docs/.sass-cache/
60 | docs/.sphinx-build/
61 |
62 | # Temporary directories
63 | tmp/
64 | temp/
65 | .tmp/
66 | .temp/
67 | cache/
68 | .cache/
69 |
70 | # IDE and editor files
71 | .vscode/
72 | .idea/
73 | *.sublime-*
74 | .project
75 | .pydevproject
76 |
77 | # Data files
78 | *.tsv
79 | search-dump.json
80 | sqlnotes.txt
81 | dbbackup-*
82 |
83 | # Model comparison files
84 | model_comparison.tsv
85 | model_*.png
86 | *_analysis.py
87 | *_differences.py
88 | *_comparison.py
89 | compare_model_predictions.py
90 | analyze_model_differences.py
91 | summarize_model_comparison.py
92 | plot_model_comparisons.py
93 |
94 | # Temporary scripts
95 | recreate_embeddings_vertex.py
96 | openai-google-embedding.py
97 | TODO
98 | NOTES
99 |
--------------------------------------------------------------------------------
/PaperSorter/web/wsgi.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """WSGI entry point for gunicorn and other WSGI servers."""
25 |
26 | import os
27 | from .app import create_app
28 |
# Resolve the config path from the environment, falling back to ./config.yml.
# PAPER_SORTER_CONFIG is the name this module has always read and keeps
# priority. PAPERSORTER_CONFIG is the spelling exported by the project's
# Dockerfiles, so it is accepted as a fallback to keep the two consistent.
# An empty value is treated the same as an unset variable.
config_path = (
    os.environ.get('PAPER_SORTER_CONFIG')
    or os.environ.get('PAPERSORTER_CONFIG')
    or './config.yml'
)

# Get optional skip authentication user from environment
# (None when the variable is not set)
skip_authentication = os.environ.get('PAPER_SORTER_SKIP_AUTH')

# Create the application with the config path; WSGI servers import ``app``
app = create_app(config_path, skip_authentication=skip_authentication)
--------------------------------------------------------------------------------
/PaperSorter/web/auth/decorators.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Authentication decorators."""
25 |
26 | from functools import wraps
27 | from flask import abort
28 | from flask_login import login_required, current_user
29 |
30 |
def admin_required(f):
    """Restrict a route to authenticated administrators.

    The view is first guarded by ``login_required``; once a user is logged
    in, the request is aborted with HTTP 403 unless ``current_user.is_admin``
    is truthy. The returned wrapper carries the metadata of *f*.
    """

    def _admin_only(*args, **kwargs):
        if not current_user.is_admin:
            abort(403)  # Forbidden
        return f(*args, **kwargs)

    # Same stacking as ``@wraps(f)`` over ``@login_required``: the login check
    # wraps the admin check, then the result takes on f's metadata.
    return wraps(f)(login_required(_admin_only))
42 |
--------------------------------------------------------------------------------
/PaperSorter/notification/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Notification provider system for sending alerts to various platforms."""
25 |
26 | from .base import NotificationProvider, NotificationError
27 | from .slack import SlackProvider
28 | from .discord import DiscordProvider
29 | from .email import EmailProvider
30 | from .factory import create_notification_provider
31 |
32 | __all__ = [
33 | "NotificationProvider",
34 | "NotificationError",
35 | "SlackProvider",
36 | "DiscordProvider",
37 | "EmailProvider",
38 | "create_notification_provider",
39 | ]
40 |
--------------------------------------------------------------------------------
/PaperSorter/templates/email/paper_card.html:
--------------------------------------------------------------------------------
1 | {# Reusable paper card component for email templates #}
2 | {# Usage: {% include 'email/paper_card.html' %} #}
3 | {# Expects: paper object in context #}
4 |
5 |
6 |
9 |
10 | {% if paper.author %}
11 |
12 | {{ paper.author }}
13 |
14 | {% endif %}
15 |
16 |
17 | {% if paper.origin %}
18 | {{ paper.origin }}
19 | {% endif %}
20 | {% if paper.published %}
21 | • Published: {{ paper.published.strftime('%Y-%m-%d') }}
22 | {% endif %}
23 |
24 |
25 | {% if paper.score is defined %}
26 |
27 |
28 | Score: {{ "%.2f"|format(paper.score) }}
29 |
30 |
31 | {% endif %}
32 |
33 | {% if paper.content %}
34 |
35 | {{ paper.content|safe_html|truncate(500) }}
36 |
37 | {% elif paper.tldr %}
38 |
39 | Summary: {{ paper.tldr|safe_html }}
40 |
41 | {% endif %}
42 |
43 | {% if base_url %}
44 |
50 | {% endif %}
51 |
52 |
--------------------------------------------------------------------------------
/examples/cron-broadcast.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Example cron wrapper script for PaperSorter broadcast task
# Copy and customize this script for your environment

# Set your PaperSorter command
# For system-wide installation:
PAPERSORTER_CMD="papersorter"
# For virtual environment:
# PAPERSORTER_CMD="/path/to/venv/bin/papersorter"
# For conda environment:
# PAPERSORTER_CMD="conda run -n myenv papersorter"

# Configuration
PAPERSORTER_DIR="/path/to/papersorter"
LOGFILE="background-updates.log"
CONFIG_FILE="./config.yml"

# Change to PaperSorter directory
cd "$PAPERSORTER_DIR" || exit 1

# Function to rotate logs when they get too large.
# When $LOGFILE exceeds 50MB it is renamed with a timestamp suffix, gzipped,
# and replaced by a fresh empty file. If 5 or more compressed logs already
# exist, the oldest one is deleted first. Uses GNU `stat -c%s` for the size.
rotate_logs() {
    local size timestamp oldest_log

    # Nothing to rotate if the log file does not exist
    [ -f "$LOGFILE" ] || return 0

    # Only rotate when the log is larger than 50MB
    size=$(stat -c%s "$LOGFILE") || return 0
    [ "$size" -gt 52428800 ] || return 0

    echo "$(date): Rotating log file (size: ${size} bytes)" >> "$LOGFILE"

    # Remove oldest compressed log if we have 5 or more.
    # The variable part is quoted so a path containing spaces is not split;
    # the glob suffix stays unquoted so it still expands.
    if [ "$(ls -1 "${LOGFILE}".*.gz 2>/dev/null | wc -l)" -ge 5 ]; then
        oldest_log=$(ls -1t "${LOGFILE}".*.gz | tail -1)
        rm -f "$oldest_log"
    fi

    # Rotate and compress current log
    timestamp=$(date +%Y%m%d_%H%M%S)
    mv "$LOGFILE" "${LOGFILE}.${timestamp}"
    gzip "${LOGFILE}.${timestamp}"

    # Create new log file
    touch "$LOGFILE"
    echo "$(date): Log rotated. Previous log compressed as ${LOGFILE}.${timestamp}.gz" >> "$LOGFILE"
}

# Run the broadcast task
# ($PAPERSORTER_CMD is deliberately unquoted so a multi-word command such as
# "conda run -n myenv papersorter" is split into separate arguments.)
$PAPERSORTER_CMD broadcast \
    --config "$CONFIG_FILE" \
    --log-file "$LOGFILE" \
    --quiet
status=$?

# Rotate logs if needed
rotate_logs

# Exit with the task's status so a failed run is not masked by log rotation
exit "$status"
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# PaperSorter Docker Image
FROM python:3.11-slim

# Install system dependencies
# (the apt package lists are removed in the same layer to keep it small)
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    postgresql-client \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first for better caching
COPY requirements.txt pyproject.toml setup.py ./
COPY PaperSorter/__version__.py ./PaperSorter/

# Install Python dependencies
# The gunicorn requirement is quoted so the shell never treats the square
# brackets of the extras specifier as a glob pattern.
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir "gunicorn[gthread]"

# Copy the application code
COPY . .

# Install PaperSorter package (editable install from /app)
RUN pip install --no-cache-dir -e .

# Create non-root user and the data directories it owns
RUN useradd -m -u 1000 papersorter && \
    mkdir -p /data/logs /data/models /data/posters && \
    chown -R papersorter:papersorter /app /data

# Copy entrypoint script
COPY docker/scripts/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Switch to non-root user
USER papersorter

# Set environment variables
ENV PAPERSORTER_DATADIR=/data \
    PAPERSORTER_CONFIG=/app/config.yml \
    PYTHONUNBUFFERED=1

# Expose port (matches the gunicorn --bind port below)
EXPOSE 5001

# Use entrypoint for database wait
ENTRYPOINT ["/entrypoint.sh"]

# Default command (can be overridden)
# Single gthread worker with 4 threads; access and error logs go to /data/logs
CMD ["gunicorn", \
     "--worker-class", "gthread", \
     "--workers", "1", \
     "--threads", "4", \
     "--bind", "0.0.0.0:5001", \
     "--access-logfile", "/data/logs/access.log", \
     "--error-logfile", "/data/logs/error.log", \
     "--log-level", "info", \
     "--timeout", "120", \
     "docker.scripts.wsgi:app"]
--------------------------------------------------------------------------------
/PaperSorter/templates/403.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}Access Denied - {{ site_name }}{% endblock %}
4 | {% block meta_description %}403 Forbidden - You don't have permission to access this page{% endblock %}
5 |
6 | {% block header %}
7 |
8 | {% endblock %}
9 |
10 | {% block styles %}
11 |
48 | {% endblock %}
49 |
50 | {% block content %}
51 |
52 |
403
53 |
Access Denied
54 |
55 | You don't have permission to access this page.
56 | This area is restricted to administrators only.
57 |
58 |
← Back to Papers
59 |
60 | {% endblock %}
--------------------------------------------------------------------------------
/examples/cron-update.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Example cron wrapper script for PaperSorter update task
# Copy and customize this script for your environment

# Set your PaperSorter command
# For system-wide installation:
PAPERSORTER_CMD="papersorter"
# For virtual environment:
# PAPERSORTER_CMD="/path/to/venv/bin/papersorter"
# For conda environment:
# PAPERSORTER_CMD="conda run -n myenv papersorter"

# Configuration
PAPERSORTER_DIR="/path/to/papersorter"
LOGFILE="background-updates.log"
CONFIG_FILE="./config.yml"

# Change to PaperSorter directory
cd "$PAPERSORTER_DIR" || exit 1

# Function to rotate logs when they get too large.
# When $LOGFILE exceeds 50MB it is renamed with a timestamp suffix, gzipped,
# and replaced by a fresh empty file. If 5 or more compressed logs already
# exist, the oldest one is deleted first. Uses GNU `stat -c%s` for the size.
rotate_logs() {
    local size timestamp oldest_log

    # Nothing to rotate if the log file does not exist
    [ -f "$LOGFILE" ] || return 0

    # Only rotate when the log is larger than 50MB
    size=$(stat -c%s "$LOGFILE") || return 0
    [ "$size" -gt 52428800 ] || return 0

    echo "$(date): Rotating log file (size: ${size} bytes)" >> "$LOGFILE"

    # Remove oldest compressed log if we have 5 or more.
    # The variable part is quoted so a path containing spaces is not split;
    # the glob suffix stays unquoted so it still expands.
    if [ "$(ls -1 "${LOGFILE}".*.gz 2>/dev/null | wc -l)" -ge 5 ]; then
        oldest_log=$(ls -1t "${LOGFILE}".*.gz | tail -1)
        rm -f "$oldest_log"
    fi

    # Rotate and compress current log
    timestamp=$(date +%Y%m%d_%H%M%S)
    mv "$LOGFILE" "${LOGFILE}.${timestamp}"
    gzip "${LOGFILE}.${timestamp}"

    # Create new log file
    touch "$LOGFILE"
    echo "$(date): Log rotated. Previous log compressed as ${LOGFILE}.${timestamp}.gz" >> "$LOGFILE"
}

# Run the update task
# ($PAPERSORTER_CMD is deliberately unquoted so a multi-word command such as
# "conda run -n myenv papersorter" is split into separate arguments.)
$PAPERSORTER_CMD update \
    --config "$CONFIG_FILE" \
    --log-file "$LOGFILE" \
    --quiet \
    --limit-sources 20 \
    --check-interval-hours 3
status=$?

# Rotate logs if needed
rotate_logs

# Exit with the task's status so a failed run is not masked by log rotation
exit "$status"
--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
name: Build and Deploy Documentation

# Runs when docs, package sources or this workflow change on main, on pull
# requests against main that touch docs or package sources, and on demand.
on:
  push:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'PaperSorter/**/*.py'
      - '.github/workflows/docs.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'PaperSorter/**/*.py'
  workflow_dispatch:

# Token permissions for the jobs below (Pages deployment needs the last two)
permissions:
  contents: read
  pages: write
  id-token: write

# Share one "pages" concurrency group; a queued run does not cancel one that
# is already in progress.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # actions/cache bumped from v3 to v4 to match the current major version
      # used by the other actions in this workflow; the inputs are unchanged.
      - name: Cache dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install -r docs/requirements.txt

      - name: Build documentation
        run: |
          cd docs
          make clean
          make html

      # Broken links are reported but do not fail the build
      - name: Check for broken links
        run: |
          cd docs
          make linkcheck
        continue-on-error: true

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/_build/html

  # Deploy only for pushes to main, after a successful build
  deploy:
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
--------------------------------------------------------------------------------
/PaperSorter/templates/partials/similar_section.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
12 |
13 |
14 |
15 |
18 |
19 |
20 |
Use the actions above to generate an AI summary or infographic.
21 |
22 |
23 |
24 |
Generating summary...
25 |
26 |
27 |
28 |
Generating infographic poster...
29 |
This may take 3-5 minutes. Please wait...
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 | Loading similar articles...
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/PaperSorter/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | import sys
25 | import importlib
26 | from .tasks import __all__ as alltasks
27 | from .cli.parser import main as cli_main
28 |
def main():
    """Load every task module, then hand control to the CLI parser.

    Each name listed in the ``tasks`` package's ``__all__`` is imported so
    that migrated commands can register themselves as an import side effect.
    A task that fails to import is reported on stderr and skipped; the
    remaining tasks are still loaded.

    Returns:
        Whatever the CLI entry point returns (used as the process exit code
        when this module is run as a script).
    """
    for task_name in alltasks:
        module_path = f".tasks.{task_name}"
        try:
            # Importing is what triggers command registration.
            importlib.import_module(module_path, package="PaperSorter")
        except ImportError as exc:
            # Best effort: one broken task must not take down the whole CLI.
            print(f"Warning: Could not import task {task_name}: {exc}", file=sys.stderr)

    # All importable commands are registered; run the CLI.
    return cli_main()
44 |
45 | if __name__ == "__main__":
46 | sys.exit(main())
47 |
48 | # Export main for use as console script
49 | __all__ = ['main']
50 |
--------------------------------------------------------------------------------
/docs/user-guide/index.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | User Guide
3 | ==========
4 |
5 | This comprehensive guide covers all aspects of using PaperSorter effectively, from basic configuration to advanced workflows.
6 |
7 | Whether you're a researcher looking to streamline your paper discovery process or an administrator managing a team's research feeds, this guide provides the knowledge you need to get the most out of PaperSorter.
8 |
9 | Overview
10 | ========
11 |
12 | PaperSorter helps you:
13 |
14 | - **Discover relevant papers** automatically from multiple sources
15 | - **Train personalized models** that learn your research interests
16 | - **Receive targeted notifications** through Slack, email, or other channels
17 | - **Manage and label papers** through an intuitive web interface
18 | - **Search and explore** related work using semantic similarity
19 |
20 | .. toctree::
21 | :maxdepth: 2
22 |
23 | configuration
24 | feed-sources
25 | training-models
26 | sharing-broadcasting
27 | notifications
28 | search-from-pdf
29 | web-interface
30 | workflows
31 |
32 | Quick Reference
33 | ===============
34 |
35 | Common Tasks
36 | ------------
37 |
38 | - **Add new feeds**: Use the web interface or directly edit the database
39 | - **Train a model**: Label ~100 papers, then run ``papersorter train``
40 | - **Check new papers**: Run ``papersorter update`` to fetch and score articles
41 | - **Send notifications**: Use ``papersorter broadcast`` to deliver recommendations
42 |
43 | Best Practices
44 | --------------
45 |
46 | - Start with broad feeds and narrow down based on model performance
47 | - Label papers consistently to improve model accuracy
48 | - Regularly retrain models as your research interests evolve
49 | - Monitor notification channels to ensure appropriate content delivery
50 |
51 | Related Sections
52 | ================
53 |
54 | - :doc:`../getting-started/index` - Initial setup and installation
55 | - :doc:`../cli-reference/index` - Complete command reference
56 | - :doc:`../tutorials/index` - Step-by-step integration guides
--------------------------------------------------------------------------------
/PaperSorter/log.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | import logging
25 |
# Console handler shared by the package; it starts at INFO and is raised to
# WARNING by initialize_logging(quiet=True).
console = logging.StreamHandler()
console.setLevel(logging.INFO)

# Package-wide logger. The console handler is attached at import time, so
# importing this module is enough to get INFO-level console output.
log = logging.getLogger("PaperSorter")
log.setLevel(logging.INFO)
log.addHandler(console)
32 |
33 |
def initialize_logging(task="", logfile=None, quiet=False):
    """Configure file logging and console verbosity for a task run.

    Args:
        task: Label embedded in every line written to the log file.
        logfile: Path of a file to append log records to. When ``None``,
            no file handler is installed.
        quiet: When true, the console handler only emits WARNING and above.
    """

    def _skip_old_reader(record):
        # Keep TheOldReaderConnection records out of the log file.
        return not record.name.endswith("TheOldReaderConnection")

    if logfile is not None:
        file_format = logging.Formatter(
            fmt=f"[%(asctime)s/{task}] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        file_handler = logging.FileHandler(logfile, mode="a")
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(file_format)
        file_handler.addFilter(_skip_old_reader)
        # The file handler is added next to the console handler, not
        # instead of it.
        log.addHandler(file_handler)

    if quiet:
        console.setLevel(logging.WARNING)
49 |
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # PaperSorter Examples
2 |
3 | This directory contains example configuration and automation scripts for PaperSorter.
4 |
5 | ## Files
6 |
7 | ### config.yml
8 | Example configuration file for PaperSorter. Copy this to your working directory and customize with your API keys and database credentials.
9 |
10 | ### Cron Wrapper Scripts
11 |
12 | These scripts provide automated execution with log rotation:
13 |
14 | - **cron-update.sh** - Runs the update task to fetch new articles
15 | - **cron-broadcast.sh** - Runs the broadcast task to send notifications
16 | - **cron-combined.sh** - Runs both update and broadcast in sequence
17 |
18 | To use these scripts:
19 | 1. Copy to your preferred location
20 | 2. Edit the configuration variables at the top of each script
21 | 3. Make executable: `chmod +x *.sh`
22 | 4. Add to crontab (see crontab.example)
23 |
24 | ### crontab.example
25 | Example crontab entries showing different scheduling strategies for running PaperSorter tasks.
26 |
27 | ## Usage
28 |
29 | 1. **Initial Setup**
30 | ```bash
31 | # Copy and customize configuration
32 | cp examples/config.yml ./config.yml
33 | # Edit config.yml with your API keys and settings
34 | ```
35 |
36 | 2. **Manual Execution**
37 | ```bash
38 | # Run tasks manually
39 | papersorter update --config ./config.yml
40 | papersorter broadcast --config ./config.yml
41 | ```
42 |
43 | 3. **Automated Execution**
44 | ```bash
45 | # Copy and customize cron scripts
46 | cp examples/cron-combined.sh ~/bin/papersorter-cron.sh
47 | chmod +x ~/bin/papersorter-cron.sh
48 | # Edit the script with your paths
49 |
50 | # Add to crontab
51 | crontab -e
52 | # Add: 0 */3 * * * /home/username/bin/papersorter-cron.sh
53 | ```
54 |
55 | ## Notes
56 |
57 | - The cron scripts include automatic log rotation to prevent logs from growing too large
58 | - Broadcast hours are now configured per channel in the web interface settings
59 | - The broadcast task can run every hour and will automatically respect each channel's configured hours
60 | - Adjust the update schedule according to your preferences
--------------------------------------------------------------------------------
/examples/cron-combined.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Example combined cron wrapper script for PaperSorter
3 | # Runs both update and broadcast tasks in sequence
4 | # Copy and customize this script for your environment
5 |
6 | # Set your PaperSorter command
7 | # For system-wide installation:
8 | PAPERSORTER_CMD="papersorter"
9 | # For virtual environment:
10 | # PAPERSORTER_CMD="/path/to/venv/bin/papersorter"
11 | # For conda environment:
12 | # PAPERSORTER_CMD="conda run -n myenv papersorter"
13 |
14 | # Configuration
15 | PAPERSORTER_DIR="/path/to/papersorter"
16 | LOGFILE="background-updates.log"
17 | CONFIG_FILE="./config.yml"
18 |
19 | # Change to PaperSorter directory
20 | cd "$PAPERSORTER_DIR" || exit 1
21 |
# Rotate "$LOGFILE" once it grows past 50 MB.
#
# The current log is renamed with a timestamp suffix and gzip-compressed, and
# a fresh log file is started. If five or more compressed archives already
# exist, the oldest one (by modification time) is removed first.
# Uses `stat -c%s`, i.e. GNU stat syntax.
rotate_logs() {
    local max_bytes=52428800 # 50 MB
    local max_archives=5
    local size archive_count oldest_log timestamp

    # Nothing to do until the log exists and exceeds the size limit.
    [ -f "$LOGFILE" ] || return 0
    size=$(stat -c%s "$LOGFILE") || return 0
    [ "$size" -gt "$max_bytes" ] || return 0

    echo "$(date): Rotating log file (size: ${size} bytes)" >> "$LOGFILE"

    # Remove oldest compressed log if we already have max_archives or more.
    # The variable part of the glob is quoted so paths with spaces work.
    archive_count=$(ls -1 -- "$LOGFILE".*.gz 2>/dev/null | wc -l)
    if [ "$archive_count" -ge "$max_archives" ]; then
        oldest_log=$(ls -1t -- "$LOGFILE".*.gz 2>/dev/null | tail -n 1)
        if [ -n "$oldest_log" ]; then
            rm -f -- "$oldest_log"
        fi
    fi

    # Rotate and compress current log
    timestamp=$(date +%Y%m%d_%H%M%S)
    mv -- "$LOGFILE" "${LOGFILE}.${timestamp}"
    gzip -- "${LOGFILE}.${timestamp}"

    # Create new log file
    touch -- "$LOGFILE"
    echo "$(date): Log rotated. Previous log compressed as ${LOGFILE}.${timestamp}.gz" >> "$LOGFILE"
}
44 |
45 | # Always run update to fetch new articles
46 | echo "$(date): Starting update task" >> "$LOGFILE"
47 | $PAPERSORTER_CMD update \
48 | --config "$CONFIG_FILE" \
49 | --log-file "$LOGFILE" \
50 | --quiet \
51 | --limit-sources 20 \
52 | --check-interval-hours 3
53 |
54 | # Run broadcast task (channels have their own hour restrictions)
55 | echo "$(date): Starting broadcast task" >> "$LOGFILE"
56 | $PAPERSORTER_CMD broadcast \
57 | --config "$CONFIG_FILE" \
58 | --log-file "$LOGFILE" \
59 | --quiet
60 |
61 | # Rotate logs if needed
62 | rotate_logs
--------------------------------------------------------------------------------
/docker/caddy/Caddyfile.prod:
--------------------------------------------------------------------------------
1 | # Caddyfile for PaperSorter
2 | # Production configuration with automatic HTTPS
3 |
# Global options (must be the first block in the Caddyfile)
{
    # Timeouts for long-running requests (training, imports).
    # Caddy v2 has no site-level `timeouts` directive; server timeouts are
    # configured through the global `servers` option.
    servers {
        timeouts {
            read_body 5m
            write 5m
        }
    }
}

{$DOMAIN} {
    # Automatic HTTPS with Let's Encrypt
    tls {$EMAIL}

    # Reverse proxy to PaperSorter web service
    reverse_proxy web:5001 {
        header_up X-Real-IP {remote_host}
        header_up X-Forwarded-For {remote_host}
        header_up X-Forwarded-Proto {scheme}
        header_up X-Forwarded-Host {host}

        # Health check
        health_uri /health
        health_interval 30s
        health_timeout 5s

        # Retry policy
        lb_policy round_robin
        lb_try_duration 10s
        lb_try_interval 1s
    }

    # Compression
    encode gzip zstd

    # Security headers
    header {
        Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
        X-Content-Type-Options "nosniff"
        X-Frame-Options "SAMEORIGIN"
        X-XSS-Protection "1; mode=block"
        Referrer-Policy "strict-origin-when-cross-origin"
        Content-Security-Policy "default-src 'self' https:; script-src 'self' 'unsafe-inline' 'unsafe-eval' https:; style-src 'self' 'unsafe-inline' https:; img-src 'self' data: https:; font-src 'self' data: https:;"
        -Server
    }

    # Logging
    log {
        output file /data/logs/caddy_access.log {
            roll_size 100mb
            roll_keep 10
            roll_keep_for 2160h
        }
    }

    # Handle errors
    handle_errors {
        @404 {
            expression {http.error.status_code} == 404
        }
        respond @404 "Page not found" 404
        respond "{http.error.status_code} {http.error.status_text}" {http.error.status_code}
    }

    # Larger uploads for file imports
    request_body {
        max_size 100MB
    }
}
69 |
70 | # Redirect www to non-www
71 | www.{$DOMAIN} {
72 | redir https://{$DOMAIN}{uri} permanent
73 | }
--------------------------------------------------------------------------------
/docs/admin-guide/index.rst:
--------------------------------------------------------------------------------
1 | ===================
2 | Administrator Guide
3 | ===================
4 |
5 | This guide is designed for system administrators, DevOps engineers, and technical users responsible for deploying, maintaining, and scaling PaperSorter installations.
6 |
7 | Learn how to set up robust, production-ready deployments that can serve multiple users and handle large volumes of research papers efficiently.
8 |
9 | Scope
10 | =====
11 |
12 | This guide covers:
13 |
14 | - **Production deployment** strategies and best practices
15 | - **Database administration** including setup, optimization, and maintenance
16 | - **Security considerations** for multi-user environments
17 | - **Monitoring and troubleshooting** common issues
18 | - **Backup and disaster recovery** procedures
19 |
20 | .. toctree::
21 | :maxdepth: 2
22 |
23 | authentication
24 | deployment
25 | database-setup
26 | backup-restore
27 | monitoring
28 | security
29 | troubleshooting
30 |
31 | Key Responsibilities
32 | ====================
33 |
34 | System Architecture
35 | -------------------
36 |
37 | - Database server management (PostgreSQL with pgvector)
38 | - Web server configuration and load balancing
39 | - Background task scheduling (cron jobs or task queues)
40 | - API key and credential management
41 |
42 | Operational Tasks
43 | -----------------
44 |
45 | - Regular database maintenance and optimization
46 | - Model performance monitoring and retraining schedules
47 | - User access management and authentication setup
48 | - System resource monitoring and scaling decisions
49 |
50 | Security Considerations
51 | -----------------------
52 |
53 | - OAuth provider configuration (Google, GitHub, ORCID)
54 | - Database access controls and encryption
55 | - API key rotation and secure storage
56 | - Network security and SSL certificate management
57 |
58 | Production Readiness
59 | ====================
60 |
61 | Before deploying to production, ensure:
62 |
63 | - Database backups are automated and tested
64 | - Monitoring and alerting systems are in place
65 | - Security policies are implemented and documented
66 | - Disaster recovery procedures are established
67 |
68 | Related Resources
69 | =================
70 |
71 | - :doc:`../development/index` - Contributing and extending PaperSorter
72 | - :doc:`../api/index` - API reference for custom integrations
--------------------------------------------------------------------------------
/PaperSorter/cli/types.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Type converter functions for argparse."""
25 |
26 | import argparse
27 | from typing import List
28 |
29 |
def positive_int(value: str) -> int:
    """Parse *value* as an integer and require it to be greater than zero.

    Raises:
        argparse.ArgumentTypeError: If the parsed integer is zero or negative.
        ValueError: If *value* is not an integer literal (propagated from
            ``int``).
    """
    parsed = int(value)
    if parsed > 0:
        return parsed
    raise argparse.ArgumentTypeError(f"{value} must be a positive integer")
36 |
37 |
def probability_float(value: str) -> float:
    """Parse *value* as a float and require it to lie in the range [0, 1].

    Raises:
        argparse.ArgumentTypeError: If the parsed number is outside
            0.0..1.0 (this includes NaN, which fails the range comparison).
        ValueError: If *value* is not a float literal (propagated from
            ``float``).
    """
    number = float(value)
    if 0.0 <= number <= 1.0:
        return number
    raise argparse.ArgumentTypeError(f"{value} must be between 0.0 and 1.0")
44 |
45 |
def comma_separated_list(value: str) -> List[str]:
    """Split *value* on commas into a list of trimmed, non-empty items.

    Whitespace around each item is removed, and items that are empty after
    trimming are dropped.
    """
    trimmed = (piece.strip() for piece in value.split(","))
    return [piece for piece in trimmed if piece]
49 |
50 |
def issn_list(value: str) -> str:
    """Validate a single ISSN written as ``XXXX-XXXX``.

    Surrounding whitespace is stripped first. The remaining text must be
    four digits, a hyphen, three digits, and a final check character that is
    a digit or ``X``. Only the format is checked; the check digit is not
    verified arithmetically.

    Args:
        value: Raw command-line argument.

    Returns:
        The stripped ISSN string.

    Raises:
        argparse.ArgumentTypeError: If the value does not have ISSN format.
    """
    import re  # local import: the module's top-level imports are left untouched

    value = value.strip()
    # ASCII digit classes are spelled out so that non-ASCII digits are
    # rejected; a length/hyphen check alone would accept e.g. "abcd-efgh".
    if re.fullmatch(r"[0-9]{4}-[0-9]{3}[0-9Xx]", value):
        return value
    raise argparse.ArgumentTypeError(f"{value} is not a valid ISSN format (XXXX-XXXX)")
57 |
--------------------------------------------------------------------------------
/PaperSorter/templates/email/newsletter.txt:
--------------------------------------------------------------------------------
1 | PAPERSORTER DIGEST
2 | {%- if channel_name %}
3 | {{ channel_name }}
4 | {%- endif %}
5 | {{ date.strftime('%B %d, %Y') }}
6 | ================================================================================
7 |
8 | {% if papers|length == 1 -%}
9 | 1 paper selected for your review
10 | {%- else -%}
11 | {{ papers|length }} papers selected for your review
12 | {%- endif -%}
13 | {%- if source_count %} from {{ source_count }} sources{% endif %}
14 |
15 | ================================================================================
16 |
17 | {% for paper in papers %}
18 | {{ loop.index }}. {{ paper.title }}
19 | {{ '-' * paper.title|length }}
20 |
21 | {% if paper.author -%}
22 | Authors: {{ paper.author }}
23 | {% endif -%}
24 | {%- if paper.origin -%}
25 | Source: {{ paper.origin }}
26 | {% endif -%}
27 | {%- if paper.published -%}
28 | Published: {{ paper.published.strftime('%Y-%m-%d') }}
29 | {% endif -%}
30 | {%- if paper.score is defined -%}
31 | Score: {{ "%.0f"|format(paper.score * 100) }}
32 | {%- if paper.other_scores %}
33 | {%- for other_score in paper.other_scores %}
34 | {%- if other_score.score is defined %}
35 | {{ other_score.score_name }}: {{ "%.0f"|format(other_score.score * 100) }}
36 | {%- endif %}
37 | {%- endfor %}
38 | {%- endif %}
39 | {% endif %}
40 |
41 | {% if include_abstracts -%}
42 | {%- if paper.content -%}
43 | Abstract:
44 | {{ paper.content|truncate(500)|wordwrap(78) }}
45 | {%- elif paper.tldr -%}
46 | Summary:
47 | {{ paper.tldr|wordwrap(78) }}
48 | {%- endif -%}
49 | {%- elif paper.tldr -%}
50 | TL;DR:
51 | {{ paper.tldr|wordwrap(78) }}
52 | {%- elif paper.get('_abstract_fallback') -%}
53 | Abstract:
54 | {{ paper.get('_abstract_fallback')|wordwrap(78) }}
55 | {%- endif %}
56 |
57 | Full Paper: {{ paper.link }}
58 | {%- if base_url and paper.id %}
59 | Paper Details: {{ base_url }}/paper/{{ paper.id }}
60 | {%- endif %}
61 |
62 | {% if not loop.last %}
63 | --------------------------------------------------------------------------------
64 | {% endif %}
65 | {% endfor %}
66 |
67 | ================================================================================
68 |
69 | This digest was generated by PaperSorter
70 | {%- if base_url %}
71 | View in Web Interface: {{ base_url }}
72 | {%- endif %}
73 |
74 | To unsubscribe or adjust preferences, please contact your administrator.
75 |
--------------------------------------------------------------------------------
/docker-compose.prod.yml:
--------------------------------------------------------------------------------
1 | version: '3.3'
2 |
3 | # Production overrides for docker-compose.yml
4 | # Usage: docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d
5 |
6 | services:
7 | postgres:
8 | # Production database settings
9 | deploy:
10 | resources:
11 | limits:
12 | cpus: '2'
13 | memory: 2G
14 | reservations:
15 | cpus: '0.5'
16 | memory: 512M
17 | # Add backup volume
18 | volumes:
19 | - postgres_backup:/backup
20 |
21 | web:
22 | # Production web settings
23 | deploy:
24 | resources:
25 | limits:
26 | cpus: '2'
27 | memory: 4G
28 | reservations:
29 | cpus: '1'
30 | memory: 1G
31 | environment:
32 | # Production settings
33 | FLASK_ENV: production
34 | LOG_LEVEL: warning
35 | # Increase gunicorn workers/threads for production
36 | command: ["gunicorn",
37 | "--worker-class", "gthread",
38 | "--workers", "1",
39 | "--threads", "8",
40 | "--bind", "0.0.0.0:5001",
41 | "--access-logfile", "/data/logs/access.log",
42 | "--error-logfile", "/data/logs/error.log",
43 | "--log-level", "warning",
44 | "--timeout", "120",
45 | "--max-requests", "1000",
46 | "--max-requests-jitter", "100",
47 | "PaperSorter.web.app:create_app()"]
48 |
49 | scheduler:
50 | # Production scheduler settings
51 | deploy:
52 | resources:
53 | limits:
54 | cpus: '1'
55 | memory: 2G
56 | reservations:
57 | cpus: '0.25'
58 | memory: 256M
59 |
60 | caddy:
61 | # Production Caddy settings with proper domain
62 | environment:
63 | # These should be set in .env for production
64 | DOMAIN: ${DOMAIN}
65 | EMAIL: ${ADMIN_EMAIL}
66 | # Mount production Caddyfile
67 | volumes:
68 | - ./docker/caddy/Caddyfile.prod:/etc/caddy/Caddyfile:ro
69 | - caddy_data:/data
70 | - caddy_config:/config
71 | deploy:
72 | resources:
73 | limits:
74 | cpus: '0.5'
75 | memory: 256M
76 | reservations:
77 | cpus: '0.1'
78 | memory: 64M
79 |
80 | volumes:
81 | postgres_backup:
82 | name: papersorter_postgres_backup
--------------------------------------------------------------------------------
/PaperSorter/web/auth/models.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """User model for authentication."""
25 |
26 | from flask_login import UserMixin
27 |
28 |
class User(UserMixin):
    """Authenticated user record used with Flask-Login.

    Attributes set by the constructor:
        id, username, email, is_admin, timezone, date_format,
        primary_channel_id: stored exactly as passed in.
        feedlist_minscore_int: integer threshold as given, or 25 when the
            argument is ``None``.
        feedlist_minscore: the same threshold divided by 100 (25 -> 0.25).
        theme: one of "light", "dark" or "auto"; any other value is
            replaced by "light".
    """

    def __init__(
        self,
        id,
        username,
        email=None,
        is_admin=False,
        timezone="UTC",
        date_format="MMM D, YYYY",
        feedlist_minscore=None,
        primary_channel_id=None,
        theme="light",
    ):
        # Identity and profile fields are kept verbatim.
        self.id = id
        self.username = username
        self.email = email
        self.is_admin = is_admin
        self.timezone = timezone
        self.date_format = date_format

        # The threshold arrives as an integer; keep that value and also
        # expose it as a decimal fraction for internal use.
        if feedlist_minscore is None:
            feedlist_minscore = 25
        self.feedlist_minscore_int = feedlist_minscore
        self.feedlist_minscore = feedlist_minscore / 100.0

        self.primary_channel_id = primary_channel_id

        # Unknown theme names fall back to the light theme.
        if theme in ("light", "dark", "auto"):
            self.theme = theme
        else:
            self.theme = "light"
59 |
--------------------------------------------------------------------------------
/docs/cli-reference/index.rst:
--------------------------------------------------------------------------------
1 | =============
2 | CLI Reference
3 | =============
4 |
5 | PaperSorter provides a comprehensive command-line interface for all system operations. This reference documents every command, option, and usage pattern.
6 |
7 | The CLI is built on Python's ``argparse`` module and follows standard Unix conventions for options and arguments. All commands support ``--help`` for detailed usage information.
8 |
9 | Command Overview
10 | ================
11 |
12 | Core Operations
13 | ---------------
14 |
15 | - **update**: Fetch new articles, generate embeddings, and score papers
16 | - **train**: Train or retrain machine learning models on labeled data
17 | - **broadcast**: Send notifications and recommendations to configured channels
18 | - **serve**: Start the web interface for interactive paper management
19 |
20 | The typical workflow involves running these commands in sequence, often automated via cron jobs for regular operation.
21 |
22 | .. toctree::
23 | :maxdepth: 2
24 |
25 | commands
26 | examples
27 |
28 | Global Options
29 | ==============
30 |
31 | All commands support these common options:
32 |
33 | ``--config PATH``
34 | Configuration file location (default: ./config.yml)
35 |
36 | ``--log-file PATH``
37 | Append logs to the specified file in addition to console output
38 |
39 | ``-q, --quiet``
40 | Suppress informational console output (warnings and errors are still shown)
41 |
42 | ``--help``
43 | Show command-specific help and exit
44 |
45 | Environment Variables
46 | =====================
47 |
48 | Configuration can also be provided via environment variables:
49 |
50 | - ``PAPERSORTER_CONFIG``: Path to configuration file
51 | - ``PAPERSORTER_LOG_LEVEL``: Logging level (DEBUG, INFO, WARNING, ERROR)
52 | - ``PAPERSORTER_LOG_FILE``: Log file path
53 |
54 | Exit Codes
55 | ==========
56 |
57 | PaperSorter follows standard Unix exit code conventions:
58 |
59 | - ``0``: Success
60 | - ``1``: General error
61 | - ``2``: Command-line usage error
62 | - ``3``: Configuration error
63 | - ``4``: Database error
64 |
65 | Examples
66 | ========
67 |
68 | Common usage patterns:
69 |
70 | .. code-block:: bash
71 |
72 | # Daily automation (typical cron setup)
73 | papersorter update --batch-size 50
74 | papersorter train --rounds 100
75 | papersorter broadcast --limit 10
76 |
77 | # Development and testing
78 | papersorter serve --debug --port 5000
79 | papersorter update --limit-sources 5 --check-interval-hours 1
80 |
81 | Related Documentation
82 | =====================
83 |
84 | - :doc:`../admin-guide/deployment` - Production automation setup
--------------------------------------------------------------------------------
/.github/workflows/claude.yml:
--------------------------------------------------------------------------------
1 | name: Claude Code
2 |
3 | on:
4 | issue_comment:
5 | types: [created]
6 | pull_request_review_comment:
7 | types: [created]
8 | issues:
9 | types: [opened, assigned]
10 | pull_request_review:
11 | types: [submitted]
12 |
13 | jobs:
14 | claude:
15 | if: |
16 | (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
17 | (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
18 | (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
19 | (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20 | runs-on: ubuntu-latest
21 | permissions:
22 | contents: read
23 | pull-requests: read
24 | issues: read
25 | id-token: write
26 | actions: read # Required for Claude to read CI results on PRs
27 | steps:
28 | - name: Checkout repository
29 | uses: actions/checkout@v4
30 | with:
31 | fetch-depth: 1
32 |
33 | - name: Run Claude Code
34 | id: claude
35 | uses: anthropics/claude-code-action@beta
36 | with:
37 | claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
38 |
39 | # This is an optional setting that allows Claude to read CI results on PRs
40 | additional_permissions: |
41 | actions: read
42 |
43 | # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1)
44 | # model: "claude-opus-4-1-20250805"
45 |
46 | # Optional: Customize the trigger phrase (default: @claude)
47 | # trigger_phrase: "/claude"
48 |
49 | # Optional: Trigger when specific user is assigned to an issue
50 | # assignee_trigger: "claude-bot"
51 |
52 | # Optional: Allow Claude to run specific commands
53 | # allowed_tools: "Bash(npm install),Bash(npm run build),Bash(npm run test:*),Bash(npm run lint:*)"
54 |
55 | # Optional: Add custom instructions for Claude to customize its behavior for your project
56 | # custom_instructions: |
57 | # Follow our coding standards
58 | # Ensure all new code has tests
59 | # Use TypeScript for new files
60 |
61 | # Optional: Custom environment variables for Claude
62 | # claude_env: |
63 | # NODE_ENV: test
64 |
65 |
--------------------------------------------------------------------------------
/PaperSorter/static/css/pages/feed_struct.css:
--------------------------------------------------------------------------------
1 | /* Shared feed list structure (no colors) */
2 |
3 | /* Header row inside a feed item */
4 | .feed-header {
5 | padding: 20px;
6 | display: flex;
7 | align-items: center;
8 | gap: 15px;
9 | position: relative;
10 | }
11 |
12 | /* Score badge geometry */
13 | .score-badge {
14 | display: inline-flex;
15 | align-items: center;
16 | padding: 4px 12px;
17 | border-radius: 12px;
18 | font-size: 12px;
19 | font-weight: bold;
20 | min-width: 50px;
21 | justify-content: center;
22 | flex-shrink: 0;
23 | position: relative;
24 | }
25 |
26 | /* Container for score icons */
27 | .score-icons {
28 | position: absolute;
29 | top: -8px;
30 | right: -8px;
31 | display: flex;
32 | gap: 2px;
33 | }
34 |
35 | /* Score icon geometry only */
36 | .score-icon {
37 | border-radius: 50%;
38 | width: 16px;
39 | height: 16px;
40 | display: flex;
41 | align-items: center;
42 | justify-content: center;
43 | font-size: 10px;
44 | }
45 |
46 | /* Content column */
47 | .feed-content {
48 | flex: 1;
49 | min-width: 0;
50 | }
51 |
52 | /* Title */
53 | .feed-title {
54 | font-size: 18px;
55 | font-weight: bold;
56 | margin-bottom: 5px;
57 | line-height: 1.3;
58 | }
59 |
60 | /* Meta row */
61 | .feed-meta {
62 | display: flex;
63 | gap: 15px;
64 | font-size: 14px;
65 | margin-bottom: 5px;
66 | }
67 |
68 | .feed-meta-item {
69 | white-space: nowrap;
70 | overflow: hidden;
71 | text-overflow: ellipsis;
72 | }
73 |
74 | .feed-author { max-width: 200px; }
75 | .feed-item.expanded .feed-author {
76 | white-space: normal;
77 | overflow: visible;
78 | text-overflow: unset;
79 | max-width: none;
80 | }
81 |
82 | .feed-origin { font-weight: bold; }
83 | .feed-date { flex-shrink: 0; }
84 |
85 | /* Label badges */
86 | .feed-labels {
87 | display: flex;
88 | gap: 8px;
89 | align-items: center;
90 | margin-left: auto;
91 | flex-shrink: 0;
92 | }
93 |
94 | .label-badge {
95 | padding: 2px 8px;
96 | border-radius: 10px;
97 | font-size: 11px;
98 | font-weight: bold;
99 | }
100 |
101 | /* Details block */
102 | .feed-details { padding: 20px; }
103 | .feed-details.expanded { display: block; }
104 |
105 | /* Abstract */
106 | .feed-abstract {
107 | font-size: 15px;
108 | line-height: 1.6;
109 | margin-bottom: 15px;
110 | }
111 |
112 | /* Actions row */
113 | .feed-actions {
114 | display: flex;
115 | gap: 10px;
116 | }
117 |
118 | .badges-container {
119 | display: flex;
120 | gap: 10px;
121 | align-items: center;
122 | }
123 |
124 |
--------------------------------------------------------------------------------
/PaperSorter/templates/feedback_error.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}Error - {{ site_name }}{% endblock %}
4 |
5 | {% block header %}{% endblock %}
6 |
7 | {% block styles %}
8 |
9 |
77 | {% endblock %}
78 |
79 | {% block main_container %}
80 |
81 |
❌
82 |
83 |
Oops!
84 |
85 |
86 | {{ message }}
87 |
88 |
89 |
94 |
95 | {% endblock %}
96 |
--------------------------------------------------------------------------------
/docker/config.docker.yml:
--------------------------------------------------------------------------------
1 | # PaperSorter Configuration Template for Docker
2 | # This file should be mounted as /app/config.yml in the container
3 | # Environment variables are expanded when needed
4 |
5 | admin_users:
6 | # Auto-promoted admin users (comma-separated in env var)
7 | # ${ADMIN_USERS}
8 |
9 | db:
10 | type: postgres
11 | host: postgres # Docker service name
12 | database: ${POSTGRES_DB:-papersorter}
13 | user: ${POSTGRES_USER:-papersorter}
14 | password: ${POSTGRES_PASSWORD:-changeme}
15 |
16 | web:
17 | site_name: ${SITE_NAME:-PaperSorter}
18 | base_url: ${BASE_URL:-http://localhost}
19 | flask_secret_key: ${FLASK_SECRET_KEY}
20 | default_timezone: ${DEFAULT_TIMEZONE:-UTC}
21 | default_date_format: ${DEFAULT_DATE_FORMAT:-MMM D, YYYY}
22 |
23 | oauth:
24 | google:
25 | client_id: ${GOOGLE_CLIENT_ID}
26 | secret: ${GOOGLE_CLIENT_SECRET}
27 | github:
28 | client_id: ${GITHUB_CLIENT_ID}
29 | secret: ${GITHUB_CLIENT_SECRET}
30 | orcid:
31 | client_id: ${ORCID_CLIENT_ID}
32 | secret: ${ORCID_CLIENT_SECRET}
33 | sandbox: false
34 |
35 | embedding_api:
36 | api_key: ${EMBEDDING_API_KEY}
37 | api_url: ${EMBEDDING_API_URL:-https://api.openai.com/v1}
38 | model: ${EMBEDDING_MODEL:-text-embedding-3-large}
39 | dimensions: ${EMBEDDING_DIMENSIONS}
40 |
41 | models:
42 | path: /data/models
43 |
44 | storage:
45 | ai_poster_dir: /data/posters
46 |
47 | summarization_api:
48 | api_key: ${SUMMARIZATION_API_KEY}
49 | api_url: ${SUMMARIZATION_API_URL:-https://generativelanguage.googleapis.com/v1beta/openai}
50 | model: ${SUMMARIZATION_MODEL:-gemini-2.0-flash-thinking-exp-01-21}
51 |
52 | scholarly_database:
53 | provider: ${SCHOLARLY_PROVIDER:-semantic_scholar}
54 | match_date_tolerance_days: ${MATCH_DATE_TOLERANCE_DAYS:-60}
55 | semantic_scholar:
56 | api_key: ${SEMANTIC_SCHOLAR_API_KEY}
57 | max_retries: ${SEMANTIC_SCHOLAR_MAX_RETRIES:-5}
58 | retry_backoff_base: ${SEMANTIC_SCHOLAR_RETRY_BACKOFF_BASE:-2}
59 | throttle: ${SEMANTIC_SCHOLAR_THROTTLE:-1}
60 | openalex:
61 | email: ${OPENALEX_EMAIL}
62 | max_retries: ${OPENALEX_MAX_RETRIES:-5}
63 | retry_backoff_base: ${OPENALEX_RETRY_BACKOFF_BASE:-2}
64 | throttle: ${OPENALEX_THROTTLE:-0.1}
65 |
66 | smtp:
67 | provider: ${SMTP_PROVIDER:-custom}
68 | host: ${SMTP_HOST}
69 | port: ${SMTP_PORT:-587}
70 | username: ${SMTP_USERNAME}
71 | password: ${SMTP_PASSWORD}
72 | encryption: ${SMTP_ENCRYPTION:-tls}
73 | timeout: ${SMTP_TIMEOUT:-30}
74 |
75 | email:
76 | from_address: ${EMAIL_FROM:-papersorter@example.com}
77 | from_name: ${EMAIL_FROM_NAME:-PaperSorter Newsletter}
78 | subject_template: ${EMAIL_SUBJECT_TEMPLATE:-Research Papers Digest - {date:%Y-%m-%d}}
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
1 | # Repository Guidelines
2 |
3 | ## Project Structure & Module Organization
4 | - `PaperSorter/` – main package: `cli/`, `tasks/`, `providers/`, `notification/`, `utils/`, `web/`, `templates/`, `static/`, data helpers.
5 | - `docs/` – Sphinx docs (`make html`).
6 | - `docker/`, `docker-compose*.yml`, `papersorter-cli` – containerized runtime.
7 | - `migrations/`, `SQL_SCHEMA.sql` – database schema/migrations.
8 | - `examples/`, `tools/`, `notebook/` – scripts and prototypes.
9 | - `config.yml` – root config (often a symlink); do not commit secrets.
10 |
11 | ## Build, Test, and Development Commands
12 | - Create env: `python -m venv .venv && source .venv/bin/activate && pip install -e ".[dev]"`
13 | - Initialize DB: `papersorter init` (Docker: `./papersorter-cli init`).
14 | - Run web locally: `papersorter serve --debug --port 5001`.
15 | - Update/predict/train: `papersorter update`, `papersorter predict`, `papersorter train --name "Model v1"`.
16 | - Lint/format/types: `black PaperSorter/`, `flake8 PaperSorter/`, `mypy PaperSorter/`.
17 | - Tests: `pytest` (optional coverage: `pytest --cov=PaperSorter` if `pytest-cov` installed).
18 | - Docs: `cd docs && make html`.
19 |
20 | ## Coding Style & Naming Conventions
21 | - Python 3.8+; PEP 8 with Black formatting (88 cols default).
22 | - Lint with Flake8; type hints required on public APIs; keep `mypy` clean.
23 | - Naming: modules/functions/vars `snake_case`; classes `PascalCase`; constants `UPPER_CASE`.
24 | - Keep modules focused; prefer explicit imports; add docstrings for non-trivial functions.
25 |
26 | ## Testing Guidelines
27 | - Framework: Pytest. Place tests under `tests/` mirroring `PaperSorter/` paths.
28 | - File naming: `tests/test_*.py`; use fixtures and fakes over hitting real services.
29 | - Database: prefer isolated test DB or mocks; avoid modifying prod schemas.
30 | - Aim for coverage on new/changed code; include CLI and key branches.
31 |
32 | ## Commit & Pull Request Guidelines
33 | - Commits: imperative, concise subject (e.g., "Fix event logging in update task").
34 | - Include rationale in body when behavior changes; reference issues (`Fixes #123`).
35 | - PRs must: describe scope and impact, include screenshots for UI, sample CLI output/logs for tasks, update docs/CHANGELOG when user-facing, note migration impacts.
36 | - CI hygiene: run `black`, `flake8`, `mypy`, and `pytest` locally before opening PRs.
37 |
38 | ## Security & Configuration Tips
39 | - Do not commit secrets. Use `.env` (copy from `.env.example`) and `config.yml` locally; keep lab-specific configs (e.g., `qbio/`) out of PRs unless intended.
40 | - Validate external API keys via environment/config; never hardcode.
41 | - For Docker, prefer `docker-compose up -d` and manage settings in `.env`.
42 |
--------------------------------------------------------------------------------
/PaperSorter/web/models/scholarly_article.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Scholarly article item model for web interface."""
25 |
26 | from datetime import datetime
27 | from ...providers import FeedItem
28 | from ...providers.scholarly_database import ScholarlyArticle
29 |
30 |
class ScholarlyArticleItem(FeedItem):
    """Feed item built from a ``ScholarlyArticle`` of any database provider."""

    def __init__(self, article: ScholarlyArticle):
        """Populate the feed item fields from ``article``.

        Args:
            article: Source article. Its ``abstract``, ``tldr``,
                ``publication_date``, ``unique_id``, ``title``, ``venue``
                and ``url`` attributes and its ``format_authors()`` method
                are read here.
        """
        self.article = article

        # Body text: the tl;dr (when present) goes first, followed by the
        # abstract (when present); with no tl;dr the abstract is used alone.
        abstract = article.abstract
        summary = article.tldr
        if summary and abstract:
            body = f"(tl;dr) {summary}\n\n{abstract}"
        elif summary:
            body = f"(tl;dr) {summary}"
        else:
            body = abstract

        # No publication date known -> stamp the item with the current time.
        published_at = article.publication_date or datetime.now()

        super().__init__(
            external_id=article.unique_id,
            title=article.title,
            content=body or "",
            author=article.format_authors(),
            origin="",  # Source will be set by caller when adding
            journal=article.venue or "Unknown",
            link=article.url or "",
            published=published_at,
        )

        # Keep the untouched article around so callers can reach every field.
        self.raw_article = article
63 |
--------------------------------------------------------------------------------
/papersorter-cli:
--------------------------------------------------------------------------------
#!/bin/bash
# PaperSorter CLI wrapper for Docker
# This script allows running PaperSorter commands from the host system
# against the dockerized instance.
#
# Usage: ./papersorter-cli <command> [args...]
#   logs, shell, db-shell, status, restart and update-image are handled by
#   this script; any other command line is forwarded unchanged to the
#   `papersorter` executable inside the `web` container.

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Check that Docker itself is available
if ! command -v docker &> /dev/null; then
    echo -e "${RED}Error: Docker is not installed or not in PATH${NC}"
    exit 1
fi

# Pick the Compose front end. Every command below goes through Compose, so
# fail early with a clear message instead of a late "command not found".
# Prefer the standalone docker-compose binary; fall back to the
# "docker compose" plugin when only that is installed.
if command -v docker-compose &> /dev/null; then
    COMPOSE=(docker-compose)
elif docker compose version &> /dev/null; then
    COMPOSE=(docker compose)
else
    echo -e "${RED}Error: Docker Compose is not installed or not in PATH${NC}"
    exit 1
fi

# Check if we're in the right directory (where docker-compose.yml exists)
if [ ! -f "docker-compose.yml" ]; then
    echo -e "${RED}Error: docker-compose.yml not found in current directory${NC}"
    echo "Please run this script from the PaperSorter root directory"
    exit 1
fi

# Check if the web container is running
if ! "${COMPOSE[@]}" ps --services --filter "status=running" | grep -q "^web$"; then
    echo -e "${YELLOW}Warning: PaperSorter web container is not running${NC}"
    echo "Starting services..."
    "${COMPOSE[@]}" up -d

    # Wait for services to be ready
    echo "Waiting for services to be ready..."
    sleep 5

    # Wait for database to be ready (up to 30 attempts, 2 seconds apart)
    for i in {1..30}; do
        if "${COMPOSE[@]}" exec -T web pg_isready -h postgres -U papersorter &> /dev/null; then
            echo -e "${GREEN}Services are ready!${NC}"
            break
        fi
        if [ "$i" -eq 30 ]; then
            echo -e "${RED}Error: Services failed to start${NC}"
            exit 1
        fi
        echo -n "."
        sleep 2
    done
    echo
fi

# Special handling for certain commands
case "$1" in
    "logs")
        # Show logs from web container
        "${COMPOSE[@]}" logs -f web
        ;;
    "shell")
        # Open interactive shell in web container
        "${COMPOSE[@]}" exec web bash
        ;;
    "db-shell")
        # Open PostgreSQL shell
        "${COMPOSE[@]}" exec postgres psql -U papersorter papersorter
        ;;
    "status")
        # Show status of all services
        "${COMPOSE[@]}" ps
        ;;
    "restart")
        # Restart all services
        "${COMPOSE[@]}" restart
        ;;
    "update-image")
        # Rebuild and update Docker images
        echo "Rebuilding PaperSorter images..."
        "${COMPOSE[@]}" build --no-cache
        "${COMPOSE[@]}" up -d
        ;;
    *)
        # Pass through to papersorter command in container
        # Use -T flag to disable TTY allocation for non-interactive commands
        "${COMPOSE[@]}" exec -T web papersorter "$@"
        ;;
esac
--------------------------------------------------------------------------------
/docs/tutorials/index.rst:
--------------------------------------------------------------------------------
1 | ===================
2 | Tutorials
3 | ===================
4 |
5 | Step-by-step tutorials for common PaperSorter integrations and advanced use cases. These hands-on guides walk you through real-world scenarios with detailed instructions and examples.
6 |
7 | Each tutorial is designed to be self-contained and includes all necessary configuration files, code snippets, and troubleshooting tips.
8 |
9 | What You'll Find Here
10 | =====================
11 |
12 | Integration Guides
13 | ------------------
14 |
15 | Learn how to connect PaperSorter with popular services and tools:
16 |
17 | - **Email Newsletter Setup**: Configure Gmail or other email providers
18 | - **Slack Integration**: Set up team notifications and channels
19 | - **Custom Embedding Models**: Use local or specialized embedding APIs
20 | - **Multi-Model Workflows**: Manage different models for different research areas
21 |
22 | These tutorials assume you have completed the :doc:`../getting-started/index` guide and have a working PaperSorter installation.
23 |
24 | .. toctree::
25 | :maxdepth: 2
26 |
27 | gmail-setup
28 | slack-integration
29 | custom-embeddings
30 | multi-model
31 |
32 | Tutorial Structure
33 | ==================
34 |
35 | Each tutorial follows a consistent format:
36 |
37 | **Prerequisites**
38 | What you need before starting (accounts, API keys, etc.)
39 |
40 | **Overview**
41 | What you'll accomplish and why it's useful
42 |
43 | **Step-by-Step Instructions**
44 | Detailed walkthrough with commands and configuration
45 |
46 | **Testing and Verification**
47 | How to confirm everything is working correctly
48 |
49 | **Troubleshooting**
50 | Common issues and their solutions
51 |
52 | **Next Steps**
53 | Related tutorials and advanced configurations
54 |
55 | Difficulty Levels
56 | =================
57 |
58 | 🟢 **Beginner**: Basic configuration and setup tasks
59 |
60 | 🟡 **Intermediate**: Requires some technical knowledge and customization
61 |
62 | 🔴 **Advanced**: Complex integrations requiring development skills
63 |
64 | Prerequisites
65 | =============
66 |
67 | Before starting any tutorial:
68 |
69 | - Complete the :doc:`../getting-started/quickstart` guide
70 | - Have a working PaperSorter installation
71 | - Access to necessary external services (Gmail, Slack, etc.)
72 | - Basic familiarity with configuration files and command-line tools
73 |
74 | Getting Help
75 | ============
76 |
77 | If you encounter issues:
78 |
79 | 1. Check the tutorial's troubleshooting section
80 | 2. Search existing GitHub issues
81 | 3. Ask for help in community discussions
82 |
83 | Contributing Tutorials
84 | ======================
85 |
86 | Have a useful integration or workflow? We welcome tutorial contributions!
87 |
88 | Check the development documentation for information on:
89 |
90 | - Tutorial writing guidelines
91 | - Documentation standards
92 | - Submission process
93 |
94 | Related Documentation
95 | =====================
96 |
97 | - :doc:`../user-guide/index` - Complete feature documentation
98 | - :doc:`../cli-reference/index` - Command reference
--------------------------------------------------------------------------------
/PaperSorter/providers/openai_client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Centralized OpenAI client management.
3 |
4 | This module exposes helper functions for retrieving shared OpenAI clients
5 | configured via the main PaperSorter configuration. Clients are cached per
6 | configuration section so that callers reuse authenticated sessions instead of
7 | recreating them throughout the codebase.
8 | """
9 |
10 | from __future__ import annotations
11 |
12 | from threading import RLock
13 | from typing import Any, Dict, Mapping, Optional, Tuple
14 |
15 | from openai import OpenAI
16 |
17 | from ..config import get_config
18 |
# Endpoint used when a configuration section supplies no usable ``api_url``.
_DEFAULT_BASE_URL = "https://api.openai.com/v1"

# Already-built clients, looked up by the (section, api_key, base_url) triple.
_CLIENT_CACHE: Dict[Tuple[str, str, str], OpenAI] = {}
# Held around every read and write of _CLIENT_CACHE.
_CACHE_LOCK = RLock()
24 |
25 |
26 | def _normalize_base_url(url: Optional[str]) -> str:
27 | if not url:
28 | return _DEFAULT_BASE_URL
29 | return url.rstrip("/") or _DEFAULT_BASE_URL
30 |
31 |
def get_openai_client(
    section: str,
    cfg: Optional[Mapping[str, Any]] = None,
    *,
    optional: bool = False,
) -> Optional[OpenAI]:
    """Return a shared OpenAI client for the given configuration section.

    Clients are cached per ``(section, api_key, base_url)``, so repeated calls
    with the same effective settings return the same object.

    Args:
        section: Name of the configuration section (e.g., ``"summarization_api"``).
        cfg: Optional configuration mapping overriding the global config.
        optional: When ``True``, return ``None`` instead of raising if the
            section is missing or lacks credentials.

    Raises:
        ValueError: If the configuration section or API key is missing and
            ``optional`` is ``False``.

    Returns:
        An initialized :class:`~openai.OpenAI` client or ``None`` when optional.
    """

    config_source: Optional[Mapping[str, Any]] = (
        cfg if cfg is not None else get_config().raw
    )
    api_section = config_source.get(section) if config_source else None

    if not isinstance(api_section, Mapping):
        if optional:
            return None
        raise ValueError(f"Configuration section '{section}' is missing or invalid")

    api_config: Mapping[str, Any] = api_section

    # Use the stripped key everywhere, not just for validation: surrounding
    # whitespace or a trailing newline (easy to pick up from env files) is
    # never part of a real key and would otherwise reach the client verbatim
    # and produce a distinct cache entry.
    raw_api_key = api_config.get("api_key")
    api_key = raw_api_key.strip() if isinstance(raw_api_key, str) else ""
    if not api_key:
        if optional:
            return None
        raise ValueError(f"Configuration section '{section}' is missing 'api_key'")

    # Same treatment for the URL; a blank value falls back to the default.
    base_url_value = api_config.get("api_url")
    base_url = _normalize_base_url(
        base_url_value.strip() if isinstance(base_url_value, str) else None
    )

    cache_key = (section, api_key, base_url)

    # Look up and (if needed) create under the lock so concurrent callers
    # with the same key end up sharing one client.
    with _CACHE_LOCK:
        client = _CLIENT_CACHE.get(cache_key)
        if client is None:
            client = OpenAI(api_key=api_key, base_url=base_url)
            _CLIENT_CACHE[cache_key] = client

    return client
82 |
83 |
def reset_openai_client_cache() -> None:
    """Drop every cached OpenAI client (useful in tests).

    The next ``get_openai_client`` call for any section builds a new client.
    """
    # Take the same lock used for cache lookups before emptying the dict.
    with _CACHE_LOCK:
        _CLIENT_CACHE.clear()
89 |
--------------------------------------------------------------------------------
/PaperSorter/notification/base.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Base classes for notification providers."""
25 |
26 | from abc import ABC, abstractmethod
27 | import re
28 |
29 |
class NotificationError(Exception):
    """Root exception type for failures raised by notification providers."""
34 |
35 |
class NotificationProvider(ABC):
    """Abstract base class for notification providers."""

    @abstractmethod
    def send_notifications(self, items, message_options, base_url=None):
        """Send notifications for a batch of items.

        Args:
            items: List of dictionaries, each containing article information with keys:
                - id: Article ID
                - title: Article title
                - content: Article content/abstract
                - author: Article authors
                - origin: Source of the article
                - link: URL to the article
                - score: Prediction score (0.0 to 1.0)
            message_options: Additional options for the message
                - model_name: Name of the model used for scoring
                - channel_name: Name of the channel
            base_url: Base URL for web interface links

        Returns:
            List of (item_id, success) tuples indicating which items were sent successfully

        Raises:
            NotificationError: If sending fails completely
        """
        pass

    @staticmethod
    def normalize_text(text):
        """Collapse every whitespace run to one space and trim both ends.

        Returns an empty string for ``None`` or empty input.
        """
        if not text:
            return ""
        return re.sub(r"\s+", " ", text).strip()

    @staticmethod
    def limit_text_length(text, limit):
        """Truncate ``text`` so the result is at most ``limit`` characters.

        Text that already fits is returned unchanged. Longer text is cut and
        terminated with a single-character ellipsis ("…"), giving a result of
        exactly ``limit`` characters. ``None``/empty input, or a non-positive
        ``limit`` with over-long text, yields an empty string.
        """
        if not text:
            return ""
        if len(text) <= limit:
            return text
        if limit <= 0:
            return ""
        # The ellipsis is one character, so reserve exactly one slot for it.
        # Reserving more would make the slice index negative for tiny limits
        # and return text longer than requested.
        return text[: limit - 1] + "…"
80 |
--------------------------------------------------------------------------------
/PaperSorter/utils/template_filters.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Shared Jinja2 template filters for PaperSorter."""
25 |
26 | import html
27 | import re
28 | from markupsafe import Markup, escape
29 |
30 |
def safe_html_filter(text):
    """
    Filter to allow only safe HTML tags in text content.
    Allows: i, b, em, strong, sup, sub tags while escaping everything else.

    Falsy input (``None``, ``""``) is returned unchanged. Otherwise the whole
    text is HTML-escaped and only matched open/close pairs of the allowed
    tags (written without attributes) are turned back into real markup.
    The result is wrapped in ``Markup`` so templates do not escape it again.
    """
    if not text:
        return text

    # First escape all HTML; from here on "<" and ">" only exist as entities.
    result = str(escape(text))

    # Re-enable each allowed tag by matching its *escaped* form. Matching the
    # raw "<tag>" form would never succeed after escape().
    allowed_tags = ("i", "b", "em", "strong", "sup", "sub")
    for tag in allowed_tags:
        result = re.sub(
            rf"&lt;{tag}&gt;(.*?)&lt;/{tag}&gt;",
            rf"<{tag}>\1</{tag}>",
            result,
            flags=re.IGNORECASE | re.DOTALL,
        )

    return Markup(result)
58 |
59 |
def strip_html_filter(text):
    """
    Return ``text`` as plain text for page titles and meta tags.

    Anything of the form ``<...>`` is removed, then HTML entities are decoded.
    Falsy input is handed back unchanged.
    """
    if text:
        # Drop tag-like spans first, then turn entities into characters.
        without_tags = re.sub(r'<[^>]+>', '', str(text))
        return html.unescape(without_tags)
    return text
74 |
75 |
def register_filters(jinja_env):
    """
    Install this module's custom filters on a Jinja2 environment.

    Args:
        jinja_env: Jinja2 Environment instance; its ``filters`` mapping gains
            the ``safe_html`` and ``strip_html`` entries.
    """
    custom_filters = (
        ('safe_html', safe_html_filter),
        ('strip_html', strip_html_filter),
    )
    for filter_name, filter_func in custom_filters:
        jinja_env.filters[filter_name] = filter_func
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "papersorter"
7 | version = "0.9.0"
8 | description = "Intelligent academic paper recommendation system with ML-powered filtering and Slack notifications"
9 | readme = "README.md"
10 | requires-python = ">=3.8"
11 | license = {text = "MIT"}
12 | authors = [
13 | {name = "Hyeshik Chang", email = "hyeshik@snu.ac.kr"},
14 | ]
15 | maintainers = [
16 | {name = "Hyeshik Chang", email = "hyeshik@snu.ac.kr"},
17 | ]
18 | keywords = [
19 | "academic papers",
20 | "machine learning",
21 | "RSS feed",
22 | "research tools",
23 | "paper recommendation",
24 | "slack integration",
25 | "scientific literature"
26 | ]
27 | classifiers = [
28 | "Development Status :: 4 - Beta",
29 | "Environment :: Console",
30 | "Environment :: Web Environment",
31 | "Intended Audience :: Education",
32 | "Intended Audience :: Science/Research",
33 | "License :: OSI Approved :: MIT License",
34 | "Operating System :: OS Independent",
35 | "Programming Language :: Python :: 3",
36 | "Programming Language :: Python :: 3.8",
37 | "Programming Language :: Python :: 3.9",
38 | "Programming Language :: Python :: 3.10",
39 | "Programming Language :: Python :: 3.11",
40 | "Programming Language :: Python :: 3.12",
41 | "Topic :: Scientific/Engineering",
42 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
43 | "Topic :: Scientific/Engineering :: Information Analysis",
44 | ]
45 |
46 | dependencies = [
47 | "feedparser>=6.0",
48 | "numpy>=1.20",
49 | "openai>=1.30",
50 | "pandas>=2.0",
51 | "psycopg2-binary>=2.9",
52 | "pgvector>=0.2.0",
53 | "PyYAML>=6.0",
54 | "requests>=2.7.0",
55 | "scikit-learn>=1.4",
56 | "scipy>=1.10",
57 | "xgboost>2.0",
58 | "Flask>=2.0",
59 | "Flask-Login>=0.6.0",
60 | "Authlib>=1.2.0",
61 | "markdown2>=2.4.0",
62 | "tabulate>=0.9.0",
63 | ]
64 |
65 | [project.urls]
66 | Homepage = "https://github.com/ChangLabSNU/papersorter"
67 | Repository = "https://github.com/ChangLabSNU/papersorter"
68 | Documentation = "https://github.com/ChangLabSNU/papersorter#readme"
69 | "Bug Tracker" = "https://github.com/ChangLabSNU/papersorter/issues"
70 |
71 | [project.scripts]
72 | papersorter = "PaperSorter.__main__:main"
73 |
74 | [project.optional-dependencies]
75 | dev = [
76 | "pytest>=7.0",
77 | "black>=22.0",
78 | "flake8>=5.0",
79 | "mypy>=1.0",
80 | ]
81 | server = [
82 | "uwsgi>=2.0",
83 | ]
84 |
85 | [tool.setuptools]
86 | include-package-data = true
87 |
88 | [tool.setuptools.packages.find]
89 | include = ["PaperSorter*"]
90 | exclude = ["tests*", "notebook*", "tools*", "old*"]
91 |
92 | [tool.setuptools.package-data]
93 | PaperSorter = [
94 | "templates/*.html",
95 | "templates/email/*.html",
96 | "templates/email/*.txt",
97 | "static/favicon.ico",
98 | "static/manifest.json",
99 | "static/css/*.css",
100 | "static/css/components/.gitkeep",
101 | "static/js/*.js",
102 | "static/icons/*.png",
103 | "data/*.py",
104 | ]
105 |
--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | =================
2 | API Documentation
3 | =================
4 |
5 | This section provides comprehensive documentation for PaperSorter's internal APIs, modules, and extension points.
6 |
7 | Whether you're developing custom integrations, contributing to the project, or building extensions, this reference will help you understand PaperSorter's architecture and interfaces.
8 |
9 | Architecture Overview
10 | =====================
11 |
12 | PaperSorter is organized into several key components:
13 |
14 | - **Core Modules**: Database interfaces, embedding generation, and ML models
15 | - **Feed Providers**: Pluggable RSS/Atom feed processors
16 | - **Web Framework**: Flask-based REST API and user interface
17 | - **Notification System**: Multi-channel broadcast capabilities
18 | - **CLI Tasks**: Command-line interface implementations
19 |
20 | .. toctree::
21 | :maxdepth: 2
22 |
23 | modules
24 | database
25 | providers
26 | notifications
27 | web
28 |
29 | Key Interfaces
30 | ==============
31 |
32 | Database Layer
33 | --------------
34 |
35 | The database layer provides unified access to PostgreSQL with pgvector support:
36 |
37 | - **FeedDatabase**: Article metadata and user preferences
38 | - **EmbeddingDatabase**: Vector storage and similarity search
39 | - **Schema Management**: Migrations and table definitions
40 |
41 | Provider System
42 | ---------------
43 |
44 | Feed providers implement a common interface for content ingestion:
45 |
46 | - **BaseProvider**: Abstract interface for all feed sources
47 | - **RSSProvider**: RSS/Atom feed implementation
48 | - **Custom Providers**: Extension points for new content sources
49 |
50 | Web API
51 | -------
52 |
53 | RESTful endpoints organized by functional domain:
54 |
55 | - **Feeds API**: Article management and labeling
56 | - **Search API**: Text and semantic search capabilities
57 | - **Settings API**: Administrative configuration
58 | - **User API**: Preferences and personalization
59 |
60 | Extension Points
61 | ================
62 |
63 | Custom Feed Providers
64 | ----------------------
65 |
66 | Implement ``BaseProvider`` to add new content sources:
67 |
68 | .. code-block:: python
69 |
70 | from PaperSorter.providers.base import BaseProvider
71 |
72 | class CustomProvider(BaseProvider):
73 | def fetch_articles(self):
74 | # Implementation here
75 | pass
76 |
77 | Custom Notification Channels
78 | -----------------------------
79 |
80 | Extend the notification system for new delivery methods:
81 |
82 | .. code-block:: python
83 |
84 | from PaperSorter.notification.base import NotificationProvider
85 |
86 | class CustomNotifier(NotificationProvider):
87 | def send_notifications(self, items, message_options, base_url=None):
88 | # Implementation here
89 | pass
90 |
91 | API Conventions
92 | ===============
93 |
94 | - All APIs use consistent error handling and response formats
95 | - Database operations support transaction management
96 | - Configuration is injected via dependency injection patterns
97 | - Logging follows structured format for operational monitoring
98 |
99 | Related Resources
100 | =================
101 |
102 | - :doc:`../development/index` - Development and contributing guidelines
--------------------------------------------------------------------------------
/.github/workflows/claude-code-review.yml:
--------------------------------------------------------------------------------
1 | name: Claude Code Review
2 |
3 | on:
4 | pull_request:
5 | types: [opened, synchronize]
6 | # Optional: Only run on specific file changes
7 | # paths:
8 | # - "src/**/*.ts"
9 | # - "src/**/*.tsx"
10 | # - "src/**/*.js"
11 | # - "src/**/*.jsx"
12 |
13 | jobs:
14 | claude-review:
15 | # Optional: Filter by PR author
16 | # if: |
17 | # github.event.pull_request.user.login == 'external-contributor' ||
18 | # github.event.pull_request.user.login == 'new-developer' ||
19 | # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
20 |
21 | runs-on: ubuntu-latest
22 | permissions:
23 | contents: read
24 | pull-requests: read
25 | issues: read
26 | id-token: write
27 |
28 | steps:
29 | - name: Checkout repository
30 | uses: actions/checkout@v4
31 | with:
32 | fetch-depth: 1
33 |
34 | - name: Run Claude Code Review
35 | id: claude-review
36 | uses: anthropics/claude-code-action@beta
37 | with:
38 | claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
39 |
40 | # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1)
41 | # model: "claude-opus-4-1-20250805"
42 |
43 | # Direct prompt for automated review (no @claude mention needed)
44 | direct_prompt: |
45 | Please review this pull request and provide feedback on:
46 | - Code quality and best practices
47 | - Potential bugs or issues
48 | - Performance considerations
49 | - Security concerns
50 | - Test coverage
51 |
52 | Be constructive and helpful in your feedback.
53 |
54 | # Optional: Use sticky comments to make Claude reuse the same comment on subsequent pushes to the same PR
55 | # use_sticky_comment: true
56 |
57 | # Optional: Customize review based on file types
58 | # direct_prompt: |
59 | # Review this PR focusing on:
60 | # - For TypeScript files: Type safety and proper interface usage
61 | # - For API endpoints: Security, input validation, and error handling
62 | # - For React components: Performance, accessibility, and best practices
63 | # - For tests: Coverage, edge cases, and test quality
64 |
65 | # Optional: Different prompts for different authors
66 | # direct_prompt: |
67 | # ${{ github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' &&
68 | # 'Welcome! Please review this PR from a first-time contributor. Be encouraging and provide detailed explanations for any suggestions.' ||
69 | # 'Please provide a thorough code review focusing on our coding standards and best practices.' }}
70 |
71 | # Optional: Add specific tools for running tests or linting
72 | # allowed_tools: "Bash(npm run test),Bash(npm run lint),Bash(npm run typecheck)"
73 |
74 | # Optional: Skip review for certain conditions
75 | # if: |
76 | # !contains(github.event.pull_request.title, '[skip-review]') &&
77 | # !contains(github.event.pull_request.title, '[WIP]')
78 |
79 |
--------------------------------------------------------------------------------
/PaperSorter/web/models/semantic_scholar.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Semantic Scholar item model."""
25 |
26 | import uuid
27 | from datetime import datetime
28 | from ...providers import FeedItem
29 |
30 |
class SemanticScholarItem(FeedItem):
    """Feed item built from a Semantic Scholar paper record.

    The record is the ``paper_info`` mapping passed to ``__init__``.  Its
    ``url`` and ``title`` entries are required; every other field is read
    defensively because it may be missing or ``None`` in the record.
    """

    def __init__(self, paper_info):
        """Populate the parent ``FeedItem`` from *paper_info*.

        Args:
            paper_info: Mapping with at least ``url`` and ``title``.  The
                optional keys read here are ``abstract``, ``tldr``,
                ``authors``, ``publicationDate``, ``journal``, ``venue`` and
                ``externalIds``.

        Raises:
            KeyError: If ``url`` or ``title`` is missing.
            ValueError: If ``publicationDate`` is present but not numeric.
        """
        self.paper_info = paper_info
        url = paper_info["url"]
        # uuid3 is deterministic, so the same URL always yields the same ID.
        article_id = uuid.uuid3(uuid.NAMESPACE_URL, url)

        # Skip authors without a usable name instead of failing in join().
        author_names = [
            author["name"]
            for author in (paper_info.get("authors") or [])
            if author and author.get("name")
        ]

        # Initialize parent FeedItem
        super().__init__(
            external_id=str(article_id),
            title=paper_info["title"],
            content=self._extract_content(paper_info),
            author=", ".join(author_names),
            origin="Semantic Scholar",
            journal=self.determine_journal(paper_info),
            link=url,
            published=self._parse_publication_date(
                paper_info.get("publicationDate")
            ),
        )

        # Store additional attributes for compatibility
        self.href = self.link
        self.mediaUrl = self.link
        self.item_id = self.external_id  # Alias for database compatibility

    @staticmethod
    def _extract_content(paper_info):
        """Return the abstract, or the "(tl;dr) "-prefixed summary, or ""."""
        abstract = paper_info.get("abstract")
        if abstract:
            return abstract
        tldr_text = (paper_info.get("tldr") or {}).get("text")
        return ("(tl;dr) " + tldr_text) if tldr_text else ""

    @staticmethod
    def _parse_publication_date(pdate):
        """Convert a dash-separated date string into a naive ``datetime``.

        A missing or empty value falls back to the current local time.
        Partial dates (``YYYY`` or ``YYYY-MM``) are padded with 1 for the
        missing month/day rather than raising ``TypeError``.
        """
        if not pdate:
            return datetime.now()
        parts = [int(piece) for piece in pdate.split("-")]
        parts += [1] * (3 - len(parts))
        return datetime(*parts)

    def determine_journal(self, paper_info):
        """Pick a display name for where the paper was published.

        Preference order: journal name, venue, ``"arXiv"`` when an ArXiv
        external ID exists, otherwise ``"Unknown"``.  A journal entry without
        a name falls through to the next option instead of raising.
        """
        journal_name = (paper_info.get("journal") or {}).get("name")
        if journal_name:
            return journal_name
        venue = paper_info.get("venue")
        if venue:
            return venue
        if "ArXiv" in (paper_info.get("externalIds") or {}):
            return "arXiv"
        return "Unknown"
82 |
--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to PaperSorter will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7 |
8 | ## [Unreleased]
9 |
10 | ### Added
11 | - Comprehensive Sphinx documentation with Read the Docs theme
12 | - Auto-generated API documentation from docstrings
13 | - CLI command reference with examples
14 | - Getting Started guides for new users
15 | - GitHub Actions workflow for documentation deployment
16 |
17 | ## [1.0.0] - 2025-01-16
18 |
19 | ### Added
20 | - Initial release of PaperSorter
21 | - RSS/Atom feed support for paper ingestion
22 | - Machine learning-based paper recommendation using XGBoost
23 | - PostgreSQL database with pgvector for embeddings
24 | - Web interface for paper labeling and management
25 | - Slack, Discord, and email notification support
26 | - OAuth authentication (Google, GitHub, ORCID)
27 | - Multi-model support for different research domains
28 | - Semantic search using embedding similarity
29 | - AI-powered summarization and poster generation
30 | - Admin interface for system configuration
31 | - Comprehensive CLI with task automation
32 | - Docker and Kubernetes deployment support
33 |
34 | ### Changed
35 | - Migrated from SQLite to PostgreSQL for better scalability
36 | - Improved embedding generation with configurable models
37 | - Enhanced web UI with responsive design
38 | - Optimized database queries for large datasets
39 |
40 | ### Fixed
41 | - Unicode handling in paper titles and abstracts
42 | - Memory leaks in long-running update processes
43 | - Race conditions in parallel feed processing
44 | - Authentication session management issues
45 |
46 | ## [0.9.0] - 2024-12-01 (Beta)
47 |
48 | ### Added
49 | - Beta release for testing
50 | - Core functionality implementation
51 | - Basic web interface
52 | - Initial model training capabilities
53 |
54 | ### Known Issues
55 | - Limited to single-user deployment
56 | - No backup/restore functionality
57 | - Manual configuration required
58 |
59 | ## [0.5.0] - 2024-10-15 (Alpha)
60 |
61 | ### Added
62 | - Alpha release for internal testing
63 | - Proof of concept implementation
64 | - Basic RSS feed parsing
65 | - Simple XGBoost model training
66 |
67 | ---
68 |
69 | ## Version History Summary
70 |
71 | - **1.0.0** - Production-ready release with full feature set
72 | - **0.9.0** - Beta release with core functionality
73 | - **0.5.0** - Alpha release for testing
74 |
75 | ## Upgrade Notes
76 |
77 | ### Upgrading from 0.9.x to 1.0.0
78 |
79 | 1. **Database Migration Required**
80 | ```bash
81 | papersorter migrate --from 0.9
82 | ```
83 |
84 | 2. **Configuration Changes**
85 | - `google_oauth` renamed to `oauth.google`
86 | - New `web.base_url` setting required
87 | - `embedding_api.dimensions` now optional
88 |
89 | 3. **Breaking Changes**
90 | - CLI command structure reorganized
91 | - API endpoints moved to `/api/v1/` prefix
92 | - Model file format updated (retrain required)
93 |
94 | ### Upgrading from 0.5.x to 1.0.0
95 |
96 | Complete reinstallation recommended due to extensive changes.
97 |
98 | ## Support
99 |
100 | For upgrade assistance:
101 | - Issues: https://github.com/ChangLabSNU/papersorter/issues
--------------------------------------------------------------------------------
/PaperSorter/notification/factory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Factory for creating notification providers based on webhook URL."""
25 |
26 | from urllib.parse import urlparse
27 | from ..log import log
28 | from .slack import SlackProvider
29 | from .discord import DiscordProvider
30 | from .email import EmailProvider
31 |
32 |
def _hostname_in_domain(hostname, domain):
    """Return True if *hostname* is *domain* itself or one of its subdomains."""
    return hostname == domain or hostname.endswith("." + domain)


def create_notification_provider(webhook_url):
    """Create appropriate notification provider based on webhook URL.

    Automatically detects the webhook type based on the URL scheme or hostname:
    - URLs starting with 'mailto:' -> EmailProvider
    - 'slack.com' or any of its subdomains -> SlackProvider
    - 'discord.com' / 'discordapp.com' or their subdomains -> DiscordProvider
    - Any other hostname -> SlackProvider (with a warning)

    Args:
        webhook_url: The webhook URL to analyze

    Returns:
        NotificationProvider: Appropriate provider instance

    Raises:
        ValueError: If webhook URL is empty, cannot be parsed, or has no
            hostname
    """
    if not webhook_url:
        raise ValueError("Webhook URL cannot be empty")

    # Check for mailto: URLs first
    if webhook_url.startswith("mailto:"):
        log.debug(f"Detected email notification: {webhook_url}")
        return EmailProvider(webhook_url)

    # Parse the URL to get hostname
    try:
        hostname = urlparse(webhook_url).hostname or ""
    except ValueError as e:
        raise ValueError(f"Invalid webhook URL: {e}") from e

    if not hostname:
        raise ValueError(f"Could not extract hostname from URL: {webhook_url}")

    # Match on a domain boundary: a bare suffix test would also accept
    # look-alike hosts such as "evildiscord.com".  A trailing root dot
    # ("slack.com.") is ignored for the comparison.
    hostname_lower = hostname.lower().rstrip(".")

    if _hostname_in_domain(hostname_lower, "slack.com"):
        log.debug(f"Detected Slack webhook: {hostname}")
        return SlackProvider(webhook_url)

    if any(
        _hostname_in_domain(hostname_lower, domain)
        for domain in ("discord.com", "discordapp.com")
    ):
        log.debug(f"Detected Discord webhook: {hostname}")
        return DiscordProvider(webhook_url)

    # Default to Slack for backward compatibility
    log.warning(
        f"Unknown webhook hostname '{hostname}', defaulting to Slack provider"
    )
    return SlackProvider(webhook_url)
86 |
--------------------------------------------------------------------------------
/PaperSorter/static/css/pages/paper_detail_similar.css:
--------------------------------------------------------------------------------
1 | /* Similar section styles embedded in paper details */
2 |
3 | .summary-section {
4 | background: var(--bg-card);
5 | border-radius: 8px;
6 | padding: 20px;
7 | margin-bottom: 20px;
8 | box-shadow: var(--shadow-subtle);
9 | }
10 |
11 | .similar-section-header {
12 | display: flex;
13 | align-items: center;
14 | justify-content: space-between;
15 | gap: 16px;
16 | flex-wrap: wrap;
17 | margin-bottom: 12px;
18 | }
19 |
20 | .similar-section-actions {
21 | display: flex;
22 | align-items: center;
23 | gap: 8px;
24 | margin-left: auto;
25 | }
26 |
27 | .similar-section-actions .btn {
28 | white-space: nowrap;
29 | }
30 |
31 | .similar-section-actions .btn-generate.btn-sm {
32 | padding: 6px 16px;
33 | font-size: 14px;
34 | border-radius: var(--radius-base);
35 | }
36 |
37 | .summary-initial {
38 | display: flex;
39 | justify-content: center;
40 | align-items: center;
41 | min-height: 80px;
42 | text-align: center;
43 | }
44 |
45 | .summary-placeholder {
46 | margin: 0;
47 | color: var(--text-secondary);
48 | font-size: 14px;
49 | }
50 |
51 | .summary-header {
52 | display: flex;
53 | justify-content: space-between;
54 | align-items: center;
55 | margin-bottom: 0; /* align content to top */
56 | }
57 |
58 | .summary-header h3 {
59 | margin: 0;
60 | color: var(--similar-summary-header-color);
61 | font-size: 20px;
62 | }
63 |
64 | .summary-text { font-size: 15px; line-height: 1.8; color: var(--similar-summary-text-color); }
65 | .summary-text strong { color: var(--similar-summary-strong-color); }
66 | /* Align list indentation with similar_articles.html */
67 | .summary-text ul,
68 | .summary-text ol {
69 | margin: 10px 0;
70 | padding-left: 25px;
71 | }
72 |
73 | .summary-text ul ul,
74 | .summary-text ol ol,
75 | .summary-text ul ol,
76 | .summary-text ol ul {
77 | margin-top: 5px;
78 | padding-left: 25px; /* incremental per level */
79 | }
80 |
81 | .summary-disclaimer {
82 | margin-top: 16px;
83 | padding: 10px 14px;
84 | background-color: var(--similar-disclaimer-bg);
85 | border-left: 3px solid var(--similar-disclaimer-border);
86 | border-radius: 4px;
87 | }
88 |
89 | .poster-content { width: 100%; margin-top: 16px; }
90 | .poster-iframe {
91 | width: 100%;
92 | height: 800px;
93 | border: 1px solid var(--similar-disclaimer-border);
94 | border-radius: 8px;
95 | box-shadow: var(--shadow-subtle);
96 | }
97 |
98 | .poster-actions { display: flex; gap: 10px; margin-bottom: 12px; justify-content: flex-end; }
99 |
100 | .btn-print { display: inline-flex; align-items: center; gap: 8px; }
101 |
102 | #similarFeedsList,
103 | .similarFeedsList {
104 | padding-left: 0;
105 | padding-right: 0;
106 | margin-left: 0;
107 | margin-right: 0;
108 | }
109 |
110 | .similarity-header {
111 | background: var(--bg-table-header);
112 | padding: 15px 20px;
113 | border-bottom: 1px solid var(--border-light);
114 | font-weight: bold;
115 | color: var(--text-primary);
116 | font-size: 16px;
117 | }
118 |
119 | #similar-section hr {
120 | border: none;
121 | border-top: 1px solid var(--border-medium);
122 | margin: 20px 0 16px;
123 | }
124 |
125 | #similar-section .summary-loading .spinner,
126 | #similar-section .poster-loading .spinner {
127 | width: 40px;
128 | height: 40px;
129 | margin: 0 auto 20px;
130 | border: 4px solid var(--similar-loading-spinner-border);
131 | border-top: 4px solid var(--similar-btn-generate-bg);
132 | border-radius: 50%;
133 | animation: spin 1s linear infinite;
134 | }
135 |
--------------------------------------------------------------------------------
/PaperSorter/cli/context.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Command context management for PaperSorter CLI."""
25 |
26 | from typing import Optional
27 |
28 | from ..config import get_config
29 | from ..db import DatabaseManager
30 |
31 |
class CommandContext:
    """Context object passed to all commands.

    Holds the CLI-wide options and lazily creates the configuration, the
    database manager and the database wrappers on first access.  Call
    ``cleanup()`` when the command finishes to close whatever was created.
    """

    def __init__(self, log_file: Optional[str] = None, quiet: bool = False):
        """
        Initialize command context.

        Args:
            log_file: Optional log file path
            quiet: Whether to suppress output
        """
        self.log_file = log_file
        self.quiet = quiet
        # Lazily populated by the properties below.
        self._config = None
        self._db_manager = None
        self._db = None
        self._embedding_db = None

    @property
    def config(self) -> dict:
        """Load and cache configuration (the ``raw`` value of ``get_config()``)."""
        if self._config is None:
            self._config = get_config().raw
        return self._config

    @property
    def db_manager(self) -> "DatabaseManager":
        """Return the database manager, creating it from ``config["db"]`` once."""
        if self._db_manager is None:
            db_config = self.config["db"]
            self._db_manager = DatabaseManager.from_config(
                db_config,
                application_name="papersorter-cli",
            )
        return self._db_manager

    @property
    def db(self):
        """Get database connection (lazy loading)."""
        if self._db is None:
            # Imported here so the module is only loaded when a command needs it.
            from ..feed_database import FeedDatabase
            self._db = FeedDatabase(db_manager=self.db_manager)
        return self._db

    @property
    def embedding_db(self):
        """Get embedding database connection (lazy loading)."""
        if self._embedding_db is None:
            # Imported here so the module is only loaded when a command needs it.
            from ..embedding_database import EmbeddingDatabase
            self._embedding_db = EmbeddingDatabase(db_manager=self.db_manager)
        return self._embedding_db

    def cleanup(self):
        """Close every resource that was created and reset it to ``None``.

        Resources are closed in the order feed database, embedding database,
        database manager.  A failing ``close()`` no longer prevents the
        remaining resources from being closed; the first error encountered
        is re-raised after all of them have been attempted.
        """
        first_error = None
        for attr in ("_db", "_embedding_db", "_db_manager"):
            resource = getattr(self, attr)
            if resource is None:
                continue
            # Drop the reference first so a repeated cleanup() is a no-op.
            setattr(self, attr, None)
            try:
                resource.close()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
95 |
--------------------------------------------------------------------------------
/PaperSorter/cli/parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Main parser creation for PaperSorter CLI."""
25 |
26 | import argparse
27 | from .base import registry
28 | from .context import CommandContext
29 | from ..config import get_config
30 | from ..__version__ import __version__
31 |
32 |
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level ``papersorter`` parser with help and version flags."""
    # Built-in help is switched off so the flag can be registered explicitly.
    top_level = argparse.ArgumentParser(
        add_help=False,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Intelligent academic paper recommendation system',
        prog='papersorter',
    )

    # (flags, keyword options) for each global option, in registration order.
    global_options = (
        (
            ('-h', '--help'),
            {'action': 'help', 'help': 'Show this help message and exit'},
        ),
        (
            ('--version',),
            {
                'action': 'version',
                'version': f'PaperSorter, version {__version__}',
            },
        ),
    )
    for flags, options in global_options:
        top_level.add_argument(*flags, **options)

    return top_level
57 |
58 |
def execute_command(args: argparse.Namespace) -> int:
    """Execute the parsed command.

    Args:
        args: Parsed arguments.  The selected command is expected to have
            attached a ``command_handler``; ``config``, ``log_file`` and
            ``quiet`` are read when present.

    Returns:
        1 if no ``command_handler`` is attached, otherwise whatever the
        handler's ``handle(args, context)`` returns.
    """
    if not hasattr(args, 'command_handler'):
        return 1

    try:
        if getattr(args, 'config', None):
            get_config(args.config)
        else:
            get_config()
    except Exception:
        # Defer errors to individual commands where appropriate, but leave a
        # trace so a broken configuration can still be diagnosed.
        import logging

        logging.getLogger(__name__).debug(
            "Configuration could not be loaded before command execution",
            exc_info=True,
        )

    # Create context (no config argument)
    context = CommandContext(
        log_file=getattr(args, 'log_file', None),
        quiet=getattr(args, 'quiet', False)
    )

    try:
        # Execute the command
        return args.command_handler.handle(args, context)
    finally:
        # Clean up resources even when the handler raises
        context.cleanup()
85 |
86 |
def main(argv=None):
    """Parse *argv* and run the selected command; print help if none is given.

    Returns 0 after printing help, otherwise the result of
    ``execute_command``.
    """
    cli_parser = create_parser()

    # Let every registered command attach its own subparser.
    registry.create_subparsers(cli_parser)

    parsed = cli_parser.parse_args(argv)

    # A missing or None ``command`` means no subcommand was chosen.
    if getattr(parsed, 'command', None) is None:
        cli_parser.print_help()
        return 0

    return execute_command(parsed)
104 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # PaperSorter Documentation
2 |
3 | This directory contains the Sphinx-based documentation for PaperSorter.
4 |
5 | ## Quick Start
6 |
7 | ### Prerequisites
8 |
9 | Install the documentation dependencies:
10 |
11 | ```bash
12 | pip install -r requirements.txt
13 | ```
14 |
15 | ### Building Documentation
16 |
17 | #### Build HTML Documentation
18 |
19 | ```bash
20 | make html
21 | ```
22 |
23 | The built documentation will be in `_build/html/`. Open `_build/html/index.html` in your browser to view it.
24 |
25 | #### Live Development Server
26 |
27 | For development with automatic rebuilds:
28 |
29 | ```bash
30 | make livehtml
31 | ```
32 |
33 | This will start a server at http://localhost:8000 that automatically rebuilds when you make changes.
34 |
35 | ### Alternative Build Methods
36 |
37 | Using the build script:
38 |
39 | ```bash
40 | ./build.sh html # Build HTML
41 | ./build.sh serve # Build and serve locally
42 | ./build.sh live # Live reload for development
43 | ./build.sh all # Build everything (HTML, PDF, check links)
44 | ```
45 |
46 | Using Sphinx directly:
47 |
48 | ```bash
49 | sphinx-build -b html . _build/html
50 | ```
51 |
52 | ## Documentation Structure
53 |
54 | ```
55 | docs/
56 | ├── getting-started/ # Quick start guides for new users
57 | ├── user-guide/ # Detailed user documentation
58 | ├── admin-guide/ # System administration guides
59 | ├── cli-reference/ # Command-line interface documentation
60 | ├── api/ # API documentation (auto-generated)
61 | ├── development/ # Developer guides
62 | ├── tutorials/ # Step-by-step tutorials
63 | ├── reference/ # Reference materials
64 | ├── conf.py # Sphinx configuration
65 | ├── index.rst # Main documentation entry point
66 | ├── requirements.txt # Documentation dependencies
67 | └── build.sh # Build automation script
68 | ```
69 |
70 | ## Contributing to Documentation
71 |
72 | 1. **Edit Markdown/RST Files**: Most documentation is in Markdown format for easy editing
73 | 2. **API Documentation**: Update docstrings in Python code; they're auto-included
74 | 3. **Build Locally**: Always build and preview your changes before submitting
75 | 4. **Check Links**: Run `make linkcheck` to verify all links work
76 |
77 | ## Deployment
78 |
79 | ### GitHub Pages
80 |
81 | The documentation is automatically deployed to GitHub Pages when changes are pushed to the main branch:
82 |
83 | 1. GitHub Actions builds the documentation
84 | 2. Deploys to the `gh-pages` branch
85 | 3. Available at: https://qbio.io/PaperSorter/
86 |
87 | ### Manual Deployment
88 |
89 | ```bash
90 | ./build.sh deploy
91 | ```
92 |
93 | ## Troubleshooting
94 |
95 | ### Common Issues
96 |
97 | **Import Errors in API Documentation**
98 | - Ensure PaperSorter is installed: `pip install -e ..`
99 | - Check that all dependencies are installed
100 |
101 | **Broken Links**
102 | - Run `make linkcheck` to identify broken links
103 | - Fix references in the source files
104 |
105 | **Build Warnings**
106 | - Missing toctree references: Create the missing files or remove references
107 | - Duplicate descriptions: Add `:no-index:` directive to one instance
108 |
109 | ## Documentation Standards
110 |
111 | - Use **Markdown** for general documentation
112 | - Use **reStructuredText** for complex formatting and directives
113 | - Follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) for docstrings
114 | - Include code examples wherever possible
115 | - Keep line length under 100 characters for better readability
116 |
117 | ## License
118 |
119 | The documentation is licensed under the same terms as PaperSorter (MIT License).
120 |
--------------------------------------------------------------------------------
/docs/reference/index.rst:
--------------------------------------------------------------------------------
1 | =================
2 | Reference
3 | =================
4 |
5 | Comprehensive technical reference documentation for PaperSorter. This section provides detailed specifications, schemas, and reference materials for advanced users and developers.
6 |
7 | Use this section when you need precise technical details about configuration options, database structures, environment variables, or terminology.
8 |
9 | Contents
10 | ========
11 |
12 | Technical Specifications
13 | ------------------------
14 |
15 | - **Configuration Reference**: Complete list of all configuration options with types, defaults, and descriptions
16 | - **Database Schema**: Full PostgreSQL schema including tables, indexes, and relationships
17 | - **Environment Variables**: All supported environment variables and their effects
18 | - **Glossary**: Definitions of terms and concepts used throughout PaperSorter
19 |
20 | .. toctree::
21 | :maxdepth: 2
22 |
23 | configuration-reference
24 | database-schema
25 | environment-variables
26 | glossary
27 |
28 | Quick Reference
29 | ===============
30 |
31 | Configuration Files
32 | -------------------
33 |
34 | Primary configuration is stored in ``config.yml`` with these main sections:
35 |
36 | - ``db``: Database connection settings
37 | - ``web``: Web interface configuration
38 | - ``oauth``: Authentication provider settings
39 | - ``embedding_api``: Embedding generation API
40 | - ``summarization_api``: Text summarization API
41 | - ``scholarly_database``: Academic database integration
42 |
43 | Database Tables
44 | ---------------
45 |
46 | Core tables in the PostgreSQL schema:
47 |
48 | - ``feeds``: Article metadata and content
49 | - ``embeddings``: Vector embeddings using pgvector
50 | - ``preferences``: User ratings and labels
51 | - ``predicted_preferences``: ML model predictions
52 | - ``broadcasts``: Notification queue and history
53 | - ``users``: User accounts and settings
54 | - ``channels``: Notification channel configuration
55 |
56 | API Endpoints
57 | -------------
58 |
59 | Web API organization:
60 |
61 | - ``/api/feeds/``: Article management operations
62 | - ``/api/search/``: Search and discovery features
63 | - ``/api/settings/``: Administrative configuration
64 | - ``/api/user/``: User preferences and data
65 |
66 | CLI Commands
67 | ------------
68 |
69 | Main command categories:
70 |
71 | - ``papersorter update``: Content ingestion and processing
72 | - ``papersorter train``: Model training and evaluation
73 | - ``papersorter broadcast``: Notification delivery
74 | - ``papersorter serve``: Web interface server
75 |
76 | Version Compatibility
77 | =====================
78 |
79 | This reference documentation applies to:
80 |
81 | - **PaperSorter**: Version 1.0+
82 | - **Python**: 3.9+
83 | - **PostgreSQL**: 12+ with pgvector extension
84 | - **Dependencies**: See ``setup.py`` for specific version requirements
85 |
86 | Standards and Conventions
87 | =========================
88 |
89 | Configuration Format
90 | --------------------
91 |
92 | - **YAML**: Human-readable configuration files
93 | - **Environment Variables**: Override any configuration value
94 | - **Validation**: Schema validation with helpful error messages
95 |
96 | Database Design
97 | ---------------
98 |
99 | - **PostgreSQL**: ACID compliance and advanced features
100 | - **pgvector**: Efficient vector similarity search
101 | - **Migrations**: Version-controlled schema changes
102 |
103 | API Design
104 | ----------
105 |
106 | - **REST**: Standard HTTP methods and status codes
107 | - **JSON**: Consistent request/response format
108 | - **Authentication**: OAuth 2.0 with multiple providers
109 |
110 | Related Sections
111 | ================
112 |
113 | - :doc:`../api/index` - API implementation details
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import sys
# Put the parent directory on sys.path; presumably so autodoc can import the
# PaperSorter package from the repository root (verify the build's working dir).
sys.path.insert(0, os.path.abspath('..'))

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'PaperSorter'
copyright = '2024-2025, Seoul National University'
author = 'PaperSorter Team'
# `release` is the full version string; `version` is the short X.Y form.
release = '1.0.0'
version = '1.0'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# Sphinx extensions loaded for every build (built-in ones first, then
# third-party packages that must be installed in the docs environment).
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx_rtd_theme',
    'myst_parser',
    'sphinx_click',
    'sphinx_copybutton',
    'sphinx_tabs.tabs',
]

templates_path = ['_templates']
# Paths (relative to this directory) ignored when looking for source files.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

language = 'en'

# Support for both RST and Markdown
source_suffix = {
    '.rst': 'restructuredtext',
    '.md': 'markdown',
}

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_logo = None  # We'll add this later if needed
html_favicon = None  # We'll add this later if needed

# Read the Docs theme options
html_theme_options = {
    'logo_only': False,
    'display_version': True,
    'prev_next_buttons_location': 'bottom',
    'style_external_links': False,
    'collapse_navigation': False,
    'sticky_navigation': True,
    'navigation_depth': 4,
    'includehidden': True,
    'titles_only': False,
}

# -- Options for intersphinx extension ---------------------------------------
# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration

# External projects whose object inventories can be cross-referenced.
# Each value is (base URL, inventory location); None uses the default location.
intersphinx_mapping = {
    'python': ('https://docs.python.org/3', None),
    'numpy': ('https://numpy.org/doc/stable/', None),
    'pandas': ('https://pandas.pydata.org/docs/', None),
    'sklearn': ('https://scikit-learn.org/stable/', None),
}

# -- Options for todo extension ----------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/extensions/todo.html#configuration

# Render `todo` directives in the built output instead of hiding them.
todo_include_todos = True

# -- Options for autodoc ------------------------------------------------------
# Defaults applied to every autodoc directive unless overridden locally.
autodoc_default_options = {
    'members': True,
    'member-order': 'bysource',
    'special-members': '__init__',
    'undoc-members': True,
    'exclude-members': '__weakref__'
}

# -- Options for MyST Markdown parser ----------------------------------------
# Optional MyST syntax extensions enabled for the Markdown sources.
myst_enable_extensions = [
    "amsmath",
    "colon_fence",
    "deflist",
    "dollarmath",
    "fieldlist",
    "html_admonition",
    "html_image",
    "linkify",
    "replacements",
    "smartquotes",
    "strikethrough",
    "substitution",
    "tasklist",
]

# Generate heading anchors for Markdown headings up to this depth.
myst_heading_anchors = 3

# -- Options for copy button -------------------------------------------------
# Regex of prompt prefixes (Python REPL, shell "$ ", IPython-style "In [n]: "
# and continuation prompts) that the copy button should not copy.
copybutton_prompt_text = r">>> |\.\.\. |\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: "
copybutton_prompt_is_regexp = True
113 |
--------------------------------------------------------------------------------
/docs/development/database.rst:
--------------------------------------------------------------------------------
1 | ====================
2 | Database Integration
3 | ====================
4 |
5 | PaperSorter now ships with a centralized PostgreSQL access layer located at
6 | ``PaperSorter/db/manager.py``. The :class:`~PaperSorter.db.DatabaseManager`
7 | wraps a thread-safe psycopg2 connection pool and provides convenient context
8 | managers for opening sessions and cursors with consistent settings (pgvector
9 | registration, RealDict cursors, timeouts, and automatic rollbacks).
10 |
11 | Key Features
12 | ============
13 |
14 | - **Connection pooling**: ``DatabaseManager`` relies on
15 | :class:`~psycopg2.pool.ThreadedConnectionPool` to reuse connections across the
16 | application.
17 | - **pgvector registration**: Every connection registers the pgvector extension
18 | once and caches the result so callers do not need to repeat the boilerplate.
19 | - **Context-managed sessions**: ``db_manager.session()`` yields a
20 | ``DatabaseSession`` object that commits on success and rolls back on failure.
21 | - **Legacy compatibility**: ``db_manager.connect()`` returns a
22 | ``PooledConnection`` wrapper that mimics the old ``psycopg2.connect`` object
23 | so existing code can opt in gradually.
24 |
25 | Web Application Usage
26 | =====================
27 |
28 | ``create_app`` instantiates a single ``DatabaseManager`` and stores it on the
29 | Flask application config as ``app.config["db_manager"]``. Application code
30 | should always work inside ``db_manager.session()`` blocks rather than calling a
31 | legacy ``get_db_connection`` helper:
32 |
33 | .. code-block:: python
34 |
35 | from flask import current_app
36 |
37 | db_manager = current_app.config["db_manager"]
38 | with db_manager.session() as session:
39 | cursor = session.cursor(dict_cursor=True)
40 | cursor.execute("SELECT ...")
41 | rows = cursor.fetchall()
42 |
43 | The session automatically commits when the ``with`` block exits without an
44 | exception. Call ``session.commit()`` explicitly if you need to flush changes
45 | midway through a longer workflow.
46 |
47 | CLI and Task Usage
48 | ==================
49 |
50 | Tasks that previously invoked ``psycopg2.connect`` should construct a manager
51 | from configuration and use sessions to run their queries. For example, both
52 | ``papersorter models`` and ``papersorter predict`` now follow this pattern:
53 |
54 | .. code-block:: python
55 |
56 | from PaperSorter.db import DatabaseManager
57 |
58 | db_manager = DatabaseManager.from_config(db_config, application_name="papersorter-cli-models")
59 | try:
60 | with db_manager.session() as session:
61 | cursor = session.cursor(dict_cursor=True)
62 | cursor.execute("SELECT ...")
63 | # session.commit() when writes are performed
64 | finally:
65 | db_manager.close()
66 |
67 | Within long-running loops, pass the current ``session`` alongside the cursor so
68 | helpers can issue ``session.commit()`` (e.g., after ``execute_batch`` calls).
69 |
70 | Migration Tips
71 | ==============
72 |
73 | - Replace manual ``psycopg2.connect`` calls with ``DatabaseManager.from_config``.
74 | - Wrap database work in ``with db_manager.session():`` and request cursors via
75 | ``session.cursor(dict_cursor=True)`` when row dictionaries are needed.
76 | - Remove explicit ``conn.commit()`` / ``conn.rollback()`` pairs; the session
77 | handles transaction boundaries. Keep explicit ``session.commit()`` invocations
78 | when you intentionally persist work before a long sequence continues.
79 | - Legacy helpers like ``PaperSorter.feed_database.FeedDatabase`` still manage
80 | their own connections. They can be refactored incrementally to depend on the
81 | manager when practical.
82 |
83 | Adopting the shared manager provides predictable transaction handling, unified
84 | logging, and a single place to evolve database settings across the codebase.
85 |
--------------------------------------------------------------------------------
/docs/development/index.rst:
--------------------------------------------------------------------------------
1 | =================
2 | Development Guide
3 | =================
4 |
5 | Welcome to PaperSorter development! This guide helps contributors, maintainers, and developers who want to extend or modify PaperSorter.
6 |
7 | PaperSorter is built with extensibility in mind, featuring a modular architecture that allows for custom feed providers, notification channels, and machine learning models.
8 |
9 | Getting Started
10 | ===============
11 |
12 | Development Environment
13 | -----------------------
14 |
15 | - Python 3.9+ with virtual environment
16 | - PostgreSQL with pgvector extension
17 | - Code editor with Python support
18 | - Git for version control
19 |
20 | Development Workflow
21 | --------------------
22 |
23 | 1. Fork and clone the repository
24 | 2. Set up development environment
25 | 3. Create feature branch
26 | 4. Write tests and documentation
27 | 5. Submit pull request
28 |
29 | .. toctree::
30 | :maxdepth: 2
31 |
32 | contributing
33 | architecture
34 | database
35 | testing
36 | plugins
37 | release-process
38 |
39 | Architecture Principles
40 | =======================
41 |
42 | Modularity
43 | ----------
44 |
45 | PaperSorter is designed as a collection of loosely coupled modules:
46 |
47 | - **Separation of concerns**: Each module has a single responsibility
48 | - **Dependency injection**: Configuration and dependencies are injected
49 | - **Plugin architecture**: New providers and notifiers can be added easily
50 |
51 | Extensibility
52 | -------------
53 |
54 | Key extension points:
55 |
56 | - **Feed Providers**: Add support for new content sources
57 | - **Notification Channels**: Implement custom delivery methods
58 | - **ML Models**: Experiment with different recommendation algorithms
59 | - **Web Interface**: Add new API endpoints and UI components
60 |
61 | Code Quality
62 | ============
63 |
64 | Standards
65 | ---------
66 |
67 | - **PEP 8**: Python code style guidelines
68 | - **Type Hints**: All public APIs include type annotations
69 | - **Documentation**: Comprehensive docstrings and user guides
70 | - **Testing**: Unit tests with good coverage
71 |
72 | Tools
73 | -----
74 |
75 | - **Black**: Code formatting
76 | - **Flake8**: Linting and style checking
77 | - **MyPy**: Static type checking
78 | - **Pytest**: Testing framework
79 |
80 | Development Commands
81 | ====================
82 |
83 | .. code-block:: bash
84 |
85 | # Setup development environment
86 | python -m venv venv
87 | source venv/bin/activate
88 | pip install -e ".[dev]"
89 |
90 | # Code quality checks
91 | black PaperSorter/
92 | flake8 PaperSorter/
93 | mypy PaperSorter/
94 |
95 | # Run tests
96 | pytest
97 | pytest --cov=PaperSorter
98 |
99 | # Build documentation
100 | cd docs
101 | make html
102 |
103 | Contributing Guidelines
104 | =======================
105 |
106 | Code Contributions
107 | ------------------
108 |
109 | - Follow existing code patterns and conventions
110 | - Include tests for new functionality
111 | - Update documentation for user-facing changes
112 | - Keep commits focused and well-described
113 |
114 | Documentation
115 | -------------
116 |
117 | - API documentation using docstrings
118 | - User guides for new features
119 | - Architecture documentation for significant changes
120 | - Examples and tutorials for complex workflows
121 |
122 | Community
123 | =========
124 |
125 | - **Issues**: Bug reports and feature requests
126 | - **Discussions**: General questions and ideas
127 | - **Pull Requests**: Code contributions and reviews
128 | - **Wiki**: Community-maintained documentation
129 |
130 | Related Resources
131 | =================
132 |
133 | - :doc:`../api/index` - Complete API reference
134 | - :doc:`../reference/index` - Technical specifications
135 | - :doc:`../admin-guide/index` - Deployment and operations
136 |
--------------------------------------------------------------------------------
/PaperSorter/providers/base.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Base interface for feed providers."""
25 |
26 | from abc import ABC, abstractmethod
27 | from typing import List, Dict, Optional, Iterator, Any
28 | from datetime import datetime
29 | from dataclasses import dataclass
30 |
31 |
@dataclass
class FeedItem:
    """A single article/entry retrieved from a feed source.

    Only ``external_id`` and ``title`` are required; every other field has a
    default. ``published`` is filled with the current local time (a naive
    ``datetime.now()``) when it is omitted or passed as ``None``.
    """

    external_id: str
    title: str
    content: Optional[str] = None
    author: Optional[str] = None
    origin: str = ""
    journal: Optional[str] = None
    link: Optional[str] = None
    # Optional because None is the declared default and an accepted input;
    # __post_init__ replaces it with a concrete datetime.
    published: Optional[datetime] = None

    def __post_init__(self) -> None:
        """Default ``published`` to the creation time when none was given."""
        if self.published is None:
            self.published = datetime.now()
48 |
49 |
class FeedProvider(ABC):
    """Interface that concrete feed providers implement."""

    def __init__(self, config: Dict[str, Any]):
        """Keep a reference to the provider configuration."""
        self.config = config

    @abstractmethod
    def get_items(
        self,
        source: Dict[str, Any],
        limit: Optional[int] = None,
        since: Optional[datetime] = None,
    ) -> Iterator[List[FeedItem]]:
        """
        Fetch feed items from one source, in batches.

        Args:
            source: Source configuration (from feed_sources table)
            limit: Upper bound on the number of items to retrieve
            since: Restrict results to items published after this date

        Yields:
            Lists of FeedItem objects (batched for efficiency)
        """

    @abstractmethod
    def update_source_timestamp(self, source_id: int, has_new_items: bool = False):
        """
        Record that a source was checked, and optionally that it changed.

        Updates the last_checked timestamp and optionally last_updated.

        Args:
            source_id: ID of the source in feed_sources table
            has_new_items: Whether new items were found from this source
        """

    @abstractmethod
    def get_sources(self, source_type: str) -> List[Dict[str, Any]]:
        """
        Return the sources of a given type that need updating.

        Args:
            source_type: Type of sources to retrieve (e.g., 'rss')

        Returns:
            List of source dictionaries from feed_sources table
        """

    def validate_source(self, source: Dict[str, Any]) -> bool:
        """
        Check that a source carries the fields this provider requires.

        The base implementation accepts every source; subclasses may
        override it with real checks.

        Args:
            source: Source configuration to validate

        Returns:
            True if valid, False otherwise
        """
        return True
112 |
--------------------------------------------------------------------------------
/PaperSorter/providers/factory.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Factory for creating scholarly database providers."""
25 |
26 | from typing import Dict, Any, Optional
27 | from .scholarly_database import ScholarlyDatabaseProvider
28 | from .semantic_scholar import SemanticScholarProvider
29 | from .openalex import OpenAlexProvider
30 | from ..log import log
31 |
32 |
class ScholarlyDatabaseFactory:
    """Builds scholarly database provider objects from a name and a config."""

    # Accepted (already normalized) provider names mapped to their classes
    PROVIDERS = {
        "semantic_scholar": SemanticScholarProvider,
        "semanticscholar": SemanticScholarProvider,  # Alias for backward compatibility
        "openalex": OpenAlexProvider,
    }

    @classmethod
    def create_provider(
        cls,
        provider_name: str,
        config: Dict[str, Any]
    ) -> Optional[ScholarlyDatabaseProvider]:
        """
        Instantiate the provider registered under ``provider_name``.

        The name is lower-cased and hyphens become underscores before the
        lookup. Every failure (unknown name, provider reporting that it is
        not configured, or an exception while constructing it) is logged and
        reported to the caller as None.

        Args:
            provider_name: Name of the provider (semantic_scholar, openalex)
            config: Provider configuration dictionary

        Returns:
            Provider instance if successful, None otherwise
        """
        provider_name = provider_name.lower().replace("-", "_")

        implementation = cls.PROVIDERS.get(provider_name)
        if not implementation:
            log.error(f"Unknown scholarly database provider: {provider_name}")
            log.info(f"Available providers: {', '.join(cls.PROVIDERS.keys())}")
            return None

        try:
            instance = implementation(config)
            if instance.is_configured():
                return instance

            # Not usable as configured: explain why and give up
            log.error(f"{instance.name} is not properly configured")
            if instance.requires_api_key:
                log.error("API key is required but not provided")
            return None
        except Exception as e:
            log.error(f"Failed to create {provider_name} provider: {e}")
            return None

    @classmethod
    def list_providers(cls) -> Dict[str, bool]:
        """
        Report, for each registered name, whether an API key is required.

        Each provider class is instantiated with an empty config just to read
        its ``requires_api_key`` attribute.

        Returns:
            Dictionary mapping provider names to whether they require API keys
        """
        return {
            registered_name: implementation({}).requires_api_key
            for registered_name, implementation in cls.PROVIDERS.items()
        }
99 |
100 |
--------------------------------------------------------------------------------
/PaperSorter/config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """Centralized configuration loader for PaperSorter.
4 |
5 | This module provides a lightweight, process-wide configuration singleton
6 | loaded from YAML. Prefer importing and calling `get_config()` from any
7 | module that needs configuration values.
8 |
9 | Load precedence:
10 | - Explicit path provided to `get_config(path)` / `reload_config(path)`
11 | - Environment variables: `PAPERSORTER_CONFIG` or `PAPER_SORTER_CONFIG`
12 | - Default path: `./config.yml`
13 |
14 | Usage:
15 | from PaperSorter.config import get_config
16 | cfg = get_config()
17 | db_cfg = cfg.raw.get('db', {})
18 | """
19 |
20 | from __future__ import annotations
21 |
22 | import os
23 | import threading
24 | from dataclasses import dataclass
25 | from pathlib import Path
26 | from typing import Any, Dict, Optional
27 |
28 | import yaml
29 |
30 |
# Held while the cached configuration is loaded or overridden.
_LOCK = threading.RLock()
# Process-wide cached Config; None until a configuration has been loaded or set.
_CONFIG: Optional["Config"] = None
# Path the cached configuration was last resolved from; reload_config() falls
# back to it when called without a path.
_CONFIG_PATH: Optional[str] = None
34 |
35 |
@dataclass
class Config:
    """Thin wrapper around the parsed configuration mapping."""

    # The configuration exactly as loaded (nested dicts from YAML)
    raw: Dict[str, Any]

    def get(self, path: str, default: Any = None) -> Any:
        """Look up a nested value by dotted path, e.g. ``cfg.get('web.port', 5001)``.

        Each dot-separated segment is used as a key into the current mapping.
        ``default`` is returned as soon as a segment is missing or the value
        reached so far is not a dict.
        """
        node: Any = self.raw
        for key in path.split('.'):
            if isinstance(node, dict) and key in node:
                node = node[key]
                continue
            return default
        return node
53 |
54 |
def _resolve_config_path(preferred: Optional[str]) -> str:
    """Pick the configuration file path to load.

    Order of preference: a non-empty ``preferred`` argument, then the
    ``PAPERSORTER_CONFIG`` environment variable, then ``PAPER_SORTER_CONFIG``,
    and finally ``./config.yml``. Empty values are treated as unset.
    """
    if preferred:
        return str(preferred)

    for variable in ("PAPERSORTER_CONFIG", "PAPER_SORTER_CONFIG"):
        from_env = os.environ.get(variable)
        if from_env:
            return from_env

    return "./config.yml"
64 |
65 |
def _load_yaml_config(path: str, explicit: bool) -> Dict[str, Any]:
    """Parse the YAML file at ``path`` and return its top-level mapping.

    Args:
        path: Location of the YAML configuration file.
        explicit: True when the path was specifically requested; a missing
            file is then an error rather than a silent fallback.

    Returns:
        The parsed mapping, or an empty dict when the file is empty or when a
        non-explicit path does not exist.

    Raises:
        FileNotFoundError: ``explicit`` is true and ``path`` does not exist.
        ValueError: The YAML document's root is not a mapping.
    """
    p = Path(path)
    if not p.exists():
        if explicit:
            raise FileNotFoundError(f"Configuration file not found: {path}")
        # Fallback to empty config when using defaults
        return {}

    # Decode as UTF-8 explicitly; without it the platform's locale encoding is
    # used, which can mis-read non-ASCII values on some systems.
    with p.open("r", encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    if not isinstance(data, dict):
        raise ValueError("Configuration root must be a mapping (YAML dict)")
    return data
79 |
80 |
def _load_config(path: Optional[str], refresh: bool = False) -> Config:
    """Load (or return the cached) process-wide configuration.

    Args:
        path: Preferred configuration path, or None to resolve it from the
            environment / default location.
        refresh: When True, re-read the file even if a configuration is
            already cached.

    Returns:
        The cached ``Config`` instance (freshly loaded if needed).

    Raises:
        FileNotFoundError: An explicitly requested file does not exist.
        ValueError: The YAML root is not a mapping.
    """
    global _CONFIG, _CONFIG_PATH
    with _LOCK:
        if _CONFIG is not None and not refresh:
            return _CONFIG

        resolved = _resolve_config_path(path)
        # A source is explicit only when it is non-empty, matching how
        # _resolve_config_path treats empty values as unset. Otherwise an
        # empty PAPERSORTER_CONFIG="" would resolve to the default path yet
        # raise FileNotFoundError instead of falling back to an empty config.
        explicit = bool(
            path
            or os.environ.get("PAPERSORTER_CONFIG")
            or os.environ.get("PAPER_SORTER_CONFIG")
        )
        raw = _load_yaml_config(resolved, explicit=explicit)

        _CONFIG = Config(raw=raw)
        _CONFIG_PATH = resolved
        return _CONFIG
95 |
96 |
def get_config(path: Optional[str] = None) -> Config:
    """Return the process-wide Config instance, loading it on first use.

    ``path`` is only consulted when nothing has been loaded yet; once a
    configuration is cached it is returned as-is (use ``reload_config`` to
    re-read the file).
    """
    # Passing the argument straight through covers both the "path given"
    # and the "no path" case.
    return _load_config(path, refresh=False)
105 |
106 |
def reload_config(path: Optional[str] = None) -> Config:
    """Re-read the configuration, bypassing the cached instance.

    Uses ``path`` when given; otherwise falls back to the path recorded by
    the previous load (which may itself be None if nothing was loaded yet).
    """
    if path is None:
        target = _CONFIG_PATH
    else:
        target = path
    return _load_config(target, refresh=True)
112 |
113 |
def configured() -> bool:
    """Tell whether a configuration object is currently cached."""
    return not (_CONFIG is None)
117 |
118 |
def set_config_for_testing(cfg: Config) -> None:
    """Replace the cached global configuration with ``cfg`` (for tests).

    Only the cached object is swapped; the remembered config path is left
    untouched.
    """
    global _CONFIG
    _LOCK.acquire()
    try:
        _CONFIG = cfg
    finally:
        _LOCK.release()
124 |
125 |
--------------------------------------------------------------------------------
/PaperSorter/tasks/serve.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Web server task for PaperSorter."""
25 |
26 | from ..log import log, initialize_logging
27 | from ..web import create_app
28 | from ..cli.base import BaseCommand, registry
29 | import argparse
30 |
31 |
class ServeCommand(BaseCommand):
    """CLI command that starts the web interface."""

    name = 'serve'
    help = 'Serve web interface for article labeling and other tasks'

    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Register the serve-specific options on ``parser``."""
        # (flag, keyword arguments for add_argument), registered in this order
        option_specs = (
            ('--host', {'default': '0.0.0.0', 'help': 'Host to bind to'}),
            ('--port', {'type': int, 'default': 5001, 'help': 'Port to bind to'}),
            ('--debug', {'action': 'store_true', 'help': 'Enable debug mode'}),
            (
                '--skip-authentication',
                {'help': 'Skip OAuth authentication and auto-login as specified admin user (DEVELOPMENT ONLY)'},
            ),
            (
                '--demo-mode',
                {'action': 'store_true', 'help': 'Grant admin privileges to all users (DEMONSTRATION ONLY)'},
            ),
        )
        for flag, options in option_specs:
            parser.add_argument(flag, **options)

    def handle(self, args: argparse.Namespace, context) -> int:
        """Run the web server with the parsed arguments.

        ``args.config``, ``args.log_file`` and ``args.quiet`` are read here but
        not declared by ``add_arguments``; presumably they come from shared
        CLI options defined elsewhere.

        Returns:
            0 when ``main`` returns normally, 1 when it raises (the error is logged).
        """
        initialize_logging('serve', args.log_file, args.quiet)
        try:
            # Arguments are collected inside the try so that a missing
            # attribute is reported like any other failure.
            serve_options = {
                'config': args.config,
                'host': args.host,
                'port': args.port,
                'debug': args.debug,
                'log_file': args.log_file,
                'quiet': args.quiet,
                'skip_authentication': args.skip_authentication,
                'demo_mode': args.demo_mode,
            }
            main(**serve_options)
        except Exception as e:
            log.error(f"Serve failed: {e}")
            return 1
        return 0
84 |
85 | # Register the command
86 | registry.register(ServeCommand)
87 |
88 |
def main(config, host, port, debug, log_file, quiet, skip_authentication, demo_mode=False):
    """Create the web application and serve it on ``host``:``port``.

    Logs a warning when authentication bypass or demo mode is active.
    ``log_file`` and ``quiet`` are accepted but not used inside this function.
    """
    # Collect the safety warnings first, then emit them in a fixed order
    notices = []
    if skip_authentication:
        notices.append(
            f"⚠️  AUTHENTICATION BYPASS ENABLED for user '{skip_authentication}' - DEVELOPMENT USE ONLY!"
        )
    if demo_mode:
        notices.append(
            "⚠️  DEMO MODE ENABLED: All users have admin privileges! - DEMONSTRATION USE ONLY!"
        )
    for notice in notices:
        log.warning(notice)

    log.info(f"Starting web server on {host}:{port}")

    flask_app = create_app(
        config,
        skip_authentication=skip_authentication,
        demo_mode=demo_mode,
    )

    # Blocks until the Flask development server stops
    flask_app.run(host=host, port=port, debug=debug)
108 |
--------------------------------------------------------------------------------
/PaperSorter/services/summarization.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """High-level helpers for LLM-backed article summarization."""
3 |
4 | from __future__ import annotations
5 |
6 | from typing import Sequence
7 |
8 | from ..providers.openai_client import get_openai_client
9 |
# User-prompt template for summarization requests. The ``{articles}``
# placeholder is filled via ``str.format`` with the joined article snippets.
SUMMARY_PROMPT_TEMPLATE = """You are an expert scientific literature analyst. Analyze the following collection of research articles and provide a focused summary.

{articles}

Start your response directly with the numbered sections below. Do not include any introductory sentences like "Here is my analysis" or "Based on the provided articles". Do not repeat the format instructions (like "2-3 sentences" or "3-4 bullet points") in your output. Begin immediately with:

1. **Common Themes**: Identify the main research areas connecting these articles in 2-3 sentences.

2. **Key Topics**: List the most significant concepts, methods, or findings that appear across multiple papers as 3-4 bullet points.

3. **Unique Contributions**: For each article, briefly state what distinguishes it from the others in one sentence. Reference articles using their author-year format (e.g., "Smith 2023 introduces...").

4. **Future Directions**: Based on these papers, provide 2-3 bullet points on the most promising research opportunities.

Keep your response focused and actionable, using clear Markdown formatting. When referencing specific papers, use the author-year format provided in square brackets for each article."""
25 |
26 |
27 | class ArticleSummarizer:
28 | """Wraps OpenAI chat completion calls for article summaries."""
29 |
30 | def __init__(
31 | self,
32 | *,
33 | client,
34 | model: str,
35 | temperature: float = 0.7,
36 | max_tokens: int = 8000,
37 | timeout: float | None = None,
38 | ) -> None:
39 | self._client = client
40 | self._model = model
41 | self._temperature = temperature
42 | self._max_tokens = max_tokens
43 | self._timeout = timeout
44 |
45 | @classmethod
46 | def from_config(cls, config):
47 | api_config = config.get("summarization_api")
48 | if not isinstance(api_config, dict):
49 | return None
50 |
51 | client = get_openai_client("summarization_api", cfg=config, optional=True)
52 | if client is None:
53 | return None
54 |
55 | model = api_config.get("model", "gpt-4o-mini")
56 | temperature = float(api_config.get("temperature", 0.7))
57 | max_tokens = int(api_config.get("max_tokens", 8000))
58 | timeout = api_config.get("timeout")
59 | try:
60 | timeout_value = float(timeout) if timeout is not None else None
61 | except (TypeError, ValueError):
62 | timeout_value = None
63 |
64 | return cls(
65 | client=client,
66 | model=model,
67 | temperature=temperature,
68 | max_tokens=max_tokens,
69 | timeout=timeout_value,
70 | )
71 |
72 | def summarize(self, snippets: Sequence[str]) -> str:
73 | if not snippets:
74 | raise ValueError("No article snippets provided for summarization")
75 |
76 | articles_text = "\n\n---\n\n".join(snippets)
77 | prompt = SUMMARY_PROMPT_TEMPLATE.format(articles=articles_text)
78 |
79 | request_kwargs = {
80 | "model": self._model,
81 | "messages": [
82 | {
83 | "role": "system",
84 | "content": "You are an expert at analyzing and summarizing scientific literature.",
85 | },
86 | {"role": "user", "content": prompt},
87 | ],
88 | "temperature": self._temperature,
89 | "max_tokens": self._max_tokens,
90 | }
91 | if self._timeout is not None:
92 | request_kwargs["timeout"] = self._timeout
93 |
94 | response = self._client.chat.completions.create(**request_kwargs)
95 |
96 | message = response.choices[0].message.content
97 | if not message:
98 | raise RuntimeError("Empty response from summarization model")
99 |
100 | if not isinstance(message, str):
101 | message = str(message)
102 |
103 | return message
104 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. PaperSorter documentation master file
2 |
3 | ========================================
4 | PaperSorter Documentation
5 | ========================================
6 |
7 | .. image:: https://img.shields.io/badge/python-3.9+-blue.svg
8 | :target: https://www.python.org/downloads/
9 | :alt: Python Version
10 |
11 | .. image:: https://img.shields.io/badge/license-MIT-green.svg
12 | :target: https://opensource.org/licenses/MIT
13 | :alt: License
14 |
15 | **PaperSorter** is an intelligent academic paper recommendation system that uses machine learning to help researchers stay up-to-date with the latest research in their fields. It automatically fetches papers from RSS feeds, generates embeddings, and uses XGBoost to predict which papers will be most relevant to you.
16 |
17 | Key Features
18 | ============
19 |
20 | - 🤖 **Smart Filtering**: Machine learning-based paper recommendations
21 | - 📰 **Multi-Source Support**: RSS/Atom feeds, arXiv, and more
22 | - 🔔 **Flexible Notifications**: Slack, Discord, and email newsletters
23 | - 🎯 **Personalized Models**: Train custom models for different research areas
24 | - 🌐 **Web Interface**: User-friendly labeling and management interface
25 | - 🔍 **Semantic Search**: Find related papers using embedding similarity
26 | - 📄 **Search from PDF**: Select text from PDFs to find similar papers (Paper Connect)
27 |
28 | Quick Start
29 | ===========
30 |
31 | .. code-block:: bash
32 |
33 | # Install PaperSorter
34 | pip install -e .
35 |
36 | # Configure your settings
37 | cp config.example.yml config.yml
38 | # Edit config.yml with your database and API credentials
39 |
40 | # Fetch papers and generate embeddings
41 | papersorter update
42 |
43 | # Train your first model (after labeling ~100 papers)
44 | papersorter train
45 |
46 | # Send notifications
47 | papersorter broadcast
48 |
49 | Documentation Overview
50 | ======================
51 |
52 | .. toctree::
53 | :maxdepth: 2
54 | :caption: Getting Started
55 |
56 | getting-started/index
57 | getting-started/installation
58 | getting-started/quickstart
59 | getting-started/first-model
60 |
61 | .. toctree::
62 | :maxdepth: 2
63 | :caption: User Guide
64 |
65 | user-guide/index
66 | user-guide/configuration
67 | user-guide/feed-sources
68 | user-guide/training-models
69 | user-guide/notifications
70 | user-guide/search-from-pdf
71 | user-guide/web-interface
72 | user-guide/workflows
73 |
74 | .. toctree::
75 | :maxdepth: 2
76 | :caption: Administrator Guide
77 |
78 | admin-guide/index
79 | admin-guide/deployment
80 | admin-guide/database-setup
81 | admin-guide/backup-restore
82 | admin-guide/monitoring
83 | admin-guide/security
84 | admin-guide/troubleshooting
85 |
86 | .. toctree::
87 | :maxdepth: 2
88 | :caption: CLI Reference
89 |
90 | cli-reference/index
91 | cli-reference/commands
92 | cli-reference/examples
93 |
94 | .. toctree::
95 | :maxdepth: 2
96 | :caption: API Documentation
97 |
98 | api/index
99 | api/modules
100 | api/database
101 | api/providers
102 | api/notifications
103 | api/web
104 |
105 | .. toctree::
106 | :maxdepth: 2
107 | :caption: Development
108 |
109 | development/index
110 | development/contributing
111 | development/architecture
112 | development/testing
113 | development/plugins
114 | development/release-process
115 |
116 | .. toctree::
117 | :maxdepth: 2
118 | :caption: Tutorials
119 |
120 | tutorials/index
121 | tutorials/gmail-setup
122 | tutorials/slack-integration
123 | tutorials/custom-embeddings
124 | tutorials/multi-model
125 |
126 | .. toctree::
127 | :maxdepth: 2
128 | :caption: Reference
129 |
130 | reference/index
131 | reference/configuration-reference
132 | reference/database-schema
133 | reference/environment-variables
134 | reference/glossary
135 |
136 | .. toctree::
137 | :maxdepth: 1
138 | :caption: About
139 |
140 | changelog
141 | license
142 |
143 | Indices and Tables
144 | ==================
145 |
146 | * :ref:`genindex`
147 | * :ref:`modindex`
148 | * :ref:`search`
149 |
150 | Need Help?
151 | ==========
152 |
153 | - 📖 Check the documentation guides
154 | - 🐛 Report issues on `GitHub <https://github.com/ChangLabSNU/PaperSorter/issues>`_
155 | - 💬 Join our community discussions
156 |
157 | License
158 | =======
159 |
160 | PaperSorter is released under the MIT License. See the LICENSE file for details.
--------------------------------------------------------------------------------
/PaperSorter/templates/feedback_success.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}Feedback Recorded - {{ site_name }}{% endblock %}
4 |
5 | {% block header %}{% endblock %}
6 |
7 | {% block styles %}
8 |
9 |
114 | {% endblock %}
115 |
116 | {% block main_container %}
117 |
118 |
119 | {% if feedback_type == 'interested' %}👍{% else %}👎{% endif %}
120 |
121 |
122 |
Thank You!
123 |
124 |
125 | Your feedback has been recorded. You marked
126 | "{{ feed_title }}"
127 | as {{ feedback_type }} .
128 |
129 |
130 |
138 |
139 |
142 |
143 | {% endblock %}
144 |
--------------------------------------------------------------------------------
/PaperSorter/templates/settings.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block title %}Settings - {{ site_name }}{% endblock %}
4 | {% block header_title %}Settings {% endblock %}
5 |
6 | {% block header_actions %}
7 | ← Back to Papers
8 | {{ super() }}
9 | {% endblock %}
10 |
11 | {% block styles %}
12 |
103 | {% endblock %}
104 |
105 | {% block content %}
106 |
139 | {% endblock %}
140 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # PaperSorter Docker Environment Configuration
2 | # Copy this file to .env and fill in your values
3 |
4 | # ============================================
5 | # Database Configuration
6 | # ============================================
7 | POSTGRES_DB=papersorter
8 | POSTGRES_USER=papersorter
9 | POSTGRES_PASSWORD=changeme # CHANGE THIS!
10 |
11 | # ============================================
12 | # Application Configuration
13 | # ============================================
14 | # Flask secret key for session management (generate with: python -c "import secrets; print(secrets.token_hex(32))")
15 | FLASK_SECRET_KEY=your-secret-key-here
16 |
17 | # Site configuration
18 | SITE_NAME=PaperSorter
19 | BASE_URL=http://localhost # Change to https://your-domain.com for production
20 | DOMAIN=localhost # Change to your-domain.com for production
21 | ADMIN_EMAIL=admin@example.com # Used for Let's Encrypt SSL certificates
22 | DEFAULT_TIMEZONE=UTC # Default timezone for new users (e.g., America/New_York, Asia/Seoul)
23 | DEFAULT_DATE_FORMAT=MMM D, YYYY # Default date format (e.g., YYYY-MM-DD, DD/MM/YYYY)
24 | # Comma-separated list of admin emails or ORCID IDs
25 | ADMIN_USERS= # Example: admin@example.com,0000-0002-1825-0097@orcid.org
26 |
27 | # ============================================
28 | # OAuth Configuration
29 | # ============================================
30 | # Google OAuth (https://console.cloud.google.com/)
31 | GOOGLE_CLIENT_ID=
32 | GOOGLE_CLIENT_SECRET=
33 |
34 | # GitHub OAuth (https://github.com/settings/developers)
35 | GITHUB_CLIENT_ID=
36 | GITHUB_CLIENT_SECRET=
37 |
38 | # ORCID OAuth (https://orcid.org/developer-tools)
39 | ORCID_CLIENT_ID=
40 | ORCID_CLIENT_SECRET=
41 |
42 | # ============================================
43 | # API Keys
44 | # ============================================
45 | # Embedding API configuration
46 | EMBEDDING_API_KEY=
47 | EMBEDDING_API_URL=https://api.openai.com/v1 # Or your custom endpoint
48 | EMBEDDING_MODEL=text-embedding-3-large # Model name for embeddings
49 | EMBEDDING_DIMENSIONS= # Optional: dimensions (e.g., 1536 for pgvector HNSW indexing)
50 |
51 | # Summarization API (e.g., Gemini)
52 | SUMMARIZATION_API_KEY=
53 | SUMMARIZATION_API_URL=https://generativelanguage.googleapis.com/v1beta/openai
54 | SUMMARIZATION_MODEL=gemini-2.0-flash-thinking-exp-01-21 # Model for summarization
55 |
56 | # Scholarly Database Provider
57 | SCHOLARLY_PROVIDER=semantic_scholar # Options: semantic_scholar, openalex
58 | MATCH_DATE_TOLERANCE_DAYS=60 # Date tolerance for automatic article matching
59 |
60 | # Semantic Scholar API
61 | SEMANTIC_SCHOLAR_API_KEY=
62 | # Optional: Retry configuration for rate limits (defaults shown)
63 | # SEMANTIC_SCHOLAR_MAX_RETRIES=5 # Number of retries for 429 errors
64 | # SEMANTIC_SCHOLAR_RETRY_BACKOFF_BASE=2 # Exponential backoff base
65 | # SEMANTIC_SCHOLAR_THROTTLE=1 # Seconds between requests
66 |
67 | # OpenAlex (if using instead of Semantic Scholar)
68 | OPENALEX_EMAIL=your-email@domain.com
69 | # Optional: Retry configuration for rate limits (defaults shown)
70 | # OPENALEX_MAX_RETRIES=5 # Number of retries for 429 errors
71 | # OPENALEX_RETRY_BACKOFF_BASE=2 # Exponential backoff base
72 | # OPENALEX_THROTTLE=0.1 # Seconds between requests
73 |
74 | # ============================================
75 | # Email/SMTP Configuration
76 | # ============================================
77 | # SMTP provider settings for email notifications
78 | # Option 1: Use a predefined provider (gmail, outlook)
79 | SMTP_PROVIDER=gmail # Options: gmail, outlook, custom
80 | # For Gmail/Outlook, only username and password are needed:
81 | SMTP_USERNAME=your-email@gmail.com
82 | SMTP_PASSWORD= # Use app-specific password, not regular password
83 |
84 | # Option 2: Custom SMTP configuration (when SMTP_PROVIDER=custom)
85 | SMTP_HOST=smtp.example.com
86 | SMTP_PORT=587
87 | SMTP_ENCRYPTION=tls # Options: tls, ssl, none
88 | SMTP_TIMEOUT=30
89 |
90 | # Email notification settings
91 | EMAIL_FROM=papersorter@example.com # Sender address for notifications
92 | EMAIL_FROM_NAME=PaperSorter Newsletter # Sender display name
93 | EMAIL_SUBJECT_TEMPLATE=Research Papers Digest - {date:%Y-%m-%d} # Subject line template
94 |
95 | # ============================================
96 | # Port Configuration (optional)
97 | # ============================================
98 | HTTP_PORT=80
99 | HTTPS_PORT=443
100 |
101 | # ============================================
102 | # Resource Limits (optional, for production)
103 | # ============================================
104 | # Uncomment and adjust for production deployments
105 | # WEB_MEMORY_LIMIT=4G
106 | # WEB_CPU_LIMIT=2
107 | # DB_MEMORY_LIMIT=2G
108 | # DB_CPU_LIMIT=2
--------------------------------------------------------------------------------
/docs/api/modules.rst:
--------------------------------------------------------------------------------
1 | API Modules Reference
2 | =====================
3 |
4 | This section contains the auto-generated API documentation for PaperSorter modules.
5 |
6 | .. contents:: Module Overview
7 | :local:
8 | :depth: 2
9 |
10 | Core Modules
11 | ------------
12 |
13 | PaperSorter.feed_database
14 | ~~~~~~~~~~~~~~~~~~~~~~~~~
15 |
16 | .. automodule:: PaperSorter.feed_database
17 | :members:
18 | :undoc-members:
19 | :show-inheritance:
20 | :special-members: __init__
21 |
22 | PaperSorter.embedding_database
23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
24 |
25 | .. automodule:: PaperSorter.embedding_database
26 | :members:
27 | :undoc-members:
28 | :show-inheritance:
29 | :special-members: __init__
30 |
31 | Provider Modules
32 | ----------------
33 |
34 | PaperSorter.providers.base
35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
36 |
37 | .. automodule:: PaperSorter.providers.base
38 | :members:
39 | :undoc-members:
40 | :show-inheritance:
41 | :special-members: __init__
42 |
43 | PaperSorter.providers.rss
44 | ~~~~~~~~~~~~~~~~~~~~~~~~~
45 |
46 | .. automodule:: PaperSorter.providers.rss
47 | :members:
48 | :undoc-members:
49 | :show-inheritance:
50 |
51 | Task Modules
52 | ------------
53 |
54 | PaperSorter.tasks.update
55 | ~~~~~~~~~~~~~~~~~~~~~~~~
56 |
57 | .. automodule:: PaperSorter.tasks.update
58 | :members:
59 | :undoc-members:
60 | :show-inheritance:
61 |
62 | PaperSorter.tasks.train
63 | ~~~~~~~~~~~~~~~~~~~~~~~
64 |
65 | .. automodule:: PaperSorter.tasks.train
66 | :members:
67 | :undoc-members:
68 | :show-inheritance:
69 |
70 | PaperSorter.tasks.broadcast
71 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
72 |
73 | .. automodule:: PaperSorter.tasks.broadcast
74 | :members:
75 | :undoc-members:
76 | :show-inheritance:
77 |
78 | PaperSorter.tasks.serve
79 | ~~~~~~~~~~~~~~~~~~~~~~~
80 |
81 | .. automodule:: PaperSorter.tasks.serve
82 | :members:
83 | :undoc-members:
84 | :show-inheritance:
85 |
86 | Web Modules
87 | -----------
88 |
89 | PaperSorter.web.app
90 | ~~~~~~~~~~~~~~~~~~~
91 |
92 | .. automodule:: PaperSorter.web.app
93 | :members:
94 | :undoc-members:
95 | :show-inheritance:
96 |
97 | PaperSorter.web.main
98 | ~~~~~~~~~~~~~~~~~~~~
99 |
100 | .. automodule:: PaperSorter.web.main
101 | :members:
102 | :undoc-members:
103 | :show-inheritance:
104 |
105 | PaperSorter.web.auth.models
106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
107 |
108 | .. automodule:: PaperSorter.web.auth.models
109 | :members:
110 | :undoc-members:
111 | :show-inheritance:
112 |
113 | PaperSorter.web.auth.routes
114 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
115 |
116 | .. automodule:: PaperSorter.web.auth.routes
117 | :members:
118 | :undoc-members:
119 | :show-inheritance:
120 |
121 | PaperSorter.web.auth.decorators
122 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
123 |
124 | .. automodule:: PaperSorter.web.auth.decorators
125 | :members:
126 | :undoc-members:
127 | :show-inheritance:
128 |
129 | API Endpoints
130 | -------------
131 |
132 | PaperSorter.web.api.feeds
133 | ~~~~~~~~~~~~~~~~~~~~~~~~~
134 |
135 | .. automodule:: PaperSorter.web.api.feeds
136 | :members:
137 | :undoc-members:
138 | :show-inheritance:
139 |
140 | PaperSorter.web.api.search
141 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
142 |
143 | .. automodule:: PaperSorter.web.api.search
144 | :members:
145 | :undoc-members:
146 | :show-inheritance:
147 |
148 | PaperSorter.web.api.settings
149 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
150 |
151 | .. automodule:: PaperSorter.web.api.settings
152 | :members:
153 | :undoc-members:
154 | :show-inheritance:
155 |
156 | PaperSorter.web.api.user
157 | ~~~~~~~~~~~~~~~~~~~~~~~~
158 |
159 | .. automodule:: PaperSorter.web.api.user
160 | :members:
161 | :undoc-members:
162 | :show-inheritance:
163 |
164 | Utility Modules
165 | ---------------
166 |
167 | PaperSorter.utils.email
168 | ~~~~~~~~~~~~~~~~~~~~~~~
169 |
170 | .. automodule:: PaperSorter.utils.email
171 | :members:
172 | :undoc-members:
173 | :show-inheritance:
174 |
175 | PaperSorter.web.utils.database
176 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
177 |
178 | .. automodule:: PaperSorter.web.utils.database
179 | :members:
180 | :undoc-members:
181 | :show-inheritance:
182 |
183 | Model Classes
184 | -------------
185 |
186 | PaperSorter.web.models.semantic_scholar
187 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
188 |
189 | .. automodule:: PaperSorter.web.models.semantic_scholar
190 | :members:
191 | :undoc-members:
192 | :show-inheritance:
193 |
194 | Background Jobs
195 | ---------------
196 |
197 | PaperSorter.web.jobs.poster
198 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
199 |
200 | .. automodule:: PaperSorter.web.jobs.poster
201 | :members:
202 | :undoc-members:
203 | :show-inheritance:
--------------------------------------------------------------------------------
/PaperSorter/static/css/main.css:
--------------------------------------------------------------------------------
1 | /* PaperSorter Main CSS - Import all stylesheets */
2 |
3 | /*
4 | * CSS Architecture:
5 | * 1. Variables - Design tokens and CSS custom properties
6 | * 2. Base - Reset, typography, and fundamental styles
7 | * 3. Components - Reusable UI components
8 | * 4. Layout - Application-specific layouts
9 | */
10 |
11 | /* Import CSS Variables (must be first) */
12 | @import url('variables.css');
13 |
14 | /* Import Base Styles */
15 | @import url('base.css');
16 |
17 | /* Import Component Styles */
18 | @import url('components.css');
19 |
20 | /* Import Layout Styles */
21 | @import url('layout.css');
22 |
23 | /* =================================== */
24 | /* Additional Global Overrides */
25 | /* =================================== */
26 |
27 | /* Ensure smooth scrolling */
28 | html {
29 | scroll-behavior: smooth;
30 | }
31 |
32 | /* Focus visible only for keyboard navigation */
33 | *:focus:not(:focus-visible) {
34 | outline: none;
35 | }
36 |
37 | /* Better text rendering */
38 | body {
39 | text-rendering: optimizeLegibility;
40 | }
41 |
42 | /* Prevent text selection on UI elements */
43 | button,
44 | .btn,
45 | .nav-link,
46 | .badge,
47 | .tag {
48 | user-select: none;
49 | }
50 |
51 | /* Ensure images are responsive by default */
52 | img {
53 | max-width: 100%;
54 | height: auto;
55 | display: block;
56 | }
57 |
58 | /* =================================== */
59 | /* Print Styles */
60 | /* =================================== */
61 |
62 | @media print {
63 | /* Hide navigation and action elements */
64 | .header,
65 | .nav-links,
66 | .action-bar,
67 | .feed-actions,
68 | .hamburger-menu,
69 | .modal,
70 | .btn,
71 | .pagination {
72 | display: none !important;
73 | }
74 |
75 | /* Reset backgrounds for print */
76 | body {
77 | background: white;
78 | color: black;
79 | padding: 0;
80 | }
81 |
82 | .card,
83 | .feed-item {
84 | box-shadow: none;
85 | border: 1px solid #ddd;
86 | page-break-inside: avoid;
87 | }
88 |
89 | /* Ensure links are visible */
90 | a {
91 | color: black;
92 | text-decoration: underline;
93 | }
94 |
95 | a[href]:after {
96 | content: " (" attr(href) ")";
97 | font-size: 0.8em;
98 | }
99 | }
100 |
101 | /* =================================== */
102 | /* Accessibility Improvements */
103 | /* =================================== */
104 |
105 | /* Skip to main content link */
106 | .skip-to-main {
107 | position: absolute;
108 | top: -40px;
109 | left: 0;
110 | background: var(--color-primary);
111 | color: var(--text-white);
112 | padding: var(--spacing-sm) var(--spacing-base);
113 | text-decoration: none;
114 | z-index: var(--z-index-tooltip);
115 | border-radius: var(--radius-base);
116 | }
117 |
118 | .skip-to-main:focus {
119 | top: var(--spacing-sm);
120 | }
121 |
122 | /* Screen reader only text */
123 | .sr-only {
124 | position: absolute;
125 | width: 1px;
126 | height: 1px;
127 | padding: 0;
128 | margin: -1px;
129 | overflow: hidden;
130 | clip: rect(0, 0, 0, 0);
131 | white-space: nowrap;
132 | border: 0;
133 | }
134 |
135 | /* High contrast mode support */
136 | @media (prefers-contrast: high) {
137 | .card,
138 | .feed-item,
139 | .btn,
140 | .form-control {
141 | border: 2px solid;
142 | }
143 | }
144 |
145 | /* Reduced motion support */
146 | @media (prefers-reduced-motion: reduce) {
147 | *,
148 | *::before,
149 | *::after {
150 | animation-duration: 0.01ms !important;
151 | animation-iteration-count: 1 !important;
152 | transition-duration: 0.01ms !important;
153 | scroll-behavior: auto !important;
154 | }
155 | }
156 |
157 | /* =================================== */
158 | /* Dark Mode Preparation */
159 | /* =================================== */
160 |
161 | /*
162 | * Dark mode styles are prepared but not active.
163 | * They will be activated when data-theme="dark" is set on body element.
164 | * This structure allows for easy dark mode implementation in the future.
165 | */
166 |
167 | [data-theme="dark"] {
168 | /* Dark mode overrides will be added here */
169 | }
170 |
171 | /* =================================== */
172 | /* Browser-Specific Fixes */
173 | /* =================================== */
174 |
175 | /* Firefox */
176 | @-moz-document url-prefix() {
177 | select.form-control {
178 | text-indent: 0.01px;
179 | text-overflow: '';
180 | }
181 | }
182 |
183 | /* Legacy Edge (EdgeHTML) only: IE does not support @supports, so it never applies this rule */
184 | @supports (-ms-ime-align: auto) {
185 | select.form-control {
186 | padding-right: var(--spacing-xl);
187 | }
188 | }
189 |
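190 | /* WebKit search-field resets (the @supports test also matches Chrome and other browsers that accept -webkit-appearance, not only Safari) */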
191 | @supports (-webkit-appearance: none) {
192 | input[type="search"]::-webkit-search-decoration,
193 | input[type="search"]::-webkit-search-cancel-button,
194 | input[type="search"]::-webkit-search-results-button,
195 | input[type="search"]::-webkit-search-results-decoration {
196 | -webkit-appearance: none;
197 | }
198 | }
--------------------------------------------------------------------------------
/PaperSorter/cli/base.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Copyright (c) 2024-2025 Seoul National University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | # THE SOFTWARE.
22 | #
23 |
24 | """Base command class and registry for PaperSorter CLI."""
25 |
26 | import argparse
27 | from abc import ABC, abstractmethod
28 | from typing import Dict, Type, Optional, Any
29 |
30 |
class BaseCommand(ABC):
    """Abstract parent of every PaperSorter CLI command.

    Concrete commands set ``name`` and ``help`` and implement
    ``add_arguments`` and ``handle``.
    """

    # Command identifier; a falsy name is rejected at registration time.
    name: str = None
    # Help text shown for the command's subparser.
    help: str = None

    @abstractmethod
    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Register the options that are specific to this command."""

    @abstractmethod
    def handle(self, args: argparse.Namespace, context: Any) -> int:
        """
        Run the command.

        Args:
            args: Parsed command-line arguments
            context: Command context with config and utilities

        Returns:
            Exit code (0 for success)
        """

    def add_common_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Attach the options every command shares to ``parser``."""
        shared_options = (
            (
                ('--config', '-c'),
                {'default': './config.yml', 'help': 'Database configuration file'},
            ),
            (
                ('--log-file',),
                {'help': 'Log file path'},
            ),
            (
                ('-q', '--quiet'),
                {'action': 'store_true', 'help': 'Suppress log output'},
            ),
        )
        # Added in the same order as before so --help output is unchanged.
        for flags, options in shared_options:
            parser.add_argument(*flags, **options)
72 |
73 |
class CommandRegistry:
    """Registry that maps command names to commands and caches their instances."""

    def __init__(self):
        # Registered commands keyed by their ``name`` attribute.
        self._commands: Dict[str, Type[BaseCommand]] = {}
        # Instances created (or adopted) on first lookup, keyed by name.
        self._instances: Dict[str, BaseCommand] = {}

    def register(self, command_class: Type[BaseCommand]) -> None:
        """Register a command under its ``name`` attribute.

        Registering a second command under an existing name replaces the
        first one, including any instance already cached for it.

        Args:
            command_class: Command class (an already-built instance is also
                accepted, see ``get_command``).

        Raises:
            ValueError: If the command has no (or an empty) ``name``.
        """
        if not command_class.name:
            # Instances have no ``__name__``; fall back to their class name
            # so the intended ValueError is raised instead of AttributeError.
            label = getattr(command_class, '__name__', type(command_class).__name__)
            raise ValueError(f"Command {label} must have a name")
        self._commands[command_class.name] = command_class
        # Drop the instance cached for a previous registration of this name;
        # otherwise get_command() would keep returning the replaced command.
        self._instances.pop(command_class.name, None)

    def get_command(self, name: str) -> Optional[BaseCommand]:
        """Return the command instance for ``name``, or ``None`` if unknown.

        The instance is created on first access and reused afterwards.
        """
        if name not in self._instances and name in self._commands:
            registered = self._commands[name]
            # A registered object may already be an instance; use it as-is.
            if isinstance(registered, BaseCommand):
                self._instances[name] = registered
            else:
                self._instances[name] = registered()
        return self._instances.get(name)

    def create_subparsers(self, parser: argparse.ArgumentParser) -> None:
        """Add one subparser per registered command to ``parser``.

        Each subparser receives the common arguments, the command-specific
        arguments, and a ``command_handler`` default holding the command
        instance.
        """
        subparsers = parser.add_subparsers(
            dest='command',
            help='Available commands',
            metavar=''
        )

        for name in sorted(self._commands):
            command = self.get_command(name)

            # Replace underscores with hyphens in command names for CLI
            cli_name = name.replace('_', '-')

            subparser = subparsers.add_parser(
                cli_name,
                help=command.help,
                formatter_class=argparse.RawDescriptionHelpFormatter
            )

            # Add common arguments
            command.add_common_arguments(subparser)

            # Add command-specific arguments
            command.add_arguments(subparser)

            # Store the command instance for later execution
            subparser.set_defaults(command_handler=command)

    def list_commands(self) -> list:
        """Return the registered command names in sorted order."""
        return sorted(self._commands)


# Module-level registry instance, created at import time
registry = CommandRegistry()
134 |
--------------------------------------------------------------------------------