├── PaperSorter ├── __version__.py ├── data │ └── __init__.py ├── __init__.py ├── static │ ├── favicon.ico │ ├── icons │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ ├── favicon-96x96.png │ │ ├── android-icon-48x48.png │ │ ├── android-icon-72x72.png │ │ ├── android-icon-96x96.png │ │ ├── apple-icon-180x180.png │ │ ├── android-icon-144x144.png │ │ └── android-icon-192x192.png │ ├── css │ │ ├── pages │ │ │ ├── feedback.css │ │ │ ├── feed_struct.css │ │ │ └── paper_detail_similar.css │ │ └── main.css │ └── manifest.json ├── providers │ ├── __init__.py │ ├── openai_client.py │ ├── base.py │ └── factory.py ├── tasks │ ├── __init__.py │ └── serve.py ├── services │ ├── __init__.py │ └── summarization.py ├── db │ └── __init__.py ├── templates │ ├── settings_base.html │ ├── error.html │ ├── email │ │ ├── paper_card.html │ │ └── newsletter.txt │ ├── 403.html │ ├── partials │ │ └── similar_section.html │ ├── feedback_error.html │ ├── feedback_success.html │ └── settings.html ├── web │ ├── __init__.py │ ├── jobs │ │ └── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── scholarly_article.py │ │ └── semantic_scholar.py │ ├── auth │ │ ├── __init__.py │ │ ├── decorators.py │ │ └── models.py │ ├── api │ │ └── __init__.py │ ├── utils │ │ └── __init__.py │ └── wsgi.py ├── cli │ ├── __init__.py │ ├── types.py │ ├── context.py │ ├── parser.py │ └── base.py ├── notification │ ├── __init__.py │ ├── base.py │ └── factory.py ├── __main__.py ├── log.py ├── utils │ └── template_filters.py └── config.py ├── docker ├── scripts │ ├── wsgi.py │ ├── entrypoint.sh │ └── scheduler-entrypoint.sh ├── postgres │ └── init.sql ├── cron │ └── crontab ├── caddy │ ├── Caddyfile │ └── Caddyfile.prod └── config.docker.yml ├── requirements.txt ├── MANIFEST.in ├── CHANGELOG.md ├── docs ├── requirements.txt ├── getting-started │ └── index.rst ├── Makefile ├── user-guide │ └── index.rst ├── admin-guide │ └── index.rst ├── cli-reference │ └── index.rst ├── tutorials │ └── index.rst ├── api │ ├── 
index.rst │ └── modules.rst ├── changelog.md ├── README.md ├── reference │ └── index.rst ├── conf.py ├── development │ ├── database.rst │ └── index.rst └── index.rst ├── migrations └── add-predicted-preferences-score-index.sql ├── examples ├── crontab.example ├── papersorter.service ├── cron-broadcast.sh ├── cron-update.sh ├── README.md └── cron-combined.sh ├── .dockerignore ├── LICENSE.txt ├── setup.py ├── Dockerfile.scheduler ├── tests └── db │ └── test_manager.py ├── .gitignore ├── Dockerfile ├── .github └── workflows │ ├── docs.yml │ ├── claude.yml │ └── claude-code-review.yml ├── docker-compose.prod.yml ├── AGENTS.md ├── papersorter-cli ├── pyproject.toml └── .env.example /PaperSorter/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.9.0' 2 | -------------------------------------------------------------------------------- /PaperSorter/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Database schema data module 2 | -------------------------------------------------------------------------------- /PaperSorter/__init__.py: -------------------------------------------------------------------------------- 1 | from .__version__ import __version__ 2 | 3 | __all__ = ['__version__'] 4 | -------------------------------------------------------------------------------- /PaperSorter/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/favicon.ico -------------------------------------------------------------------------------- /PaperSorter/static/icons/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/favicon-16x16.png 
-------------------------------------------------------------------------------- /PaperSorter/static/icons/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/favicon-32x32.png -------------------------------------------------------------------------------- /PaperSorter/static/icons/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/favicon-96x96.png -------------------------------------------------------------------------------- /PaperSorter/static/icons/android-icon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-48x48.png -------------------------------------------------------------------------------- /PaperSorter/static/icons/android-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-72x72.png -------------------------------------------------------------------------------- /PaperSorter/static/icons/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-96x96.png -------------------------------------------------------------------------------- /PaperSorter/static/icons/apple-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/apple-icon-180x180.png -------------------------------------------------------------------------------- 
/PaperSorter/static/icons/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-144x144.png -------------------------------------------------------------------------------- /PaperSorter/static/icons/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChangLabSNU/PaperSorter/HEAD/PaperSorter/static/icons/android-icon-192x192.png -------------------------------------------------------------------------------- /PaperSorter/providers/__init__.py: -------------------------------------------------------------------------------- 1 | """Feed providers for PaperSorter.""" 2 | 3 | from .base import FeedProvider, FeedItem 4 | from .rss import RSSProvider 5 | 6 | __all__ = ["FeedProvider", "FeedItem", "RSSProvider"] 7 | 8 | -------------------------------------------------------------------------------- /docker/scripts/wsgi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """WSGI entry point for gunicorn with Docker.""" 3 | from PaperSorter.web.app import create_app 4 | 5 | # Create the application with the config path 6 | app = create_app("/app/config.yml") -------------------------------------------------------------------------------- /PaperSorter/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "init", 3 | "update", 4 | "train", 5 | "predict", 6 | "broadcast", 7 | "serve", 8 | "test", 9 | "import", 10 | "labeling", 11 | "models", 12 | "embeddings", 13 | ] 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | feedparser>=6.0 2 | numpy>=1.20 3 | openai>=1.30 4 | 
pandas>=2.0 5 | psycopg2-binary>=2.9 6 | pgvector>=0.2.0 7 | PyYAML>=6.0 8 | requests>=2.7.0 9 | scikit-learn>=1.4 10 | scipy>=1.10 11 | xgboost>2.0 12 | Flask>=2.0 13 | Flask-Login>=0.6.0 14 | Authlib>=1.2.0 15 | markdown2>=2.4.0 16 | -------------------------------------------------------------------------------- /PaperSorter/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Service-layer helpers for PaperSorter.""" 2 | 3 | from .feed_prediction import ( # noqa: F401 4 | FeedPredictionService, 5 | FeedPredictor, 6 | refresh_embeddings_and_predictions, 7 | ) 8 | 9 | __all__ = [ 10 | "FeedPredictionService", 11 | "FeedPredictor", 12 | "refresh_embeddings_and_predictions", 13 | ] 14 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include requirements.txt 4 | include pyproject.toml 5 | recursive-include PaperSorter/templates *.html *.txt 6 | recursive-include PaperSorter/static *.css *.js *.ico *.json *.png .gitkeep 7 | recursive-include PaperSorter/data *.py 8 | recursive-include examples * 9 | global-exclude __pycache__ 10 | global-exclude *.py[co] 11 | global-exclude .DS_Store 12 | prune tests 13 | prune notebook 14 | prune tools 15 | prune old 16 | prune qbio 17 | prune qtest 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Release 0.2 - 2024-06-05 2 | 3 | - Implement `init` command for database setup and bulk load. (ec34a60) 4 | - Improved Excel feedback formatting for better usability. (97f382c) 5 | - Fixed error when sending notifications for articles with long titles. (f5a7b50) 6 | - Cleaned up outputs for clearer log files. 
(c5d5de1) 7 | - Disabled URL unfurling in Slack messages by default. (f5a7b50) 8 | - Enabled custom model names in Slack messages. (82c4e28) 9 | - Removed a divider from Slack messages. (d56b1de) 10 | 11 | ## Release 0.1 - 2024-06-01 12 | 13 | - Initial release. 14 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Documentation dependencies for PaperSorter 2 | # Core Sphinx 3 | sphinx>=7.0.0 4 | sphinx-rtd-theme>=2.0.0 5 | 6 | # Markdown support 7 | myst-parser>=2.0.0 8 | linkify-it-py>=2.0.0 # Required for myst-parser linkify extension 9 | 10 | # Extensions for better documentation 11 | sphinx-autodoc-typehints>=1.25.0 12 | sphinx-click>=5.0.0 13 | sphinx-copybutton>=0.5.0 14 | sphinx-tabs>=3.4.0 15 | 16 | # Development tools 17 | sphinx-autobuild>=2021.3.14 18 | doc8>=1.1.0 19 | 20 | # For API documentation 21 | autodoc>=0.5.0 22 | 23 | # For better code highlighting 24 | pygments>=2.17.0 -------------------------------------------------------------------------------- /PaperSorter/static/css/pages/feedback.css: -------------------------------------------------------------------------------- 1 | /* Shared feedback page button styles */ 2 | 3 | .btn { 4 | padding: 12px 24px; 5 | border: none; 6 | border-radius: 6px; 7 | font-size: 16px; 8 | font-weight: bold; 9 | cursor: pointer; 10 | text-decoration: none; 11 | display: inline-block; 12 | transition: all var(--transition-base); 13 | } 14 | 15 | .btn-primary { 16 | background: var(--color-primary); 17 | color: var(--text-white); 18 | } 19 | 20 | .btn-primary:hover { 21 | background: var(--btn-primary-hover); 22 | transform: translateY(-1px); 23 | box-shadow: 0 2px 5px var(--shadow-xl); 24 | } 25 | 26 | -------------------------------------------------------------------------------- /docker/scripts/entrypoint.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Web container entrypoint script 3 | 4 | set -e 5 | 6 | echo "Starting PaperSorter web service..." 7 | echo "Data directory: $PAPERSORTER_DATADIR" 8 | echo "Config file: $PAPERSORTER_CONFIG" 9 | 10 | # Wait for database to be ready 11 | echo "Waiting for database..." 12 | until pg_isready -h postgres -U ${POSTGRES_USER:-papersorter}; do 13 | echo "Database is unavailable - sleeping" 14 | sleep 2 15 | done 16 | echo "Database is ready!" 17 | 18 | # Create data directories if they don't exist 19 | mkdir -p /data/logs /data/models /data/posters 20 | 21 | # Execute the command 22 | exec "$@" -------------------------------------------------------------------------------- /PaperSorter/db/__init__.py: -------------------------------------------------------------------------------- 1 | """Database access helpers for PaperSorter.""" 2 | 3 | from .manager import ( 4 | Connection, 5 | Cursor, 6 | DatabaseManager, 7 | DatabaseSession, 8 | OperationalError, 9 | PooledConnection, 10 | PoolConfig, 11 | RealDictCursor, 12 | execute_batch, 13 | errors, 14 | sql, 15 | ) 16 | 17 | __all__ = [ 18 | "Connection", 19 | "Cursor", 20 | "DatabaseManager", 21 | "DatabaseSession", 22 | "OperationalError", 23 | "PooledConnection", 24 | "PoolConfig", 25 | "RealDictCursor", 26 | "execute_batch", 27 | "errors", 28 | "sql", 29 | ] 30 | -------------------------------------------------------------------------------- /migrations/add-predicted-preferences-score-index.sql: -------------------------------------------------------------------------------- 1 | -- Migration: add covering index for predicted_preferences by model/score/feed 2 | -- This speeds up feed listing when filtering by prediction score. 
3 | -- 4 | -- How to apply (cannot run inside a transaction because of CONCURRENTLY): 5 | -- psql -d your_database -f migrations/add-predicted-preferences-score-index.sql 6 | -- 7 | -- Recommended: run during low traffic; CREATE INDEX CONCURRENTLY takes a bit longer 8 | -- but avoids long write locks on the table. 9 | 10 | SET search_path TO papersorter; 11 | 12 | CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_predpref_model_score_feed 13 | ON papersorter.predicted_preferences (model_id, score DESC, feed_id); 14 | -------------------------------------------------------------------------------- /docker/scripts/scheduler-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Scheduler entrypoint script 3 | 4 | set -e 5 | 6 | echo "Starting PaperSorter scheduler..." 7 | echo "Data directory: $PAPERSORTER_DATADIR" 8 | echo "Config file: $PAPERSORTER_CONFIG" 9 | 10 | # Wait for database to be ready 11 | echo "Waiting for database..." 12 | until pg_isready -h postgres -U ${POSTGRES_USER:-papersorter}; do 13 | echo "Database is unavailable - sleeping" 14 | sleep 2 15 | done 16 | echo "Database is ready!" 17 | 18 | # Create log directory if it doesn't exist 19 | mkdir -p /data/logs 20 | 21 | # Ensure cron environment has access to environment variables 22 | printenv | grep -E '^(DATABASE_URL|PAPERSORTER_|OPENAI_|EMBEDDING_|PATH)' > /etc/environment 23 | 24 | # Start cron in foreground 25 | echo "Starting cron daemon..." 26 | cron -f -------------------------------------------------------------------------------- /PaperSorter/templates/settings_base.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block header_actions %} 4 | 8 | {{ super() }} 9 | {% endblock %} 10 | 11 | {% block styles %} 12 | 13 | 17 | {% endblock %} 18 | 19 | {% block content %} 20 |
21 | {% block settings_content %}{% endblock %} 22 |
23 | 24 | {% block modals %}{% endblock %} 25 | {% endblock %} 26 | 27 | {% block scripts %} 28 | {% block page_scripts %}{% endblock %} 29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /PaperSorter/static/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PaperSorter", 3 | "short_name": "PaperSorter", 4 | "description": "Academic paper recommendation system", 5 | "start_url": "/", 6 | "display": "standalone", 7 | "background_color": "#ffffff", 8 | "theme_color": "#1976d2", 9 | "icons": [ 10 | { 11 | "src": "/static/icons/android-icon-48x48.png", 12 | "sizes": "48x48", 13 | "type": "image/png" 14 | }, 15 | { 16 | "src": "/static/icons/android-icon-72x72.png", 17 | "sizes": "72x72", 18 | "type": "image/png" 19 | }, 20 | { 21 | "src": "/static/icons/android-icon-96x96.png", 22 | "sizes": "96x96", 23 | "type": "image/png" 24 | }, 25 | { 26 | "src": "/static/icons/android-icon-144x144.png", 27 | "sizes": "144x144", 28 | "type": "image/png" 29 | }, 30 | { 31 | "src": "/static/icons/android-icon-192x192.png", 32 | "sizes": "192x192", 33 | "type": "image/png" 34 | } 35 | ] 36 | } -------------------------------------------------------------------------------- /examples/crontab.example: -------------------------------------------------------------------------------- 1 | # Example crontab entries for PaperSorter 2 | # Add these lines to your crontab with: crontab -e 3 | # Adjust paths and schedules according to your needs 4 | 5 | # Option 1: Recommended - Separate update and broadcast tasks 6 | # Update every 3 hours 7 | 0 */3 * * * /path/to/papersorter/examples/cron-update.sh 8 | 9 | # Broadcast every hour (channels have individual hour restrictions configured in web interface) 10 | 0 * * * * /path/to/papersorter/examples/cron-broadcast.sh 11 | 12 | # Option 2: Combined task (update + broadcast) 13 | # Run every 3 hours 14 | 0 */3 * * * 
/path/to/papersorter/examples/cron-combined.sh 15 | 16 | # Option 3: More frequent updates 17 | # Update every hour 18 | 0 * * * * /path/to/papersorter/examples/cron-update.sh 19 | 20 | # Broadcast every hour (respects per-channel broadcast hours) 21 | 0 * * * * /path/to/papersorter/examples/cron-broadcast.sh 22 | 23 | # Note: Make sure the scripts are executable: 24 | # chmod +x /path/to/papersorter/examples/*.sh -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git files 2 | .git 3 | .gitignore 4 | 5 | # Python cache 6 | __pycache__ 7 | *.pyc 8 | *.pyo 9 | *.pyd 10 | .Python 11 | *.so 12 | *.egg 13 | *.egg-info 14 | dist 15 | build 16 | 17 | # Virtual environments 18 | venv/ 19 | env/ 20 | ENV/ 21 | 22 | # IDE files 23 | .vscode/ 24 | .idea/ 25 | *.swp 26 | *.swo 27 | *~ 28 | .DS_Store 29 | 30 | # Documentation build 31 | docs/_build/ 32 | docs/build/ 33 | 34 | # Test and coverage 35 | .coverage 36 | .pytest_cache/ 37 | .tox/ 38 | htmlcov/ 39 | 40 | # Local data files 41 | *.db 42 | *.pkl 43 | *.npz 44 | *.log 45 | *.sqlite 46 | *.sqlite3 47 | 48 | # Docker files (don't copy these into the image) 49 | Dockerfile* 50 | docker-compose*.yml 51 | .dockerignore 52 | 53 | # Environment files 54 | .env 55 | .env.* 56 | 57 | # Local development directories 58 | qbio/ 59 | pubmedsync/ 60 | notebook/ 61 | tmp/ 62 | error.png 63 | 64 | # Backup files 65 | *.bak 66 | *.backup 67 | *~ 68 | 69 | # Large data directories 70 | embeddings.db/ 71 | models/ 72 | logs/ 73 | posters/ 74 | 75 | # Build artifacts 76 | build/ 77 | dist/ 78 | *.egg-info/ -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024-2025 Seoul National University 4 | 5 | Permission is hereby granted, free of charge, to 
any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /docker/postgres/init.sql: -------------------------------------------------------------------------------- 1 | -- PostgreSQL initialization script for PaperSorter 2 | -- This script runs when the database container is first created 3 | 4 | -- Create pgvector extension if not exists 5 | CREATE EXTENSION IF NOT EXISTS vector; 6 | 7 | -- Create additional extensions that might be useful 8 | CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; 9 | CREATE EXTENSION IF NOT EXISTS "pg_trgm"; -- For text search 10 | 11 | -- Set default configuration for better performance 12 | ALTER SYSTEM SET shared_buffers = '256MB'; 13 | ALTER SYSTEM SET effective_cache_size = '1GB'; 14 | ALTER SYSTEM SET maintenance_work_mem = '64MB'; 15 | ALTER SYSTEM SET checkpoint_completion_target = '0.9'; 16 | ALTER SYSTEM SET wal_buffers = '16MB'; 17 | ALTER SYSTEM SET default_statistics_target = '100'; 18 | ALTER SYSTEM SET random_page_cost = '1.1'; 19 | ALTER SYSTEM SET effective_io_concurrency = '200'; 20 | ALTER SYSTEM SET work_mem = '4MB'; 21 | ALTER SYSTEM SET min_wal_size = '1GB'; 22 | ALTER SYSTEM SET max_wal_size = '4GB'; 23 | 24 | -- Create indexes for better performance (will be created after tables are initialized) 25 | -- Note: The actual schema will be created by 'papersorter init' command -------------------------------------------------------------------------------- /docker/cron/crontab: -------------------------------------------------------------------------------- 1 | # PaperSorter Cron Schedule 2 | # This file is installed in the scheduler container 3 | 4 | # Environment setup 5 | SHELL=/bin/bash 6 | PATH=/usr/local/bin:/usr/bin:/bin 7 | 8 | # Load environment variables 9 | # Note: The scheduler-entrypoint.sh script writes env vars to /etc/environment 10 | 11 | # Update task - Fetch new papers and generate embeddings 12 | # Run every 3 hours 13 | 0 */3 * * * . 
/etc/environment && cd /app && papersorter update --config /app/config.yml --log-file /data/logs/cron-update.log --quiet >> /data/logs/cron.log 2>&1 14 | 15 | # Broadcast task - Send notifications for high-scoring papers 16 | # Run every hour (respects per-channel broadcast hours configured in web interface) 17 | 0 * * * * . /etc/environment && cd /app && papersorter broadcast --config /app/config.yml --log-file /data/logs/cron-broadcast.log --quiet >> /data/logs/cron.log 2>&1 18 | 19 | # Cleanup old logs - Remove logs older than 30 days 20 | # Run daily at 3 AM 21 | 0 3 * * * find /data/logs -name "*.log*" -mtime +30 -delete >> /data/logs/cron.log 2>&1 22 | 23 | # Health check - Write timestamp to verify cron is running 24 | */5 * * * * echo "Cron health check: $(date)" > /data/logs/cron-health.txt 25 | -------------------------------------------------------------------------------- /examples/papersorter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Gunicorn instance to serve PaperSorter 3 | After=network.target 4 | 5 | [Service] 6 | # Run the service as a non-root user 7 | User=papersorter 8 | Group=papersorter 9 | 10 | # The root directory of your project 11 | WorkingDirectory=/home/papersorter/papersorter 12 | 13 | # Environment variables 14 | # Point to your configuration file 15 | Environment="PAPER_SORTER_CONFIG=/home/papersorter/papersorter/config.yml" 16 | # Optional: Skip authentication for development (specify a user email) 17 | #Environment="PAPER_SORTER_SKIP_AUTH=user@example.com" 18 | 19 | # The command to start the service 20 | # Adjust the path to your Python environment's gunicorn 21 | ExecStart=/home/papersorter/venv/bin/gunicorn \ 22 | --workers 2 \ 23 | --threads 4 \ 24 | --worker-class gthread \ 25 | --bind 0.0.0.0:8000 \ 26 | --timeout 120 \ 27 | --access-logfile /var/log/papersorter/access.log \ 28 | --error-logfile /var/log/papersorter/error.log \ 29 | 
PaperSorter.web.wsgi:app 30 | 31 | # Restart the service if it fails 32 | Restart=on-failure 33 | RestartSec=5 34 | 35 | # Send SIGTERM for clean shutdown 36 | KillSignal=SIGTERM 37 | 38 | [Install] 39 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /docs/getting-started/index.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Getting Started 3 | =============== 4 | 5 | Welcome to PaperSorter! This section will help you get up and running with your personal academic paper recommendation system. 6 | 7 | PaperSorter uses machine learning to automatically filter and rank research papers from RSS feeds, helping you stay current with the latest developments in your field without information overload. 8 | 9 | What You'll Learn 10 | ================= 11 | 12 | - How to install and configure PaperSorter 13 | - Setting up your first feed sources 14 | - Training your initial recommendation model 15 | - Understanding the basic workflow 16 | 17 | Prerequisites 18 | ============= 19 | 20 | - Python 3.9 or later 21 | - PostgreSQL database 22 | - OpenAI-compatible API access for embeddings 23 | - Basic familiarity with command-line tools 24 | 25 | .. 
toctree:: 26 | :maxdepth: 2 27 | 28 | installation 29 | quickstart 30 | first-model 31 | 32 | Next Steps 33 | ========== 34 | 35 | Once you've completed the getting started guide, explore: 36 | 37 | - :doc:`../user-guide/index` for detailed usage instructions 38 | - :doc:`../admin-guide/index` for deployment and maintenance 39 | - :doc:`../tutorials/index` for specific integration examples -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | from setuptools import setup 25 | 26 | setup() -------------------------------------------------------------------------------- /PaperSorter/templates/error.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Error - {{ site_name }}{% endblock %} 4 | 5 | {% block main_container %} 6 |
7 |

Error

8 |

9 | {{ error|default("An error occurred") }} 10 |

11 | 14 |
15 | {% endblock %} 16 | 17 | {% block styles %} 18 | 38 | {% endblock %} 39 | -------------------------------------------------------------------------------- /PaperSorter/web/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | """Web interface package for PaperSorter.""" 25 | 26 | from .app import create_app 27 | 28 | __all__ = ["create_app"] 29 | -------------------------------------------------------------------------------- /PaperSorter/web/jobs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | """Background job processing for the web interface.""" 25 | 26 | from .poster import process_poster_job 27 | 28 | __all__ = ["process_poster_job"] 29 | -------------------------------------------------------------------------------- /PaperSorter/web/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | """Data models for the web interface.""" 25 | 26 | from .semantic_scholar import SemanticScholarItem 27 | 28 | __all__ = ["SemanticScholarItem"] 29 | -------------------------------------------------------------------------------- /docker/caddy/Caddyfile: -------------------------------------------------------------------------------- 1 | # Caddyfile for PaperSorter 2 | # Development/default configuration 3 | 4 | {$DOMAIN:localhost} { 5 | # Disable automatic HTTPS redirect for development 6 | # Reverse proxy to PaperSorter web service 7 | reverse_proxy web:5001 { 8 | header_up X-Real-IP {remote_host} 9 | header_up X-Forwarded-For {remote_host} 10 | header_up X-Forwarded-Proto {scheme} 11 | header_up X-Forwarded-Host {host} 12 | 13 | # Health check 14 | health_uri /health 15 | health_interval 30s 16 | health_timeout 5s 17 | } 18 | 19 | # Compression 20 | encode gzip 21 | 22 | # Security headers 23 | header { 24 | X-Content-Type-Options "nosniff" 25 | X-Frame-Options "SAMEORIGIN" 26 | X-XSS-Protection "1; mode=block" 27 | Referrer-Policy "strict-origin-when-cross-origin" 28 | -Server 29 | } 30 | 31 | # Logging 32 | log { 33 | output file /data/logs/caddy_access.log { 34 | roll_size 100mb 35 | roll_keep 5 36 | roll_keep_for 720h 37 | } 38 | } 39 | 40 | # Handle errors 41 | handle_errors { 42 | respond "{http.error.status_code} {http.error.status_text}" {http.error.status_code} 43 | } 44 | 45 | # Larger uploads for file imports 46 | request_body { 47 | max_size 100MB 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /PaperSorter/web/auth/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the 
Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | 24 | """Authentication module for PaperSorter web interface.""" 25 | 26 | from .models import User 27 | from .decorators import admin_required 28 | from .routes import auth_bp 29 | 30 | __all__ = ["User", "admin_required", "auth_bp"] 31 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | 3 | # You can set these variables from the command line. 4 | SPHINXOPTS ?= 5 | SPHINXBUILD ?= sphinx-build 6 | SOURCEDIR = . 7 | BUILDDIR = _build 8 | 9 | # Put it first so that "make" without argument is like "make help". 10 | help: 11 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 12 | 13 | .PHONY: help Makefile 14 | 15 | # Custom targets 16 | .PHONY: clean 17 | clean: 18 | rm -rf $(BUILDDIR)/* 19 | @echo "Build directory cleaned." 
20 | 21 | .PHONY: livehtml 22 | livehtml: 23 | sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html \ 24 | --watch ../PaperSorter \ 25 | --ignore "*.pyc" \ 26 | --ignore "*~" \ 27 | --ignore ".*" 28 | 29 | .PHONY: serve 30 | serve: html 31 | @cd $(BUILDDIR)/html && python -m http.server 8000 32 | 33 | .PHONY: github-pages 34 | github-pages: clean html 35 | @touch $(BUILDDIR)/html/.nojekyll 36 | @echo "Documentation ready for GitHub Pages in $(BUILDDIR)/html" 37 | 38 | .PHONY: check 39 | check: 40 | @$(SPHINXBUILD) -b linkcheck "$(SOURCEDIR)" "$(BUILDDIR)/linkcheck" $(SPHINXOPTS) $(O) 41 | @$(SPHINXBUILD) -b doctest "$(SOURCEDIR)" "$(BUILDDIR)/doctest" $(SPHINXOPTS) $(O) 42 | @echo "All checks passed!" 43 | 44 | # Catch-all target: route all unknown targets to Sphinx using the new 45 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 46 | %: Makefile 47 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 48 | -------------------------------------------------------------------------------- /PaperSorter/web/api/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | 24 | """API blueprints for PaperSorter web interface.""" 25 | 26 | from .feeds import feeds_bp 27 | from .settings import settings_bp 28 | from .search import search_bp 29 | from .user import user_bp 30 | 31 | __all__ = ["feeds_bp", "settings_bp", "search_bp", "user_bp"] 32 | -------------------------------------------------------------------------------- /PaperSorter/cli/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | 24 | """Command-line interface infrastructure for PaperSorter.""" 25 | 26 | from .base import BaseCommand, CommandRegistry 27 | from .context import CommandContext 28 | from .parser import create_parser 29 | 30 | __all__ = ['BaseCommand', 'CommandRegistry', 'CommandContext', 'create_parser'] 31 | -------------------------------------------------------------------------------- /Dockerfile.scheduler: -------------------------------------------------------------------------------- 1 | # PaperSorter Scheduler Image (for cron jobs) 2 | FROM python:3.11-slim 3 | 4 | # Install system dependencies including cron 5 | RUN apt-get update && apt-get install -y \ 6 | gcc \ 7 | g++ \ 8 | postgresql-client \ 9 | cron \ 10 | curl \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Set working directory 14 | WORKDIR /app 15 | 16 | # Copy requirements first for better caching 17 | COPY requirements.txt pyproject.toml setup.py ./ 18 | COPY PaperSorter/__version__.py ./PaperSorter/ 19 | 20 | # Install Python dependencies 21 | RUN pip install --no-cache-dir -r requirements.txt 22 | 23 | # Copy the application code 24 | COPY . . 25 | 26 | # Install PaperSorter package 27 | RUN pip install --no-cache-dir -e . 
28 | 29 | # Create non-root user (same UID as main container) 30 | RUN useradd -m -u 1000 papersorter && \ 31 | mkdir -p /data/logs /data/models /data/posters && \ 32 | chown -R papersorter:papersorter /app /data 33 | 34 | # Copy cron configuration 35 | COPY docker/cron/crontab /etc/cron.d/papersorter-cron 36 | 37 | # Set permissions for cron file 38 | RUN chmod 0644 /etc/cron.d/papersorter-cron && \ 39 | crontab -u papersorter /etc/cron.d/papersorter-cron 40 | 41 | # Copy entrypoint script 42 | COPY docker/scripts/scheduler-entrypoint.sh /entrypoint.sh 43 | RUN chmod +x /entrypoint.sh 44 | 45 | # Set environment variables 46 | ENV PAPERSORTER_DATADIR=/data \ 47 | PAPERSORTER_CONFIG=/app/config.yml \ 48 | PYTHONUNBUFFERED=1 49 | 50 | # Run cron in foreground 51 | CMD ["/entrypoint.sh"] -------------------------------------------------------------------------------- /PaperSorter/web/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
"""Tests for connection acquisition in ``PaperSorter.db.manager``."""

from PaperSorter.db import manager


class DummyConnection:
    """Minimal stand-in for a database connection.

    Only what this test module touches is modelled: the ``closed`` and
    ``autocommit`` attributes and the ``close()`` method.
    """

    def __init__(self, *, closed=False, autocommit=True):
        # Stored as 0/1 rather than as a bool.
        # NOTE(review): presumably mirrors psycopg2's integer ``closed``
        # attribute - confirm against the real driver.
        self.closed = int(bool(closed))
        self.autocommit = autocommit
        # Number of close() invocations, available for assertions.
        self.closed_calls = 0

    def close(self):
        """Mark the connection as closed and count the call."""
        self.closed = 1
        self.closed_calls += 1


class FakeThreadedPool:
    """In-memory replacement for ``manager.ThreadedConnectionPool``.

    Connections are handed out in FIFO order from a private queue that is
    seeded from ``initial_connections`` when the pool is constructed.
    """

    # Seed connections copied into every new pool. Tests should override this
    # through ``monkeypatch.setattr`` so the value is restored afterwards.
    initial_connections = []

    def __init__(self, minconn, maxconn, **kwargs):
        # The sizing arguments and keyword arguments are accepted only for
        # signature compatibility; the fake ignores them.
        self._queue = list(self.initial_connections)
        # Every putconn() call is recorded as ``(conn, close_flag)``.
        self.put_calls = []

    def getconn(self):
        """Return the next queued connection.

        Raises:
            RuntimeError: If the queue is empty.
        """
        if not self._queue:
            raise RuntimeError("No connections left in fake pool")
        return self._queue.pop(0)

    def putconn(self, conn, close=False):
        """Record the return of *conn*; re-queue it unless *close* is truthy."""
        self.put_calls.append((conn, bool(close)))
        if not close:
            self._queue.append(conn)

    def closeall(self):
        """Drop every queued connection."""
        self._queue.clear()


def test_acquire_discards_stale_connections(monkeypatch):
    """``_acquire`` skips a closed connection and returns the healthy one.

    The fake pool serves a closed connection first. The manager is expected
    to give it back with ``close=True`` and to return the next connection
    with ``autocommit`` switched off.
    """
    closed_conn = DummyConnection(closed=True)
    healthy_conn = DummyConnection(autocommit=True)

    # Patch the class attribute instead of assigning it, so the seed list does
    # not leak into other tests that instantiate FakeThreadedPool.
    monkeypatch.setattr(
        FakeThreadedPool, "initial_connections", [closed_conn, healthy_conn]
    )
    monkeypatch.setattr(manager, "ThreadedConnectionPool", FakeThreadedPool)

    db_manager = manager.DatabaseManager({}, register_pgvector=False)

    conn = db_manager._acquire()

    assert conn is healthy_conn
    assert healthy_conn.autocommit is False
    assert db_manager._pool.put_calls == [(closed_conn, True)]
plot_model_comparisons.py 93 | 94 | # Temporary scripts 95 | recreate_embeddings_vertex.py 96 | openai-google-embedding.py 97 | TODO 98 | NOTES 99 | -------------------------------------------------------------------------------- /PaperSorter/web/wsgi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
#

"""WSGI entry point for gunicorn and other WSGI servers."""

import os
from .app import create_app

# Get the config path from the environment or use the default.
# PAPER_SORTER_CONFIG is the variable this module has always read and keeps
# priority. PAPERSORTER_CONFIG is the spelling exported by the project's
# Dockerfiles, so it is honoured as a fallback instead of being silently
# ignored. An empty value is treated the same as an unset one.
config_path = (
    os.environ.get('PAPER_SORTER_CONFIG')
    or os.environ.get('PAPERSORTER_CONFIG')
    or './config.yml'
)

# Get optional skip authentication user from environment (None when unset)
skip_authentication = os.environ.get('PAPER_SORTER_SKIP_AUTH')

# Create the application with the config path; WSGI servers load this
# module-level ``app`` object.
app = create_app(config_path, skip_authentication=skip_authentication)
#

"""Authentication decorators."""

from functools import wraps
from flask import abort
from flask_login import login_required, current_user


def admin_required(f):
    """Decorator to require admin privileges for a route.

    The wrapped view is first guarded by ``login_required``. The request is
    then aborted with HTTP 403 unless ``current_user.is_admin`` is truthy.

    Args:
        f: View function to protect.

    Returns:
        The wrapped view function; ``functools.wraps`` copies ``f``'s
        metadata onto it.
    """

    @wraps(f)
    @login_required
    def decorated_function(*args, **kwargs):
        # A user object that has no ``is_admin`` attribute is treated as a
        # non-admin, so the request ends in a 403 instead of an
        # AttributeError.
        if not getattr(current_user, "is_admin", False):
            abort(403)  # Forbidden
        return f(*args, **kwargs)

    return decorated_function
22 | # 23 | 24 | """Notification provider system for sending alerts to various platforms.""" 25 | 26 | from .base import NotificationProvider, NotificationError 27 | from .slack import SlackProvider 28 | from .discord import DiscordProvider 29 | from .email import EmailProvider 30 | from .factory import create_notification_provider 31 | 32 | __all__ = [ 33 | "NotificationProvider", 34 | "NotificationError", 35 | "SlackProvider", 36 | "DiscordProvider", 37 | "EmailProvider", 38 | "create_notification_provider", 39 | ] 40 | -------------------------------------------------------------------------------- /PaperSorter/templates/email/paper_card.html: -------------------------------------------------------------------------------- 1 | {# Reusable paper card component for email templates #} 2 | {# Usage: {% include 'email/paper_card.html' %} #} 3 | {# Expects: paper object in context #} 4 | 5 |
6 |

7 | {{ paper.title|safe_html }} 8 |

9 | 10 | {% if paper.author %} 11 |
12 | {{ paper.author }} 13 |
14 | {% endif %} 15 | 16 |
17 | {% if paper.origin %} 18 | {{ paper.origin }} 19 | {% endif %} 20 | {% if paper.published %} 21 | • Published: {{ paper.published.strftime('%Y-%m-%d') }} 22 | {% endif %} 23 |
24 | 25 | {% if paper.score is defined %} 26 |
27 | 28 | Score: {{ "%.2f"|format(paper.score) }} 29 | 30 |
31 | {% endif %} 32 | 33 | {% if paper.content %} 34 |
35 | {{ paper.content|safe_html|truncate(500) }} 36 |
37 | {% elif paper.tldr %} 38 |
39 | Summary: {{ paper.tldr|safe_html }} 40 |
41 | {% endif %} 42 | 43 | {% if base_url %} 44 | 50 | {% endif %} 51 |
#!/bin/bash
# Example cron wrapper script for PaperSorter broadcast task
# Copy and customize this script for your environment

# Set your PaperSorter command
# For system-wide installation:
PAPERSORTER_CMD="papersorter"
# For virtual environment:
# PAPERSORTER_CMD="/path/to/venv/bin/papersorter"
# For conda environment:
# PAPERSORTER_CMD="conda run -n myenv papersorter"

# Configuration
PAPERSORTER_DIR="/path/to/papersorter"
LOGFILE="background-updates.log"
CONFIG_FILE="./config.yml"

# Change to PaperSorter directory
cd "$PAPERSORTER_DIR" || exit 1

# Rotate LOGFILE once it grows beyond 50MB.
#
# The current log is renamed with a timestamp suffix and gzip-compressed, and
# an empty LOGFILE is created in its place. When five or more compressed logs
# already exist, the oldest one is deleted first.
# Reads the global LOGFILE; takes no arguments.
rotate_logs() {
    local max_bytes=52428800    # 50MB
    local max_archives=5
    local size archive_count oldest_log timestamp

    # Nothing to rotate until the log file exists.
    [ -f "$LOGFILE" ] || return 0

    # `wc -c` is POSIX, whereas `stat -c%s` only exists in GNU coreutils.
    # The arithmetic expansion strips the padding some wc versions print and
    # yields 0 if the size could not be read.
    size=$(( $(wc -c < "$LOGFILE") ))
    [ "$size" -gt "$max_bytes" ] || return 0

    echo "$(date): Rotating log file (size: ${size} bytes)" >> "$LOGFILE"

    # Remove oldest compressed log if we have 5 or more
    archive_count=$(( $(ls -1 -- "${LOGFILE}".*.gz 2>/dev/null | wc -l) ))
    if [ "$archive_count" -ge "$max_archives" ]; then
        oldest_log=$(ls -1t -- "${LOGFILE}".*.gz 2>/dev/null | tail -n 1)
        if [ -n "$oldest_log" ]; then
            rm -f -- "$oldest_log"
        fi
    fi

    # Rotate and compress current log; stop if the rename fails so the
    # "rotated" message below is never written for a rotation that did not
    # happen.
    timestamp=$(date +%Y%m%d_%H%M%S)
    mv -- "$LOGFILE" "${LOGFILE}.${timestamp}" || return 1
    gzip -- "${LOGFILE}.${timestamp}"

    # Create new log file
    touch "$LOGFILE"
    echo "$(date): Log rotated. Previous log compressed as ${LOGFILE}.${timestamp}.gz" >> "$LOGFILE"
}

# Run the broadcast task
# PAPERSORTER_CMD is left unquoted on purpose so that multi-word commands such
# as "conda run -n myenv papersorter" are split into separate arguments.
$PAPERSORTER_CMD broadcast \
    --config "$CONFIG_FILE" \
    --log-file "$LOGFILE" \
    --quiet

# Rotate logs if needed
rotate_logs
28 | 29 | # Create non-root user 30 | RUN useradd -m -u 1000 papersorter && \ 31 | mkdir -p /data/logs /data/models /data/posters && \ 32 | chown -R papersorter:papersorter /app /data 33 | 34 | # Copy entrypoint script 35 | COPY docker/scripts/entrypoint.sh /entrypoint.sh 36 | RUN chmod +x /entrypoint.sh 37 | 38 | # Switch to non-root user 39 | USER papersorter 40 | 41 | # Set environment variables 42 | ENV PAPERSORTER_DATADIR=/data \ 43 | PAPERSORTER_CONFIG=/app/config.yml \ 44 | PYTHONUNBUFFERED=1 45 | 46 | # Expose port 47 | EXPOSE 5001 48 | 49 | # Use entrypoint for database wait 50 | ENTRYPOINT ["/entrypoint.sh"] 51 | 52 | # Default command (can be overridden) 53 | CMD ["gunicorn", \ 54 | "--worker-class", "gthread", \ 55 | "--workers", "1", \ 56 | "--threads", "4", \ 57 | "--bind", "0.0.0.0:5001", \ 58 | "--access-logfile", "/data/logs/access.log", \ 59 | "--error-logfile", "/data/logs/error.log", \ 60 | "--log-level", "info", \ 61 | "--timeout", "120", \ 62 | "docker.scripts.wsgi:app"] -------------------------------------------------------------------------------- /PaperSorter/templates/403.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Access Denied - {{ site_name }}{% endblock %} 4 | {% block meta_description %}403 Forbidden - You don't have permission to access this page{% endblock %} 5 | 6 | {% block header %} 7 | 8 | {% endblock %} 9 | 10 | {% block styles %} 11 | 48 | {% endblock %} 49 | 50 | {% block content %} 51 |
52 |

403

53 |

Access Denied

54 |

55 | You don't have permission to access this page. 56 | This area is restricted to administrators only. 57 |

58 | ← Back to Papers 59 |
#!/bin/bash
# Example cron wrapper script for PaperSorter update task
# Copy and customize this script for your environment

# Set your PaperSorter command
# For system-wide installation:
PAPERSORTER_CMD="papersorter"
# For virtual environment:
# PAPERSORTER_CMD="/path/to/venv/bin/papersorter"
# For conda environment:
# PAPERSORTER_CMD="conda run -n myenv papersorter"

# Configuration
PAPERSORTER_DIR="/path/to/papersorter"
LOGFILE="background-updates.log"
CONFIG_FILE="./config.yml"

# Change to PaperSorter directory
cd "$PAPERSORTER_DIR" || exit 1

# Rotate LOGFILE once it grows beyond 50MB.
#
# The current log is renamed with a timestamp suffix and gzip-compressed, and
# an empty LOGFILE is created in its place. When five or more compressed logs
# already exist, the oldest one is deleted first.
# Reads the global LOGFILE; takes no arguments.
rotate_logs() {
    local max_bytes=52428800    # 50MB
    local max_archives=5
    local size archive_count oldest_log timestamp

    # Nothing to rotate until the log file exists.
    [ -f "$LOGFILE" ] || return 0

    # `wc -c` is POSIX, whereas `stat -c%s` only exists in GNU coreutils.
    # The arithmetic expansion strips the padding some wc versions print and
    # yields 0 if the size could not be read.
    size=$(( $(wc -c < "$LOGFILE") ))
    [ "$size" -gt "$max_bytes" ] || return 0

    echo "$(date): Rotating log file (size: ${size} bytes)" >> "$LOGFILE"

    # Remove oldest compressed log if we have 5 or more
    archive_count=$(( $(ls -1 -- "${LOGFILE}".*.gz 2>/dev/null | wc -l) ))
    if [ "$archive_count" -ge "$max_archives" ]; then
        oldest_log=$(ls -1t -- "${LOGFILE}".*.gz 2>/dev/null | tail -n 1)
        if [ -n "$oldest_log" ]; then
            rm -f -- "$oldest_log"
        fi
    fi

    # Rotate and compress current log; stop if the rename fails so the
    # "rotated" message below is never written for a rotation that did not
    # happen.
    timestamp=$(date +%Y%m%d_%H%M%S)
    mv -- "$LOGFILE" "${LOGFILE}.${timestamp}" || return 1
    gzip -- "${LOGFILE}.${timestamp}"

    # Create new log file
    touch "$LOGFILE"
    echo "$(date): Log rotated. Previous log compressed as ${LOGFILE}.${timestamp}.gz" >> "$LOGFILE"
}

# Run the update task
# PAPERSORTER_CMD is left unquoted on purpose so that multi-word commands such
# as "conda run -n myenv papersorter" are split into separate arguments.
$PAPERSORTER_CMD update \
    --config "$CONFIG_FILE" \
    --log-file "$LOGFILE" \
    --quiet \
    --limit-sources 20 \
    --check-interval-hours 3

# Rotate logs if needed
rotate_logs
49 | pip install -r docs/requirements.txt 50 | 51 | - name: Build documentation 52 | run: | 53 | cd docs 54 | make clean 55 | make html 56 | 57 | - name: Check for broken links 58 | run: | 59 | cd docs 60 | make linkcheck 61 | continue-on-error: true 62 | 63 | - name: Upload artifact 64 | uses: actions/upload-pages-artifact@v3 65 | with: 66 | path: docs/_build/html 67 | 68 | deploy: 69 | if: github.event_name == 'push' && github.ref == 'refs/heads/main' 70 | needs: build 71 | runs-on: ubuntu-latest 72 | environment: 73 | name: github-pages 74 | url: ${{ steps.deployment.outputs.page_url }} 75 | steps: 76 | - name: Deploy to GitHub Pages 77 | id: deployment 78 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /PaperSorter/templates/partials/similar_section.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 5 |
6 |

Similar Articles

7 |
8 | 9 | 10 |
11 |
12 | 13 | 14 | 35 | 36 | 37 |
38 |
39 |
40 |
41 | Loading similar articles... 42 |
43 |
44 | 45 |
46 | -------------------------------------------------------------------------------- /PaperSorter/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | import sys 25 | import importlib 26 | from .tasks import __all__ as alltasks 27 | from .cli.parser import main as cli_main 28 | 29 | def main(): 30 | """Main entry point for PaperSorter CLI.""" 31 | 32 | # Import and register all commands 33 | 34 | for task in alltasks: 35 | # Import the task module (this triggers registration for migrated commands) 36 | try: 37 | importlib.import_module(f".tasks.{task}", package="PaperSorter") 38 | except ImportError as e: 39 | print(f"Warning: Could not import task {task}: {e}", file=sys.stderr) 40 | continue 41 | 42 | # Run the CLI 43 | return cli_main() 44 | 45 | if __name__ == "__main__": 46 | sys.exit(main()) 47 | 48 | # Export main for use as console script 49 | __all__ = ['main'] 50 | -------------------------------------------------------------------------------- /docs/user-guide/index.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | User Guide 3 | ========== 4 | 5 | This comprehensive guide covers all aspects of using PaperSorter effectively, from basic configuration to advanced workflows. 6 | 7 | Whether you're a researcher looking to streamline your paper discovery process or an administrator managing a team's research feeds, this guide provides the knowledge you need to get the most out of PaperSorter. 8 | 9 | Overview 10 | ======== 11 | 12 | PaperSorter helps you: 13 | 14 | - **Discover relevant papers** automatically from multiple sources 15 | - **Train personalized models** that learn your research interests 16 | - **Receive targeted notifications** through Slack, email, or other channels 17 | - **Manage and label papers** through an intuitive web interface 18 | - **Search and explore** related work using semantic similarity 19 | 20 | .. 
toctree:: 21 | :maxdepth: 2 22 | 23 | configuration 24 | feed-sources 25 | training-models 26 | sharing-broadcasting 27 | notifications 28 | search-from-pdf 29 | web-interface 30 | workflows 31 | 32 | Quick Reference 33 | =============== 34 | 35 | Common Tasks 36 | ------------ 37 | 38 | - **Add new feeds**: Use the web interface or directly edit the database 39 | - **Train a model**: Label ~100 papers, then run ``papersorter train`` 40 | - **Check new papers**: Run ``papersorter update`` to fetch and score articles 41 | - **Send notifications**: Use ``papersorter broadcast`` to deliver recommendations 42 | 43 | Best Practices 44 | -------------- 45 | 46 | - Start with broad feeds and narrow down based on model performance 47 | - Label papers consistently to improve model accuracy 48 | - Regularly retrain models as your research interests evolve 49 | - Monitor notification channels to ensure appropriate content delivery 50 | 51 | Related Sections 52 | ================ 53 | 54 | - :doc:`../getting-started/index` - Initial setup and installation 55 | - :doc:`../cli-reference/index` - Complete command reference 56 | - :doc:`../tutorials/index` - Step-by-step integration guides -------------------------------------------------------------------------------- /PaperSorter/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this 
permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | 24 | import logging 25 | 26 | console = logging.StreamHandler() 27 | console.setLevel(logging.INFO) 28 | 29 | log = logging.getLogger("PaperSorter") 30 | log.setLevel(logging.INFO) 31 | log.addHandler(console) 32 | 33 | 34 | def initialize_logging(task="", logfile=None, quiet=False): 35 | if logfile is not None: 36 | formatter = logging.Formatter( 37 | f"[%(asctime)s/{task}] %(message)s", "%Y-%m-%d %H:%M:%S" 38 | ) 39 | logf_handler = logging.FileHandler(logfile, mode="a") 40 | logf_handler.setLevel(logging.INFO) 41 | logf_handler.setFormatter(formatter) 42 | logf_handler.addFilter( 43 | lambda record: not record.name.endswith("TheOldReaderConnection") 44 | ) 45 | log.addHandler(logf_handler) 46 | 47 | if quiet: 48 | console.setLevel(logging.WARNING) 49 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # PaperSorter Examples 2 | 3 | This directory contains example configuration and automation scripts for PaperSorter. 4 | 5 | ## Files 6 | 7 | ### config.yml 8 | Example configuration file for PaperSorter. Copy this to your working directory and customize with your API keys and database credentials. 
9 | 10 | ### Cron Wrapper Scripts 11 | 12 | These scripts provide automated execution with log rotation: 13 | 14 | - **cron-update.sh** - Runs the update task to fetch new articles 15 | - **cron-broadcast.sh** - Runs the broadcast task to send notifications 16 | - **cron-combined.sh** - Runs both update and broadcast in sequence 17 | 18 | To use these scripts: 19 | 1. Copy to your preferred location 20 | 2. Edit the configuration variables at the top of each script 21 | 3. Make executable: `chmod +x *.sh` 22 | 4. Add to crontab (see crontab.example) 23 | 24 | ### crontab.example 25 | Example crontab entries showing different scheduling strategies for running PaperSorter tasks. 26 | 27 | ## Usage 28 | 29 | 1. **Initial Setup** 30 | ```bash 31 | # Copy and customize configuration 32 | cp examples/config.yml ./config.yml 33 | # Edit config.yml with your API keys and settings 34 | ``` 35 | 36 | 2. **Manual Execution** 37 | ```bash 38 | # Run tasks manually 39 | papersorter update --config ./config.yml 40 | papersorter broadcast --config ./config.yml 41 | ``` 42 | 43 | 3. 
**Automated Execution** 44 | ```bash 45 | # Copy and customize cron scripts 46 | cp examples/cron-combined.sh ~/bin/papersorter-cron.sh 47 | chmod +x ~/bin/papersorter-cron.sh 48 | # Edit the script with your paths 49 | 50 | # Add to crontab 51 | crontab -e 52 | # Add: 0 */3 * * * /home/username/bin/papersorter-cron.sh 53 | ``` 54 | 55 | ## Notes 56 | 57 | - The cron scripts include automatic log rotation to prevent logs from growing too large 58 | - Broadcast hours are now configured per channel in the web interface settings 59 | - The broadcast task can run every hour and will automatically respect each channel's configured hours 60 | - Adjust the update schedule according to your preferences -------------------------------------------------------------------------------- /examples/cron-combined.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Example combined cron wrapper script for PaperSorter 3 | # Runs both update and broadcast tasks in sequence 4 | # Copy and customize this script for your environment 5 | 6 | # Set your PaperSorter command 7 | # For system-wide installation: 8 | PAPERSORTER_CMD="papersorter" 9 | # For virtual environment: 10 | # PAPERSORTER_CMD="/path/to/venv/bin/papersorter" 11 | # For conda environment: 12 | # PAPERSORTER_CMD="conda run -n myenv papersorter" 13 | 14 | # Configuration 15 | PAPERSORTER_DIR="/path/to/papersorter" 16 | LOGFILE="background-updates.log" 17 | CONFIG_FILE="./config.yml" 18 | 19 | # Change to PaperSorter directory 20 | cd "$PAPERSORTER_DIR" || exit 1 21 | 22 | # Function to rotate logs when they get too large 23 | rotate_logs() { 24 | # Check if log file exists and is larger than 50MB 25 | if [ -f "$LOGFILE" ] && [ $(stat -c%s "$LOGFILE") -gt 52428800 ]; then 26 | echo "$(date): Rotating log file (size: $(stat -c%s "$LOGFILE") bytes)" >> "$LOGFILE" 27 | 28 | # Remove oldest compressed log if we have 5 or more 29 | if [ $(ls -1 ${LOGFILE}.*.gz 2>/dev/null | 
wc -l) -ge 5 ]; then 30 | oldest_log=$(ls -1t ${LOGFILE}.*.gz | tail -1) 31 | rm -f "$oldest_log" 32 | fi 33 | 34 | # Rotate and compress current log 35 | timestamp=$(date +%Y%m%d_%H%M%S) 36 | mv "$LOGFILE" "${LOGFILE}.${timestamp}" 37 | gzip "${LOGFILE}.${timestamp}" 38 | 39 | # Create new log file 40 | touch "$LOGFILE" 41 | echo "$(date): Log rotated. Previous log compressed as ${LOGFILE}.${timestamp}.gz" >> "$LOGFILE" 42 | fi 43 | } 44 | 45 | # Always run update to fetch new articles 46 | echo "$(date): Starting update task" >> "$LOGFILE" 47 | $PAPERSORTER_CMD update \ 48 | --config "$CONFIG_FILE" \ 49 | --log-file "$LOGFILE" \ 50 | --quiet \ 51 | --limit-sources 20 \ 52 | --check-interval-hours 3 53 | 54 | # Run broadcast task (channels have their own hour restrictions) 55 | echo "$(date): Starting broadcast task" >> "$LOGFILE" 56 | $PAPERSORTER_CMD broadcast \ 57 | --config "$CONFIG_FILE" \ 58 | --log-file "$LOGFILE" \ 59 | --quiet 60 | 61 | # Rotate logs if needed 62 | rotate_logs -------------------------------------------------------------------------------- /docker/caddy/Caddyfile.prod: -------------------------------------------------------------------------------- 1 | # Caddyfile for PaperSorter 2 | # Production configuration with automatic HTTPS 3 | 4 | {$DOMAIN} { 5 | # Automatic HTTPS with Let's Encrypt 6 | tls {$EMAIL} 7 | 8 | # Reverse proxy to PaperSorter web service 9 | reverse_proxy web:5001 { 10 | header_up X-Real-IP {remote_host} 11 | header_up X-Forwarded-For {remote_host} 12 | header_up X-Forwarded-Proto {scheme} 13 | header_up X-Forwarded-Host {host} 14 | 15 | # Health check 16 | health_uri /health 17 | health_interval 30s 18 | health_timeout 5s 19 | 20 | # Retry policy 21 | lb_policy round_robin 22 | lb_try_duration 10s 23 | lb_try_interval 1s 24 | } 25 | 26 | # Compression 27 | encode gzip zstd 28 | 29 | # Security headers 30 | header { 31 | Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" 32 | 
X-Content-Type-Options "nosniff" 33 | X-Frame-Options "SAMEORIGIN" 34 | X-XSS-Protection "1; mode=block" 35 | Referrer-Policy "strict-origin-when-cross-origin" 36 | Content-Security-Policy "default-src 'self' https:; script-src 'self' 'unsafe-inline' 'unsafe-eval' https:; style-src 'self' 'unsafe-inline' https:; img-src 'self' data: https:; font-src 'self' data: https:;" 37 | -Server 38 | } 39 | 40 | # Logging 41 | log { 42 | output file /data/logs/caddy_access.log { 43 | roll_size 100mb 44 | roll_keep 10 45 | roll_keep_for 2160h 46 | } 47 | } 48 | 49 | # Handle errors 50 | handle_errors { 51 | @404 { 52 | expression {http.error.status_code} == 404 53 | } 54 | respond @404 "Page not found" 404 55 | respond "{http.error.status_code} {http.error.status_text}" {http.error.status_code} 56 | } 57 | 58 | # Larger uploads for file imports 59 | request_body { 60 | max_size 100MB 61 | } 62 | 63 | # Timeouts for long-running requests (training, imports) 64 | timeouts { 65 | read_body 5m 66 | write 5m 67 | } 68 | } 69 | 70 | # Redirect www to non-www 71 | www.{$DOMAIN} { 72 | redir https://{$DOMAIN}{uri} permanent 73 | } -------------------------------------------------------------------------------- /docs/admin-guide/index.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Administrator Guide 3 | =================== 4 | 5 | This guide is designed for system administrators, DevOps engineers, and technical users responsible for deploying, maintaining, and scaling PaperSorter installations. 6 | 7 | Learn how to set up robust, production-ready deployments that can serve multiple users and handle large volumes of research papers efficiently.
8 | 9 | Scope 10 | ===== 11 | 12 | This guide covers: 13 | 14 | - **Production deployment** strategies and best practices 15 | - **Database administration** including setup, optimization, and maintenance 16 | - **Security considerations** for multi-user environments 17 | - **Monitoring and troubleshooting** common issues 18 | - **Backup and disaster recovery** procedures 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | 23 | authentication 24 | deployment 25 | database-setup 26 | backup-restore 27 | monitoring 28 | security 29 | troubleshooting 30 | 31 | Key Responsibilities 32 | ==================== 33 | 34 | System Architecture 35 | ------------------- 36 | 37 | - Database server management (PostgreSQL with pgvector) 38 | - Web server configuration and load balancing 39 | - Background task scheduling (cron jobs or task queues) 40 | - API key and credential management 41 | 42 | Operational Tasks 43 | ----------------- 44 | 45 | - Regular database maintenance and optimization 46 | - Model performance monitoring and retraining schedules 47 | - User access management and authentication setup 48 | - System resource monitoring and scaling decisions 49 | 50 | Security Considerations 51 | ----------------------- 52 | 53 | - OAuth provider configuration (Google, GitHub, ORCID) 54 | - Database access controls and encryption 55 | - API key rotation and secure storage 56 | - Network security and SSL certificate management 57 | 58 | Production Readiness 59 | ==================== 60 | 61 | Before deploying to production, ensure: 62 | 63 | - Database backups are automated and tested 64 | - Monitoring and alerting systems are in place 65 | - Security policies are implemented and documented 66 | - Disaster recovery procedures are established 67 | 68 | Related Resources 69 | ================= 70 | 71 | - :doc:`../development/index` - Contributing and extending PaperSorter 72 | - :doc:`../api/index` - API reference for custom integrations 
-------------------------------------------------------------------------------- /PaperSorter/cli/types.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | """Type converter functions for argparse.""" 25 | 26 | import argparse 27 | from typing import List 28 | 29 | 30 | def positive_int(value: str) -> int: 31 | """Validate positive integer.""" 32 | ivalue = int(value) 33 | if ivalue <= 0: 34 | raise argparse.ArgumentTypeError(f"{value} must be a positive integer") 35 | return ivalue 36 | 37 | 38 | def probability_float(value: str) -> float: 39 | """Validate probability value between 0 and 1.""" 40 | fvalue = float(value) 41 | if not 0.0 <= fvalue <= 1.0: 42 | raise argparse.ArgumentTypeError(f"{value} must be between 0.0 and 1.0") 43 | return fvalue 44 | 45 | 46 | def comma_separated_list(value: str) -> List[str]: 47 | """Parse comma-separated list.""" 48 | return [item.strip() for item in value.split(',') if item.strip()] 49 | 50 | 51 | def issn_list(value: str) -> str: 52 | """Validate ISSN format (XXXX-XXXX).""" 53 | value = value.strip() 54 | if len(value) == 9 and value[4] == '-': 55 | return value 56 | raise argparse.ArgumentTypeError(f"{value} is not a valid ISSN format (XXXX-XXXX)") 57 | -------------------------------------------------------------------------------- /PaperSorter/templates/email/newsletter.txt: -------------------------------------------------------------------------------- 1 | PAPERSORTER DIGEST 2 | {%- if channel_name %} 3 | {{ channel_name }} 4 | {%- endif %} 5 | {{ date.strftime('%B %d, %Y') }} 6 | ================================================================================ 7 | 8 | {% if papers|length == 1 -%} 9 | 1 paper selected for your review 10 | {%- else -%} 11 | {{ papers|length }} papers selected for your review 12 | {%- endif -%} 13 | {%- if source_count %} from {{ source_count }} sources{% endif %} 14 | 15 | ================================================================================ 16 | 17 | {% for paper in papers %} 18 | {{ loop.index }}. 
{{ paper.title }} 19 | {{ '-' * paper.title|length }} 20 | 21 | {% if paper.author -%} 22 | Authors: {{ paper.author }} 23 | {% endif -%} 24 | {%- if paper.origin -%} 25 | Source: {{ paper.origin }} 26 | {% endif -%} 27 | {%- if paper.published -%} 28 | Published: {{ paper.published.strftime('%Y-%m-%d') }} 29 | {% endif -%} 30 | {%- if paper.score is defined -%} 31 | Score: {{ "%.0f"|format(paper.score * 100) }} 32 | {%- if paper.other_scores %} 33 | {%- for other_score in paper.other_scores %} 34 | {%- if other_score.score is defined %} 35 | {{ other_score.score_name }}: {{ "%.0f"|format(other_score.score * 100) }} 36 | {%- endif %} 37 | {%- endfor %} 38 | {%- endif %} 39 | {% endif %} 40 | 41 | {% if include_abstracts -%} 42 | {%- if paper.content -%} 43 | Abstract: 44 | {{ paper.content|truncate(500)|wordwrap(78) }} 45 | {%- elif paper.tldr -%} 46 | Summary: 47 | {{ paper.tldr|wordwrap(78) }} 48 | {%- endif -%} 49 | {%- elif paper.tldr -%} 50 | TL;DR: 51 | {{ paper.tldr|wordwrap(78) }} 52 | {%- elif paper.get('_abstract_fallback') -%} 53 | Abstract: 54 | {{ paper.get('_abstract_fallback')|wordwrap(78) }} 55 | {%- endif %} 56 | 57 | Full Paper: {{ paper.link }} 58 | {%- if base_url and paper.id %} 59 | Paper Details: {{ base_url }}/paper/{{ paper.id }} 60 | {%- endif %} 61 | 62 | {% if not loop.last %} 63 | -------------------------------------------------------------------------------- 64 | {% endif %} 65 | {% endfor %} 66 | 67 | ================================================================================ 68 | 69 | This digest was generated by PaperSorter 70 | {%- if base_url %} 71 | View in Web Interface: {{ base_url }} 72 | {%- endif %} 73 | 74 | To unsubscribe or adjust preferences, please contact your administrator. 
75 | -------------------------------------------------------------------------------- /docker-compose.prod.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | # Production overrides for docker-compose.yml 4 | # Usage: docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d 5 | 6 | services: 7 | postgres: 8 | # Production database settings 9 | deploy: 10 | resources: 11 | limits: 12 | cpus: '2' 13 | memory: 2G 14 | reservations: 15 | cpus: '0.5' 16 | memory: 512M 17 | # Add backup volume 18 | volumes: 19 | - postgres_backup:/backup 20 | 21 | web: 22 | # Production web settings 23 | deploy: 24 | resources: 25 | limits: 26 | cpus: '2' 27 | memory: 4G 28 | reservations: 29 | cpus: '1' 30 | memory: 1G 31 | environment: 32 | # Production settings 33 | FLASK_ENV: production 34 | LOG_LEVEL: warning 35 | # Increase gunicorn workers/threads for production 36 | command: ["gunicorn", 37 | "--worker-class", "gthread", 38 | "--workers", "1", 39 | "--threads", "8", 40 | "--bind", "0.0.0.0:5001", 41 | "--access-logfile", "/data/logs/access.log", 42 | "--error-logfile", "/data/logs/error.log", 43 | "--log-level", "warning", 44 | "--timeout", "120", 45 | "--max-requests", "1000", 46 | "--max-requests-jitter", "100", 47 | "PaperSorter.web.app:create_app()"] 48 | 49 | scheduler: 50 | # Production scheduler settings 51 | deploy: 52 | resources: 53 | limits: 54 | cpus: '1' 55 | memory: 2G 56 | reservations: 57 | cpus: '0.25' 58 | memory: 256M 59 | 60 | caddy: 61 | # Production Caddy settings with proper domain 62 | environment: 63 | # These should be set in .env for production 64 | DOMAIN: ${DOMAIN} 65 | EMAIL: ${ADMIN_EMAIL} 66 | # Mount production Caddyfile 67 | volumes: 68 | - ./docker/caddy/Caddyfile.prod:/etc/caddy/Caddyfile:ro 69 | - caddy_data:/data 70 | - caddy_config:/config 71 | deploy: 72 | resources: 73 | limits: 74 | cpus: '0.5' 75 | memory: 256M 76 | reservations: 77 | cpus: '0.1' 78 | memory: 64M 
79 | 80 | volumes: 81 | postgres_backup: 82 | name: papersorter_postgres_backup -------------------------------------------------------------------------------- /PaperSorter/web/auth/models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | """User model for authentication.""" 25 | 26 | from flask_login import UserMixin 27 | 28 | 29 | class User(UserMixin): 30 | """User model for Flask-Login integration.""" 31 | 32 | def __init__( 33 | self, 34 | id, 35 | username, 36 | email=None, 37 | is_admin=False, 38 | timezone="UTC", 39 | date_format="MMM D, YYYY", 40 | feedlist_minscore=None, 41 | primary_channel_id=None, 42 | theme="light", 43 | ): 44 | self.id = id 45 | self.username = username 46 | self.email = email 47 | self.is_admin = is_admin 48 | self.timezone = timezone 49 | self.date_format = date_format 50 | # Store the integer value from DB, convert to decimal for internal use 51 | self.feedlist_minscore_int = ( 52 | feedlist_minscore if feedlist_minscore is not None else 25 53 | ) 54 | self.feedlist_minscore = ( 55 | self.feedlist_minscore_int / 100.0 56 | ) # Convert to decimal (e.g., 25 -> 0.25) 57 | self.primary_channel_id = primary_channel_id 58 | self.theme = theme if theme in ["light", "dark", "auto"] else "light" 59 | -------------------------------------------------------------------------------- /docs/cli-reference/index.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | CLI Reference 3 | ============= 4 | 5 | PaperSorter provides a comprehensive command-line interface for all system operations. This reference documents every command, option, and usage pattern. 6 | 7 | The CLI is built on Python's ``argparse`` module and follows standard Unix conventions for options and arguments. All commands support ``--help`` for detailed usage information.
8 | 9 | Command Overview 10 | ================ 11 | 12 | Core Operations 13 | --------------- 14 | 15 | - **update**: Fetch new articles, generate embeddings, and score papers 16 | - **train**: Train or retrain machine learning models on labeled data 17 | - **broadcast**: Send notifications and recommendations to configured channels 18 | - **serve**: Start the web interface for interactive paper management 19 | 20 | The typical workflow involves running these commands in sequence, often automated via cron jobs for regular operation. 21 | 22 | .. toctree:: 23 | :maxdepth: 2 24 | 25 | commands 26 | examples 27 | 28 | Global Options 29 | ============== 30 | 31 | All commands support these common options: 32 | 33 | ``--config PATH`` 34 | Configuration file location (default: ./config.yml) 35 | 36 | ``--log-file PATH`` 37 | Append log messages to the specified file in addition to console output 38 | 39 | ``-q, --quiet`` 40 | Suppress informational console output (warnings and errors are still shown) 41 | 42 | ``--help`` 43 | Show command-specific help and exit 44 | 45 | Environment Variables 46 | ===================== 47 | 48 | Configuration can also be provided via environment variables: 49 | 50 | - ``PAPERSORTER_CONFIG``: Path to configuration file 51 | - ``PAPERSORTER_LOG_LEVEL``: Logging level (DEBUG, INFO, WARNING, ERROR) 52 | - ``PAPERSORTER_LOG_FILE``: Log file path 53 | 54 | Exit Codes 55 | ========== 56 | 57 | PaperSorter follows standard Unix exit code conventions: 58 | 59 | - ``0``: Success 60 | - ``1``: General error 61 | - ``2``: Command-line usage error 62 | - ``3``: Configuration error 63 | - ``4``: Database error 64 | 65 | Examples 66 | ======== 67 | 68 | Common usage patterns: 69 | 70 | ..
code-block:: bash 71 | 72 | # Daily automation (typical cron setup) 73 | papersorter update --batch-size 50 74 | papersorter train --rounds 100 75 | papersorter broadcast --limit 10 76 | 77 | # Development and testing 78 | papersorter serve --debug --port 5000 79 | papersorter update --limit-sources 5 --check-interval-hours 1 80 | 81 | Related Documentation 82 | ===================== 83 | 84 | - :doc:`../admin-guide/deployment` - Production automation setup -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- 1 | name: Claude Code 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | pull_request_review_comment: 7 | types: [created] 8 | issues: 9 | types: [opened, assigned] 10 | pull_request_review: 11 | types: [submitted] 12 | 13 | jobs: 14 | claude: 15 | if: | 16 | (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || 17 | (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || 18 | (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || 19 | (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) 20 | runs-on: ubuntu-latest 21 | permissions: 22 | contents: read 23 | pull-requests: read 24 | issues: read 25 | id-token: write 26 | actions: read # Required for Claude to read CI results on PRs 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 1 32 | 33 | - name: Run Claude Code 34 | id: claude 35 | uses: anthropics/claude-code-action@beta 36 | with: 37 | claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} 38 | 39 | # This is an optional setting that allows Claude to read CI results on PRs 40 | additional_permissions: | 41 | actions: read 42 | 43 | # Optional: 
Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1) 44 | # model: "claude-opus-4-1-20250805" 45 | 46 | # Optional: Customize the trigger phrase (default: @claude) 47 | # trigger_phrase: "/claude" 48 | 49 | # Optional: Trigger when specific user is assigned to an issue 50 | # assignee_trigger: "claude-bot" 51 | 52 | # Optional: Allow Claude to run specific commands 53 | # allowed_tools: "Bash(npm install),Bash(npm run build),Bash(npm run test:*),Bash(npm run lint:*)" 54 | 55 | # Optional: Add custom instructions for Claude to customize its behavior for your project 56 | # custom_instructions: | 57 | # Follow our coding standards 58 | # Ensure all new code has tests 59 | # Use TypeScript for new files 60 | 61 | # Optional: Custom environment variables for Claude 62 | # claude_env: | 63 | # NODE_ENV: test 64 | 65 | -------------------------------------------------------------------------------- /PaperSorter/static/css/pages/feed_struct.css: -------------------------------------------------------------------------------- 1 | /* Shared feed list structure (no colors) */ 2 | 3 | /* Header row inside a feed item */ 4 | .feed-header { 5 | padding: 20px; 6 | display: flex; 7 | align-items: center; 8 | gap: 15px; 9 | position: relative; 10 | } 11 | 12 | /* Score badge geometry */ 13 | .score-badge { 14 | display: inline-flex; 15 | align-items: center; 16 | padding: 4px 12px; 17 | border-radius: 12px; 18 | font-size: 12px; 19 | font-weight: bold; 20 | min-width: 50px; 21 | justify-content: center; 22 | flex-shrink: 0; 23 | position: relative; 24 | } 25 | 26 | /* Container for score icons */ 27 | .score-icons { 28 | position: absolute; 29 | top: -8px; 30 | right: -8px; 31 | display: flex; 32 | gap: 2px; 33 | } 34 | 35 | /* Score icon geometry only */ 36 | .score-icon { 37 | border-radius: 50%; 38 | width: 16px; 39 | height: 16px; 40 | display: flex; 41 | align-items: center; 42 | justify-content: center; 43 | font-size: 10px; 44 | } 45 | 46 | /* Content 
column */ 47 | .feed-content { 48 | flex: 1; 49 | min-width: 0; 50 | } 51 | 52 | /* Title */ 53 | .feed-title { 54 | font-size: 18px; 55 | font-weight: bold; 56 | margin-bottom: 5px; 57 | line-height: 1.3; 58 | } 59 | 60 | /* Meta row */ 61 | .feed-meta { 62 | display: flex; 63 | gap: 15px; 64 | font-size: 14px; 65 | margin-bottom: 5px; 66 | } 67 | 68 | .feed-meta-item { 69 | white-space: nowrap; 70 | overflow: hidden; 71 | text-overflow: ellipsis; 72 | } 73 | 74 | .feed-author { max-width: 200px; } 75 | .feed-item.expanded .feed-author { 76 | white-space: normal; 77 | overflow: visible; 78 | text-overflow: unset; 79 | max-width: none; 80 | } 81 | 82 | .feed-origin { font-weight: bold; } 83 | .feed-date { flex-shrink: 0; } 84 | 85 | /* Label badges */ 86 | .feed-labels { 87 | display: flex; 88 | gap: 8px; 89 | align-items: center; 90 | margin-left: auto; 91 | flex-shrink: 0; 92 | } 93 | 94 | .label-badge { 95 | padding: 2px 8px; 96 | border-radius: 10px; 97 | font-size: 11px; 98 | font-weight: bold; 99 | } 100 | 101 | /* Details block */ 102 | .feed-details { padding: 20px; } 103 | .feed-details.expanded { display: block; } 104 | 105 | /* Abstract */ 106 | .feed-abstract { 107 | font-size: 15px; 108 | line-height: 1.6; 109 | margin-bottom: 15px; 110 | } 111 | 112 | /* Actions row */ 113 | .feed-actions { 114 | display: flex; 115 | gap: 10px; 116 | } 117 | 118 | .badges-container { 119 | display: flex; 120 | gap: 10px; 121 | align-items: center; 122 | } 123 | 124 | -------------------------------------------------------------------------------- /PaperSorter/templates/feedback_error.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Error - {{ site_name }}{% endblock %} 4 | 5 | {% block header %}{% endblock %} 6 | 7 | {% block styles %} 8 | 9 | 77 | {% endblock %} 78 | 79 | {% block main_container %} 80 |
81 |
82 | 83 |

Oops!

84 | 85 |
86 | {{ message }} 87 |
88 | 89 |
90 | 91 | 🏠 Go to Home 92 | 93 |
94 |
95 | {% endblock %} 96 | -------------------------------------------------------------------------------- /docker/config.docker.yml: -------------------------------------------------------------------------------- 1 | # PaperSorter Configuration Template for Docker 2 | # This file should be mounted as /app/config.yml in the container 3 | # Environment variables are expanded when needed 4 | 5 | admin_users: 6 | # Auto-promoted admin users (comma-separated in env var) 7 | # ${ADMIN_USERS} 8 | 9 | db: 10 | type: postgres 11 | host: postgres # Docker service name 12 | database: ${POSTGRES_DB:-papersorter} 13 | user: ${POSTGRES_USER:-papersorter} 14 | password: ${POSTGRES_PASSWORD:-changeme} 15 | 16 | web: 17 | site_name: ${SITE_NAME:-PaperSorter} 18 | base_url: ${BASE_URL:-http://localhost} 19 | flask_secret_key: ${FLASK_SECRET_KEY} 20 | default_timezone: ${DEFAULT_TIMEZONE:-UTC} 21 | default_date_format: ${DEFAULT_DATE_FORMAT:-MMM D, YYYY} 22 | 23 | oauth: 24 | google: 25 | client_id: ${GOOGLE_CLIENT_ID} 26 | secret: ${GOOGLE_CLIENT_SECRET} 27 | github: 28 | client_id: ${GITHUB_CLIENT_ID} 29 | secret: ${GITHUB_CLIENT_SECRET} 30 | orcid: 31 | client_id: ${ORCID_CLIENT_ID} 32 | secret: ${ORCID_CLIENT_SECRET} 33 | sandbox: false 34 | 35 | embedding_api: 36 | api_key: ${EMBEDDING_API_KEY} 37 | api_url: ${EMBEDDING_API_URL:-https://api.openai.com/v1} 38 | model: ${EMBEDDING_MODEL:-text-embedding-3-large} 39 | dimensions: ${EMBEDDING_DIMENSIONS} 40 | 41 | models: 42 | path: /data/models 43 | 44 | storage: 45 | ai_poster_dir: /data/posters 46 | 47 | summarization_api: 48 | api_key: ${SUMMARIZATION_API_KEY} 49 | api_url: ${SUMMARIZATION_API_URL:-https://generativelanguage.googleapis.com/v1beta/openai} 50 | model: ${SUMMARIZATION_MODEL:-gemini-2.0-flash-thinking-exp-01-21} 51 | 52 | scholarly_database: 53 | provider: ${SCHOLARLY_PROVIDER:-semantic_scholar} 54 | match_date_tolerance_days: ${MATCH_DATE_TOLERANCE_DAYS:-60} 55 | semantic_scholar: 56 | api_key: 
${SEMANTIC_SCHOLAR_API_KEY} 57 | max_retries: ${SEMANTIC_SCHOLAR_MAX_RETRIES:-5} 58 | retry_backoff_base: ${SEMANTIC_SCHOLAR_RETRY_BACKOFF_BASE:-2} 59 | throttle: ${SEMANTIC_SCHOLAR_THROTTLE:-1} 60 | openalex: 61 | email: ${OPENALEX_EMAIL} 62 | max_retries: ${OPENALEX_MAX_RETRIES:-5} 63 | retry_backoff_base: ${OPENALEX_RETRY_BACKOFF_BASE:-2} 64 | throttle: ${OPENALEX_THROTTLE:-0.1} 65 | 66 | smtp: 67 | provider: ${SMTP_PROVIDER:-custom} 68 | host: ${SMTP_HOST} 69 | port: ${SMTP_PORT:-587} 70 | username: ${SMTP_USERNAME} 71 | password: ${SMTP_PASSWORD} 72 | encryption: ${SMTP_ENCRYPTION:-tls} 73 | timeout: ${SMTP_TIMEOUT:-30} 74 | 75 | email: 76 | from_address: ${EMAIL_FROM:-papersorter@example.com} 77 | from_name: ${EMAIL_FROM_NAME:-PaperSorter Newsletter} 78 | subject_template: ${EMAIL_SUBJECT_TEMPLATE:-Research Papers Digest - {date:%Y-%m-%d}} -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # Repository Guidelines 2 | 3 | ## Project Structure & Module Organization 4 | - `PaperSorter/` – main package: `cli/`, `tasks/`, `providers/`, `notification/`, `utils/`, `web/`, `templates/`, `static/`, data helpers. 5 | - `docs/` – Sphinx docs (`make html`). 6 | - `docker/`, `docker-compose*.yml`, `papersorter-cli` – containerized runtime. 7 | - `migrations/`, `SQL_SCHEMA.sql` – database schema/migrations. 8 | - `examples/`, `tools/`, `notebook/` – scripts and prototypes. 9 | - `config.yml` – root config (often a symlink); do not commit secrets. 10 | 11 | ## Build, Test, and Development Commands 12 | - Create env: `python -m venv .venv && source .venv/bin/activate && pip install -e ".[dev]"` 13 | - Initialize DB: `papersorter init` (Docker: `./papersorter-cli init`). 14 | - Run web locally: `papersorter serve --debug --port 5001`. 15 | - Update/predict/train: `papersorter update`, `papersorter predict`, `papersorter train --name "Model v1"`. 
16 | - Lint/format/types: `black PaperSorter/`, `flake8 PaperSorter/`, `mypy PaperSorter/`. 17 | - Tests: `pytest` (optional coverage: `pytest --cov=PaperSorter` if `pytest-cov` installed). 18 | - Docs: `cd docs && make html`. 19 | 20 | ## Coding Style & Naming Conventions 21 | - Python 3.8+; PEP 8 with Black formatting (88 cols default). 22 | - Lint with Flake8; type hints required on public APIs; keep `mypy` clean. 23 | - Naming: modules/functions/vars `snake_case`; classes `PascalCase`; constants `UPPER_CASE`. 24 | - Keep modules focused; prefer explicit imports; add docstrings for non-trivial functions. 25 | 26 | ## Testing Guidelines 27 | - Framework: Pytest. Place tests under `tests/` mirroring `PaperSorter/` paths. 28 | - File naming: `tests/test_*.py`; use fixtures and fakes over hitting real services. 29 | - Database: prefer isolated test DB or mocks; avoid modifying prod schemas. 30 | - Aim for coverage on new/changed code; include CLI and key branches. 31 | 32 | ## Commit & Pull Request Guidelines 33 | - Commits: imperative, concise subject (e.g., "Fix event logging in update task"). 34 | - Include rationale in body when behavior changes; reference issues (`Fixes #123`). 35 | - PRs must: describe scope and impact, include screenshots for UI, sample CLI output/logs for tasks, update docs/CHANGELOG when user-facing, note migration impacts. 36 | - CI hygiene: run `black`, `flake8`, `mypy`, and `pytest` locally before opening PRs. 37 | 38 | ## Security & Configuration Tips 39 | - Do not commit secrets. Use `.env` (copy from `.env.example`) and `config.yml` locally; keep lab-specific configs (e.g., `qbio/`) out of PRs unless intended. 40 | - Validate external API keys via environment/config; never hardcode. 41 | - For Docker, prefer `docker-compose up -d` and manage settings in `.env`. 
42 | -------------------------------------------------------------------------------- /PaperSorter/web/models/scholarly_article.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
class ScholarlyArticleItem(FeedItem):
    """Feed item built from a provider-independent ``ScholarlyArticle``."""

    def __init__(self, article: ScholarlyArticle):
        """Populate the FeedItem fields from *article*.

        The content is the abstract with the TL;DR (when present) placed in
        front of it; when there is no abstract the TL;DR stands alone.
        """
        self.article = article

        abstract = article.abstract
        summary = article.tldr
        if summary and abstract:
            # Both available: TL;DR first, then the full abstract.
            content = f"(tl;dr) {summary}\n\n{abstract}"
        elif summary:
            content = f"(tl;dr) {summary}"
        else:
            content = abstract

        # Articles without a publication date are stamped with "now".
        published_datetime = article.publication_date or datetime.now()

        super().__init__(
            external_id=article.unique_id,
            title=article.title,
            content=content or "",
            author=article.format_authors(),
            origin="",  # Source will be set by caller when adding
            journal=article.venue or "Unknown",
            link=article.url or "",
            published=published_datetime,
        )

        # Keep the untouched article so callers can reach every field.
        self.raw_article = article
command -v docker &> /dev/null; then 16 | echo -e "${RED}Error: Docker is not installed or not in PATH${NC}" 17 | exit 1 18 | fi 19 | 20 | # Check if we're in the right directory (where docker-compose.yml exists) 21 | if [ ! -f "docker-compose.yml" ]; then 22 | echo -e "${RED}Error: docker-compose.yml not found in current directory${NC}" 23 | echo "Please run this script from the PaperSorter root directory" 24 | exit 1 25 | fi 26 | 27 | # Check if the web container is running 28 | if ! docker-compose ps --services --filter "status=running" | grep -q "^web$"; then 29 | echo -e "${YELLOW}Warning: PaperSorter web container is not running${NC}" 30 | echo "Starting services..." 31 | docker-compose up -d 32 | 33 | # Wait for services to be ready 34 | echo "Waiting for services to be ready..." 35 | sleep 5 36 | 37 | # Wait for database to be ready 38 | for i in {1..30}; do 39 | if docker-compose exec -T web pg_isready -h postgres -U papersorter &> /dev/null; then 40 | echo -e "${GREEN}Services are ready!${NC}" 41 | break 42 | fi 43 | if [ $i -eq 30 ]; then 44 | echo -e "${RED}Error: Services failed to start${NC}" 45 | exit 1 46 | fi 47 | echo -n "." 48 | sleep 2 49 | done 50 | echo 51 | fi 52 | 53 | # Special handling for certain commands 54 | case "$1" in 55 | "logs") 56 | # Show logs from web container 57 | docker-compose logs -f web 58 | ;; 59 | "shell") 60 | # Open interactive shell in web container 61 | docker-compose exec web bash 62 | ;; 63 | "db-shell") 64 | # Open PostgreSQL shell 65 | docker-compose exec postgres psql -U papersorter papersorter 66 | ;; 67 | "status") 68 | # Show status of all services 69 | docker-compose ps 70 | ;; 71 | "restart") 72 | # Restart all services 73 | docker-compose restart 74 | ;; 75 | "update-image") 76 | # Rebuild and update Docker images 77 | echo "Rebuilding PaperSorter images..." 
78 | docker-compose build --no-cache 79 | docker-compose up -d 80 | ;; 81 | *) 82 | # Pass through to papersorter command in container 83 | # Use -T flag to disable TTY allocation for non-interactive commands 84 | docker-compose exec -T web papersorter "$@" 85 | ;; 86 | esac -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Tutorials 3 | =================== 4 | 5 | Step-by-step tutorials for common PaperSorter integrations and advanced use cases. These hands-on guides walk you through real-world scenarios with detailed instructions and examples. 6 | 7 | Each tutorial is designed to be self-contained and includes all necessary configuration files, code snippets, and troubleshooting tips. 8 | 9 | What You'll Find Here 10 | ===================== 11 | 12 | Integration Guides 13 | ------------------ 14 | 15 | Learn how to connect PaperSorter with popular services and tools: 16 | 17 | - **Email Newsletter Setup**: Configure Gmail or other email providers 18 | - **Slack Integration**: Set up team notifications and channels 19 | - **Custom Embedding Models**: Use local or specialized embedding APIs 20 | - **Multi-Model Workflows**: Manage different models for different research areas 21 | 22 | These tutorials assume you have completed the :doc:`../getting-started/index` guide and have a working PaperSorter installation. 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | 27 | gmail-setup 28 | slack-integration 29 | custom-embeddings 30 | multi-model 31 | 32 | Tutorial Structure 33 | ================== 34 | 35 | Each tutorial follows a consistent format: 36 | 37 | **Prerequisites** 38 | What you need before starting (accounts, API keys, etc.) 
39 | 40 | **Overview** 41 | What you'll accomplish and why it's useful 42 | 43 | **Step-by-Step Instructions** 44 | Detailed walkthrough with commands and configuration 45 | 46 | **Testing and Verification** 47 | How to confirm everything is working correctly 48 | 49 | **Troubleshooting** 50 | Common issues and their solutions 51 | 52 | **Next Steps** 53 | Related tutorials and advanced configurations 54 | 55 | Difficulty Levels 56 | ================= 57 | 58 | 🟢 **Beginner**: Basic configuration and setup tasks 59 | 60 | 🟡 **Intermediate**: Requires some technical knowledge and customization 61 | 62 | 🔴 **Advanced**: Complex integrations requiring development skills 63 | 64 | Prerequisites 65 | ============= 66 | 67 | Before starting any tutorial: 68 | 69 | - Complete the :doc:`../getting-started/quickstart` guide 70 | - Have a working PaperSorter installation 71 | - Access to necessary external services (Gmail, Slack, etc.) 72 | - Basic familiarity with configuration files and command-line tools 73 | 74 | Getting Help 75 | ============ 76 | 77 | If you encounter issues: 78 | 79 | 1. Check the tutorial's troubleshooting section 80 | 2. Search existing GitHub issues 81 | 3. Ask for help in community discussions 82 | 83 | Contributing Tutorials 84 | ====================== 85 | 86 | Have a useful integration or workflow? We welcome tutorial contributions! 
_DEFAULT_BASE_URL = "https://api.openai.com/v1"

# Shared clients, one per distinct (section, api_key, base_url) triple.
_CLIENT_CACHE: Dict[Tuple[str, str, str], OpenAI] = {}
_CACHE_LOCK = RLock()


def _normalize_base_url(url: Optional[str]) -> str:
    """Return *url* without trailing slashes, or the default endpoint if empty."""
    stripped = url.rstrip("/") if url else ""
    return stripped or _DEFAULT_BASE_URL


def get_openai_client(
    section: str,
    cfg: Optional[Mapping[str, Any]] = None,
    *,
    optional: bool = False,
) -> Optional[OpenAI]:
    """Fetch (or lazily create) the cached OpenAI client for a config section.

    Args:
        section: Configuration section to read, e.g. ``"summarization_api"``.
        cfg: Mapping used instead of the global configuration when given.
        optional: If ``True``, a missing/invalid section or a missing API key
            yields ``None`` rather than an exception.

    Raises:
        ValueError: The section is absent or not a mapping, or its ``api_key``
            is not a non-blank string, and ``optional`` is ``False``.

    Returns:
        The shared :class:`~openai.OpenAI` client, or ``None`` when optional.
    """

    def _unavailable(message: str) -> None:
        # Single exit for "cannot build a client": soft-fail or raise.
        if optional:
            return None
        raise ValueError(message)

    source: Optional[Mapping[str, Any]] = get_config().raw if cfg is None else cfg
    settings = source.get(section) if source else None
    if not isinstance(settings, Mapping):
        return _unavailable(f"Configuration section '{section}' is missing or invalid")

    api_key = settings.get("api_key")
    if not (isinstance(api_key, str) and api_key.strip()):
        return _unavailable(f"Configuration section '{section}' is missing 'api_key'")

    # Non-string api_url values are treated as "not configured".
    url_setting = settings.get("api_url")
    if not isinstance(url_setting, str):
        url_setting = None
    base_url = _normalize_base_url(url_setting)

    cache_key = (section, api_key, base_url)
    with _CACHE_LOCK:
        if cache_key not in _CLIENT_CACHE:
            _CLIENT_CACHE[cache_key] = OpenAI(api_key=api_key, base_url=base_url)
        return _CLIENT_CACHE[cache_key]


def reset_openai_client_cache() -> None:
    """Drop every cached OpenAI client (useful in tests)."""
    with _CACHE_LOCK:
        _CLIENT_CACHE.clear()
"Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | 24 | """Base classes for notification providers.""" 25 | 26 | from abc import ABC, abstractmethod 27 | import re 28 | 29 | 30 | class NotificationError(Exception): 31 | """Base exception for notification errors.""" 32 | 33 | pass 34 | 35 | 36 | class NotificationProvider(ABC): 37 | """Abstract base class for notification providers.""" 38 | 39 | @abstractmethod 40 | def send_notifications(self, items, message_options, base_url=None): 41 | """Send notifications for a batch of items. 
42 | 43 | Args: 44 | items: List of dictionaries, each containing article information with keys: 45 | - id: Article ID 46 | - title: Article title 47 | - content: Article content/abstract 48 | - author: Article authors 49 | - origin: Source of the article 50 | - link: URL to the article 51 | - score: Prediction score (0.0 to 1.0) 52 | message_options: Additional options for the message 53 | - model_name: Name of the model used for scoring 54 | - channel_name: Name of the channel 55 | base_url: Base URL for web interface links 56 | 57 | Returns: 58 | List of (item_id, success) tuples indicating which items were sent successfully 59 | 60 | Raises: 61 | NotificationError: If sending fails completely 62 | """ 63 | pass 64 | 65 | @staticmethod 66 | def normalize_text(text): 67 | """Normalize whitespace in text.""" 68 | if not text: 69 | return "" 70 | return re.sub(r"\s+", " ", text).strip() 71 | 72 | @staticmethod 73 | def limit_text_length(text, limit): 74 | """Truncate text to specified length.""" 75 | if not text: 76 | return "" 77 | if len(text) > limit: 78 | return text[: limit - 3] + "…" 79 | return text 80 | -------------------------------------------------------------------------------- /PaperSorter/utils/template_filters.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all 
# Inline formatting tags that safe_html_filter turns back into real markup.
_SAFE_INLINE_TAGS = ("i", "b", "em", "strong", "sup", "sub")


def safe_html_filter(text):
    """
    Filter to allow only safe HTML tags in text content.
    Allows: i, b, em, strong, sup, sub tags while escaping everything else.

    The whole input is HTML-escaped first. Then only exact, attribute-free
    open/close pairs of the allowed tags are converted back to markup, so the
    text between them stays escaped. Falsy input is returned unchanged.

    Args:
        text: Raw text that may contain HTML.

    Returns:
        A ``Markup`` object that templates can render without re-escaping.
    """
    if not text:
        return text

    # First escape all HTML
    result = str(escape(text))

    # After escaping, an allowed tag appears with entity-encoded angle
    # brackets, so the pattern must match that form (a literal "<i>" can no
    # longer occur) and the replacement must put the real tag pair back.
    for tag in _SAFE_INLINE_TAGS:
        result = re.sub(
            rf"&lt;{tag}&gt;(.*?)&lt;/{tag}&gt;",
            rf"<{tag}>\1</{tag}>",
            result,
            flags=re.IGNORECASE | re.DOTALL,
        )

    return Markup(result)


def strip_html_filter(text):
    """
    Strip all HTML tags from text for use in page titles and meta tags.

    Tags are removed first and HTML entities are decoded afterwards. Falsy
    input is returned unchanged.
    """
    if not text:
        return text

    # Remove all HTML tags using regex
    clean_text = re.sub(r'<[^>]+>', '', str(text))

    # Also decode HTML entities
    clean_text = html.unescape(clean_text)

    return clean_text


def register_filters(jinja_env):
    """
    Register all custom template filters with a Jinja2 environment.

    Args:
        jinja_env: Jinja2 Environment instance
    """
    jinja_env.filters['safe_html'] = safe_html_filter
    jinja_env.filters['strip_html'] = strip_html_filter
:: Python :: 3.11", 40 | "Programming Language :: Python :: 3.12", 41 | "Topic :: Scientific/Engineering", 42 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 43 | "Topic :: Scientific/Engineering :: Information Analysis", 44 | ] 45 | 46 | dependencies = [ 47 | "feedparser>=6.0", 48 | "numpy>=1.20", 49 | "openai>=1.30", 50 | "pandas>=2.0", 51 | "psycopg2-binary>=2.9", 52 | "pgvector>=0.2.0", 53 | "PyYAML>=6.0", 54 | "requests>=2.7.0", 55 | "scikit-learn>=1.4", 56 | "scipy>=1.10", 57 | "xgboost>2.0", 58 | "Flask>=2.0", 59 | "Flask-Login>=0.6.0", 60 | "Authlib>=1.2.0", 61 | "markdown2>=2.4.0", 62 | "tabulate>=0.9.0", 63 | ] 64 | 65 | [project.urls] 66 | Homepage = "https://github.com/ChangLabSNU/papersorter" 67 | Repository = "https://github.com/ChangLabSNU/papersorter" 68 | Documentation = "https://github.com/ChangLabSNU/papersorter#readme" 69 | "Bug Tracker" = "https://github.com/ChangLabSNU/papersorter/issues" 70 | 71 | [project.scripts] 72 | papersorter = "PaperSorter.__main__:main" 73 | 74 | [project.optional-dependencies] 75 | dev = [ 76 | "pytest>=7.0", 77 | "black>=22.0", 78 | "flake8>=5.0", 79 | "mypy>=1.0", 80 | ] 81 | server = [ 82 | "uwsgi>=2.0", 83 | ] 84 | 85 | [tool.setuptools] 86 | include-package-data = true 87 | 88 | [tool.setuptools.packages.find] 89 | include = ["PaperSorter*"] 90 | exclude = ["tests*", "notebook*", "tools*", "old*"] 91 | 92 | [tool.setuptools.package-data] 93 | PaperSorter = [ 94 | "templates/*.html", 95 | "templates/email/*.html", 96 | "templates/email/*.txt", 97 | "static/favicon.ico", 98 | "static/manifest.json", 99 | "static/css/*.css", 100 | "static/css/components/.gitkeep", 101 | "static/js/*.js", 102 | "static/icons/*.png", 103 | "data/*.py", 104 | ] 105 | -------------------------------------------------------------------------------- /docs/api/index.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | API Documentation 3 | ================= 4 | 5 | 
This section provides comprehensive documentation for PaperSorter's internal APIs, modules, and extension points. 6 | 7 | Whether you're developing custom integrations, contributing to the project, or building extensions, this reference will help you understand PaperSorter's architecture and interfaces. 8 | 9 | Architecture Overview 10 | ===================== 11 | 12 | PaperSorter is organized into several key components: 13 | 14 | - **Core Modules**: Database interfaces, embedding generation, and ML models 15 | - **Feed Providers**: Pluggable RSS/Atom feed processors 16 | - **Web Framework**: Flask-based REST API and user interface 17 | - **Notification System**: Multi-channel broadcast capabilities 18 | - **CLI Tasks**: Command-line interface implementations 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | 23 | modules 24 | database 25 | providers 26 | notifications 27 | web 28 | 29 | Key Interfaces 30 | ============== 31 | 32 | Database Layer 33 | -------------- 34 | 35 | The database layer provides unified access to PostgreSQL with pgvector support: 36 | 37 | - **FeedDatabase**: Article metadata and user preferences 38 | - **EmbeddingDatabase**: Vector storage and similarity search 39 | - **Schema Management**: Migrations and table definitions 40 | 41 | Provider System 42 | --------------- 43 | 44 | Feed providers implement a common interface for content ingestion: 45 | 46 | - **BaseProvider**: Abstract interface for all feed sources 47 | - **RSSProvider**: RSS/Atom feed implementation 48 | - **Custom Providers**: Extension points for new content sources 49 | 50 | Web API 51 | ------- 52 | 53 | RESTful endpoints organized by functional domain: 54 | 55 | - **Feeds API**: Article management and labeling 56 | - **Search API**: Text and semantic search capabilities 57 | - **Settings API**: Administrative configuration 58 | - **User API**: Preferences and personalization 59 | 60 | Extension Points 61 | ================ 62 | 63 | Custom Feed Providers 64 | 
---------------------- 65 | 66 | Implement ``BaseProvider`` to add new content sources: 67 | 68 | .. code-block:: python 69 | 70 | from PaperSorter.providers.base import BaseProvider 71 | 72 | class CustomProvider(BaseProvider): 73 | def fetch_articles(self): 74 | # Implementation here 75 | pass 76 | 77 | Custom Notification Channels 78 | ----------------------------- 79 | 80 | Extend the notification system for new delivery methods: 81 | 82 | .. code-block:: python 83 | 84 | from PaperSorter.notification.base import NotificationProvider 85 | 86 | class CustomNotifier(NotificationProvider): 87 | def send_notifications(self, items, message_options, base_url=None): 88 | # Implementation here 89 | pass 90 | 91 | API Conventions 92 | =============== 93 | 94 | - All APIs use consistent error handling and response formats 95 | - Database operations support transaction management 96 | - Configuration is injected via dependency injection patterns 97 | - Logging follows structured format for operational monitoring 98 | 99 | Related Resources 100 | ================= 101 | 102 | - :doc:`../development/index` - Development and contributing guidelines -------------------------------------------------------------------------------- /.github/workflows/claude-code-review.yml: -------------------------------------------------------------------------------- 1 | name: Claude Code Review 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | # Optional: Only run on specific file changes 7 | # paths: 8 | # - "src/**/*.ts" 9 | # - "src/**/*.tsx" 10 | # - "src/**/*.js" 11 | # - "src/**/*.jsx" 12 | 13 | jobs: 14 | claude-review: 15 | # Optional: Filter by PR author 16 | # if: | 17 | # github.event.pull_request.user.login == 'external-contributor' || 18 | # github.event.pull_request.user.login == 'new-developer' || 19 | # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' 20 | 21 | runs-on: ubuntu-latest 22 | permissions: 23 | contents: read 24 | pull-requests: read 25 | issues: read 26 | id-token: write 27 | 28 |
steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v4 31 | with: 32 | fetch-depth: 1 33 | 34 | - name: Run Claude Code Review 35 | id: claude-review 36 | uses: anthropics/claude-code-action@beta 37 | with: 38 | claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} 39 | 40 | # Optional: Specify model (defaults to Claude Sonnet 4, uncomment for Claude Opus 4.1) 41 | # model: "claude-opus-4-1-20250805" 42 | 43 | # Direct prompt for automated review (no @claude mention needed) 44 | direct_prompt: | 45 | Please review this pull request and provide feedback on: 46 | - Code quality and best practices 47 | - Potential bugs or issues 48 | - Performance considerations 49 | - Security concerns 50 | - Test coverage 51 | 52 | Be constructive and helpful in your feedback. 53 | 54 | # Optional: Use sticky comments to make Claude reuse the same comment on subsequent pushes to the same PR 55 | # use_sticky_comment: true 56 | 57 | # Optional: Customize review based on file types 58 | # direct_prompt: | 59 | # Review this PR focusing on: 60 | # - For TypeScript files: Type safety and proper interface usage 61 | # - For API endpoints: Security, input validation, and error handling 62 | # - For React components: Performance, accessibility, and best practices 63 | # - For tests: Coverage, edge cases, and test quality 64 | 65 | # Optional: Different prompts for different authors 66 | # direct_prompt: | 67 | # ${{ github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' && 68 | # 'Welcome! Please review this PR from a first-time contributor. Be encouraging and provide detailed explanations for any suggestions.' || 69 | # 'Please provide a thorough code review focusing on our coding standards and best practices.' 
}} 70 | 71 | # Optional: Add specific tools for running tests or linting 72 | # allowed_tools: "Bash(npm run test),Bash(npm run lint),Bash(npm run typecheck)" 73 | 74 | # Optional: Skip review for certain conditions 75 | # if: | 76 | # !contains(github.event.pull_request.title, '[skip-review]') && 77 | # !contains(github.event.pull_request.title, '[WIP]') 78 | 79 | -------------------------------------------------------------------------------- /PaperSorter/web/models/semantic_scholar.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
class SemanticScholarItem(FeedItem):
    """Feed item built from one Semantic Scholar paper record.

    The record is a dictionary as returned by the Semantic Scholar API.
    Optional fields (``abstract``, ``tldr``, ``publicationDate``, ``authors``,
    ``journal``, ``venue``, ``externalIds``) may be absent or ``None``; only
    ``url`` and ``title`` are required.
    """

    def __init__(self, paper_info):
        """Populate the parent ``FeedItem`` from *paper_info*.

        Args:
            paper_info: Mapping with the paper fields described on the class.

        Raises:
            KeyError: If ``url`` or ``title`` is missing.
            ValueError: If ``publicationDate`` is not a ``YYYY-MM-DD`` string.
        """
        self.paper_info = paper_info

        # Name-based UUID: the same paper URL always yields the same ID.
        article_id = uuid.uuid3(uuid.NAMESPACE_URL, paper_info["url"])

        # Prefer the abstract; fall back to the machine-generated tl;dr.
        tldr_text = (paper_info.get("tldr") or {}).get("text")
        tldr = ("(tl;dr) " + tldr_text) if tldr_text else ""
        content = paper_info.get("abstract") or tldr

        # Parse the publication date; use "now" when the API gives none.
        pdate = paper_info.get("publicationDate")
        if pdate:
            published_datetime = datetime(*map(int, pdate.split("-")))
        else:
            published_datetime = datetime.now()

        # Skip author entries that carry no usable name.
        author_names = [
            author["name"]
            for author in (paper_info.get("authors") or [])
            if author.get("name")
        ]

        super().__init__(
            external_id=str(article_id),
            title=paper_info["title"],
            content=content,
            author=", ".join(author_names),
            origin="Semantic Scholar",
            journal=self.determine_journal(paper_info),
            link=paper_info["url"],
            published=published_datetime,
        )

        # Store additional attributes for compatibility
        self.href = self.link
        self.mediaUrl = self.link
        self.item_id = self.external_id  # Alias for database compatibility

    def determine_journal(self, paper_info):
        """Return the best available publication venue name.

        Preference order: journal name, venue, ``"arXiv"`` when the paper has
        an ArXiv external ID, otherwise ``"Unknown"``. Missing, ``None`` or
        empty fields are skipped instead of raising.
        """
        journal_name = (paper_info.get("journal") or {}).get("name")
        if journal_name:
            return journal_name

        venue = paper_info.get("venue")
        if venue:
            return venue

        if "ArXiv" in (paper_info.get("externalIds") or {}):
            return "arXiv"

        return "Unknown"
PaperSorter will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ### Added 11 | - Comprehensive Sphinx documentation with Read the Docs theme 12 | - Auto-generated API documentation from docstrings 13 | - CLI command reference with examples 14 | - Getting Started guides for new users 15 | - GitHub Actions workflow for documentation deployment 16 | 17 | ## [1.0.0] - 2025-01-16 18 | 19 | ### Added 20 | - Initial release of PaperSorter 21 | - RSS/Atom feed support for paper ingestion 22 | - Machine learning-based paper recommendation using XGBoost 23 | - PostgreSQL database with pgvector for embeddings 24 | - Web interface for paper labeling and management 25 | - Slack, Discord, and email notification support 26 | - OAuth authentication (Google, GitHub, ORCID) 27 | - Multi-model support for different research domains 28 | - Semantic search using embedding similarity 29 | - AI-powered summarization and poster generation 30 | - Admin interface for system configuration 31 | - Comprehensive CLI with task automation 32 | - Docker and Kubernetes deployment support 33 | 34 | ### Changed 35 | - Migrated from SQLite to PostgreSQL for better scalability 36 | - Improved embedding generation with configurable models 37 | - Enhanced web UI with responsive design 38 | - Optimized database queries for large datasets 39 | 40 | ### Fixed 41 | - Unicode handling in paper titles and abstracts 42 | - Memory leaks in long-running update processes 43 | - Race conditions in parallel feed processing 44 | - Authentication session management issues 45 | 46 | ## [0.9.0] - 2024-12-01 (Beta) 47 | 48 | ### Added 49 | - Beta release for testing 50 | - Core functionality implementation 51 | - Basic web interface 52 | - Initial model training capabilities 53 | 54 | ### Known Issues 55 | - Limited to 
single-user deployment 56 | - No backup/restore functionality 57 | - Manual configuration required 58 | 59 | ## [0.5.0] - 2024-10-15 (Alpha) 60 | 61 | ### Added 62 | - Alpha release for internal testing 63 | - Proof of concept implementation 64 | - Basic RSS feed parsing 65 | - Simple XGBoost model training 66 | 67 | --- 68 | 69 | ## Version History Summary 70 | 71 | - **1.0.0** - Production-ready release with full feature set 72 | - **0.9.0** - Beta release with core functionality 73 | - **0.5.0** - Alpha release for testing 74 | 75 | ## Upgrade Notes 76 | 77 | ### Upgrading from 0.9.x to 1.0.0 78 | 79 | 1. **Database Migration Required** 80 | ```bash 81 | papersorter migrate --from 0.9 82 | ``` 83 | 84 | 2. **Configuration Changes** 85 | - `google_oauth` renamed to `oauth.google` 86 | - New `web.base_url` setting required 87 | - `embedding_api.dimensions` now optional 88 | 89 | 3. **Breaking Changes** 90 | - CLI command structure reorganized 91 | - API endpoints moved to `/api/v1/` prefix 92 | - Model file format updated (retrain required) 93 | 94 | ### Upgrading from 0.5.x to 1.0.0 95 | 96 | Complete reinstallation recommended due to extensive changes. 
def _hostname_matches(hostname, domain):
    """Return True if *hostname* is *domain* itself or one of its subdomains."""
    return hostname == domain or hostname.endswith("." + domain)


def create_notification_provider(webhook_url):
    """Create appropriate notification provider based on webhook URL.

    Automatically detects the webhook type based on the URL scheme or hostname:
    - URLs starting with 'mailto:' -> EmailProvider
    - 'slack.com' or any of its subdomains -> SlackProvider
    - 'discord.com' / 'discordapp.com' or any of their subdomains -> DiscordProvider
    - Any other hostname -> SlackProvider (a warning is logged)

    Hostnames are matched on a domain boundary, so a look-alike host such as
    'notdiscord.com' is not mistaken for Discord.

    Args:
        webhook_url: The webhook URL to analyze

    Returns:
        NotificationProvider: Appropriate provider instance

    Raises:
        ValueError: If webhook URL is empty, cannot be parsed, or has no hostname
    """
    if not webhook_url:
        raise ValueError("Webhook URL cannot be empty")

    # Check for mailto: URLs first; they have no hostname to inspect.
    if webhook_url.startswith("mailto:"):
        log.debug(f"Detected email notification: {webhook_url}")
        return EmailProvider(webhook_url)

    # Parse the URL to get hostname
    try:
        hostname = urlparse(webhook_url).hostname or ""
    except ValueError as e:
        # Keep the original parsing error attached for debugging.
        raise ValueError(f"Invalid webhook URL: {e}") from e

    if not hostname:
        raise ValueError(f"Could not extract hostname from URL: {webhook_url}")

    # Determine provider based on hostname
    hostname_lower = hostname.lower()

    if _hostname_matches(hostname_lower, "slack.com"):
        log.debug(f"Detected Slack webhook: {hostname}")
        return SlackProvider(webhook_url)

    if _hostname_matches(hostname_lower, "discord.com") or _hostname_matches(
        hostname_lower, "discordapp.com"
    ):
        log.debug(f"Detected Discord webhook: {hostname}")
        return DiscordProvider(webhook_url)

    # Default to Slack for backward compatibility
    log.warning(
        f"Unknown webhook hostname '{hostname}', defaulting to Slack provider"
    )
    return SlackProvider(webhook_url)
/* Similar section styles embedded in paper details */ 2 | 3 | .summary-section { 4 | background: var(--bg-card); 5 | border-radius: 8px; 6 | padding: 20px; 7 | margin-bottom: 20px; 8 | box-shadow: var(--shadow-subtle); 9 | } 10 | 11 | .similar-section-header { 12 | display: flex; 13 | align-items: center; 14 | justify-content: space-between; 15 | gap: 16px; 16 | flex-wrap: wrap; 17 | margin-bottom: 12px; 18 | } 19 | 20 | .similar-section-actions { 21 | display: flex; 22 | align-items: center; 23 | gap: 8px; 24 | margin-left: auto; 25 | } 26 | 27 | .similar-section-actions .btn { 28 | white-space: nowrap; 29 | } 30 | 31 | .similar-section-actions .btn-generate.btn-sm { 32 | padding: 6px 16px; 33 | font-size: 14px; 34 | border-radius: var(--radius-base); 35 | } 36 | 37 | .summary-initial { 38 | display: flex; 39 | justify-content: center; 40 | align-items: center; 41 | min-height: 80px; 42 | text-align: center; 43 | } 44 | 45 | .summary-placeholder { 46 | margin: 0; 47 | color: var(--text-secondary); 48 | font-size: 14px; 49 | } 50 | 51 | .summary-header { 52 | display: flex; 53 | justify-content: space-between; 54 | align-items: center; 55 | margin-bottom: 0; /* align content to top */ 56 | } 57 | 58 | .summary-header h3 { 59 | margin: 0; 60 | color: var(--similar-summary-header-color); 61 | font-size: 20px; 62 | } 63 | 64 | .summary-text { font-size: 15px; line-height: 1.8; color: var(--similar-summary-text-color); } 65 | .summary-text strong { color: var(--similar-summary-strong-color); } 66 | /* Align list indentation with similar_articles.html */ 67 | .summary-text ul, 68 | .summary-text ol { 69 | margin: 10px 0; 70 | padding-left: 25px; 71 | } 72 | 73 | .summary-text ul ul, 74 | .summary-text ol ol, 75 | .summary-text ul ol, 76 | .summary-text ol ul { 77 | margin-top: 5px; 78 | padding-left: 25px; /* incremental per level */ 79 | } 80 | 81 | .summary-disclaimer { 82 | margin-top: 16px; 83 | padding: 10px 14px; 84 | background-color: 
var(--similar-disclaimer-bg); 85 | border-left: 3px solid var(--similar-disclaimer-border); 86 | border-radius: 4px; 87 | } 88 | 89 | .poster-content { width: 100%; margin-top: 16px; } 90 | .poster-iframe { 91 | width: 100%; 92 | height: 800px; 93 | border: 1px solid var(--similar-disclaimer-border); 94 | border-radius: 8px; 95 | box-shadow: var(--shadow-subtle); 96 | } 97 | 98 | .poster-actions { display: flex; gap: 10px; margin-bottom: 12px; justify-content: flex-end; } 99 | 100 | .btn-print { display: inline-flex; align-items: center; gap: 8px; } 101 | 102 | #similarFeedsList, 103 | .similarFeedsList { 104 | padding-left: 0; 105 | padding-right: 0; 106 | margin-left: 0; 107 | margin-right: 0; 108 | } 109 | 110 | .similarity-header { 111 | background: var(--bg-table-header); 112 | padding: 15px 20px; 113 | border-bottom: 1px solid var(--border-light); 114 | font-weight: bold; 115 | color: var(--text-primary); 116 | font-size: 16px; 117 | } 118 | 119 | #similar-section hr { 120 | border: none; 121 | border-top: 1px solid var(--border-medium); 122 | margin: 20px 0 16px; 123 | } 124 | 125 | #similar-section .summary-loading .spinner, 126 | #similar-section .poster-loading .spinner { 127 | width: 40px; 128 | height: 40px; 129 | margin: 0 auto 20px; 130 | border: 4px solid var(--similar-loading-spinner-border); 131 | border-top: 4px solid var(--similar-btn-generate-bg); 132 | border-radius: 50%; 133 | animation: spin 1s linear infinite; 134 | } 135 | -------------------------------------------------------------------------------- /PaperSorter/cli/context.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation 
class CommandContext:
    """Context object passed to all commands.

    Stores the CLI options and lazily creates the configuration, the database
    manager and the database wrappers the first time each one is requested.
    Call :meth:`cleanup` when the command has finished.
    """

    def __init__(self, log_file: Optional[str] = None, quiet: bool = False):
        """
        Initialize command context.

        Args:
            log_file: Optional log file path
            quiet: Whether to suppress output
        """
        self.log_file = log_file
        self.quiet = quiet
        # Lazily created resources; None means "not opened yet".
        self._config = None
        self._db_manager = None
        self._db = None
        self._embedding_db = None

    @property
    def config(self) -> dict:
        """Load and cache configuration."""
        if self._config is None:
            self._config = get_config().raw
        return self._config

    @property
    def db_manager(self) -> "DatabaseManager":
        """Return a pooled database manager, creating it on first access."""
        if self._db_manager is None:
            db_config = self.config["db"]
            self._db_manager = DatabaseManager.from_config(
                db_config,
                application_name="papersorter-cli",
            )
        return self._db_manager

    @property
    def db(self):
        """Get database connection (lazy loading)."""
        if self._db is None:
            # Imported here so the module is only loaded when a command needs it.
            from ..feed_database import FeedDatabase
            self._db = FeedDatabase(db_manager=self.db_manager)
        return self._db

    @property
    def embedding_db(self):
        """Get embedding database connection (lazy loading)."""
        if self._embedding_db is None:
            # Imported here so the module is only loaded when a command needs it.
            from ..embedding_database import EmbeddingDatabase
            self._embedding_db = EmbeddingDatabase(db_manager=self.db_manager)
        return self._embedding_db

    def cleanup(self):
        """Close every opened resource, even when one of them fails to close.

        The database wrappers are closed before the manager they were built
        on. Each attribute is reset to ``None`` so the context can lazily
        reopen it. If closing raises, the remaining resources are still
        closed and the first error is re-raised afterwards.
        """
        first_error = None
        for attr in ("_db", "_embedding_db", "_db_manager"):
            resource = getattr(self, attr)
            if resource is None:
                continue
            setattr(self, attr, None)
            try:
                resource.close()
            except Exception as exc:
                # Remember the failure but keep releasing the other resources.
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
def create_parser() -> argparse.ArgumentParser:
    """Create the main argument parser.

    Returns:
        The top-level ``papersorter`` parser with ``-h/--help`` and
        ``--version`` registered. Sub-commands are attached by the caller.
    """
    parser = argparse.ArgumentParser(
        prog='papersorter',
        description='Intelligent academic paper recommendation system',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,  # help is registered manually below
    )

    # Add help manually to support both -h and --help
    parser.add_argument(
        '-h', '--help',
        action='help',
        help='Show this help message and exit',
    )

    # Add version option
    parser.add_argument(
        '--version',
        action='version',
        version=f'PaperSorter, version {__version__}',
    )

    return parser


def execute_command(args: argparse.Namespace) -> int:
    """Execute the parsed command.

    Args:
        args: Parsed arguments; the selected command is expected to have
            attached its handler as ``args.command_handler``.

    Returns:
        Whatever the handler's ``handle`` returns, or ``1`` when no handler
        is attached to *args*.
    """
    import logging

    handler = getattr(args, 'command_handler', None)
    if handler is None:
        return 1

    # Load configuration up front, from --config when it was given.
    # NOTE(review): assumes get_config caches its result for later
    # argument-less calls; confirm in config.py.
    try:
        config_path = getattr(args, 'config', None)
        if config_path:
            get_config(config_path)
        else:
            get_config()
    except Exception:
        # Deliberately best effort: errors are deferred to the individual
        # commands, but leave a trace so the failure can be diagnosed.
        logging.getLogger(__name__).debug(
            "Configuration could not be loaded before command execution",
            exc_info=True,
        )

    # Create context (no config argument)
    context = CommandContext(
        log_file=getattr(args, 'log_file', None),
        quiet=getattr(args, 'quiet', False),
    )

    try:
        return handler.handle(args, context)
    finally:
        # Always release database resources, even when the command fails.
        context.cleanup()


def main(argv=None):
    """Main entry point for the CLI.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use ``sys.argv``.

    Returns:
        ``0`` after printing help when no command was given, otherwise the
        result of :func:`execute_command`.
    """
    parser = create_parser()

    # Create subparsers for all registered commands
    registry.create_subparsers(parser)

    args = parser.parse_args(argv)

    # If no command specified, show help
    if getattr(args, 'command', None) is None:
        parser.print_help()
        return 0

    return execute_command(args)
_build/html 50 | ``` 51 | 52 | ## Documentation Structure 53 | 54 | ``` 55 | docs/ 56 | ├── getting-started/ # Quick start guides for new users 57 | ├── user-guide/ # Detailed user documentation 58 | ├── admin-guide/ # System administration guides 59 | ├── cli-reference/ # Command-line interface documentation 60 | ├── api/ # API documentation (auto-generated) 61 | ├── development/ # Developer guides 62 | ├── tutorials/ # Step-by-step tutorials 63 | ├── reference/ # Reference materials 64 | ├── conf.py # Sphinx configuration 65 | ├── index.rst # Main documentation entry point 66 | ├── requirements.txt # Documentation dependencies 67 | └── build.sh # Build automation script 68 | ``` 69 | 70 | ## Contributing to Documentation 71 | 72 | 1. **Edit Markdown/RST Files**: Most documentation is in Markdown format for easy editing 73 | 2. **API Documentation**: Update docstrings in Python code; they're auto-included 74 | 3. **Build Locally**: Always build and preview your changes before submitting 75 | 4. **Check Links**: Run `make linkcheck` to verify all links work 76 | 77 | ## Deployment 78 | 79 | ### GitHub Pages 80 | 81 | The documentation is automatically deployed to GitHub Pages when changes are pushed to the main branch: 82 | 83 | 1. GitHub Actions builds the documentation 84 | 2. Deploys to the `gh-pages` branch 85 | 3. 
Available at: https://qbio.io/PaperSorter/ 86 | 87 | ### Manual Deployment 88 | 89 | ```bash 90 | ./build.sh deploy 91 | ``` 92 | 93 | ## Troubleshooting 94 | 95 | ### Common Issues 96 | 97 | **Import Errors in API Documentation** 98 | - Ensure PaperSorter is installed: `pip install -e ..` 99 | - Check that all dependencies are installed 100 | 101 | **Broken Links** 102 | - Run `make linkcheck` to identify broken links 103 | - Fix references in the source files 104 | 105 | **Build Warnings** 106 | - Missing toctree references: Create the missing files or remove references 107 | - Duplicate descriptions: Add `:no-index:` directive to one instance 108 | 109 | ## Documentation Standards 110 | 111 | - Use **Markdown** for general documentation 112 | - Use **reStructuredText** for complex formatting and directives 113 | - Follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) for docstrings 114 | - Include code examples wherever possible 115 | - Keep line length under 100 characters for better readability 116 | 117 | ## License 118 | 119 | The documentation is licensed under the same terms as PaperSorter (MIT License). 120 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Reference 3 | ================= 4 | 5 | Comprehensive technical reference documentation for PaperSorter. This section provides detailed specifications, schemas, and reference materials for advanced users and developers. 6 | 7 | Use this section when you need precise technical details about configuration options, database structures, environment variables, or terminology. 
8 | 9 | Contents 10 | ======== 11 | 12 | Technical Specifications 13 | ------------------------ 14 | 15 | - **Configuration Reference**: Complete list of all configuration options with types, defaults, and descriptions 16 | - **Database Schema**: Full PostgreSQL schema including tables, indexes, and relationships 17 | - **Environment Variables**: All supported environment variables and their effects 18 | - **Glossary**: Definitions of terms and concepts used throughout PaperSorter 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | 23 | configuration-reference 24 | database-schema 25 | environment-variables 26 | glossary 27 | 28 | Quick Reference 29 | =============== 30 | 31 | Configuration Files 32 | ------------------- 33 | 34 | Primary configuration is stored in ``config.yml`` with these main sections: 35 | 36 | - ``db``: Database connection settings 37 | - ``web``: Web interface configuration 38 | - ``oauth``: Authentication provider settings 39 | - ``embedding_api``: Embedding generation API 40 | - ``summarization_api``: Text summarization API 41 | - ``scholarly_database``: Academic database integration 42 | 43 | Database Tables 44 | --------------- 45 | 46 | Core tables in the PostgreSQL schema: 47 | 48 | - ``feeds``: Article metadata and content 49 | - ``embeddings``: Vector embeddings using pgvector 50 | - ``preferences``: User ratings and labels 51 | - ``predicted_preferences``: ML model predictions 52 | - ``broadcasts``: Notification queue and history 53 | - ``users``: User accounts and settings 54 | - ``channels``: Notification channel configuration 55 | 56 | API Endpoints 57 | ------------- 58 | 59 | Web API organization: 60 | 61 | - ``/api/feeds/``: Article management operations 62 | - ``/api/search/``: Search and discovery features 63 | - ``/api/settings/``: Administrative configuration 64 | - ``/api/user/``: User preferences and data 65 | 66 | CLI Commands 67 | ------------ 68 | 69 | Main command categories: 70 | 71 | - ``papersorter update``: Content 
ingestion and processing 72 | - ``papersorter train``: Model training and evaluation 73 | - ``papersorter broadcast``: Notification delivery 74 | - ``papersorter serve``: Web interface server 75 | 76 | Version Compatibility 77 | ===================== 78 | 79 | This reference documentation applies to: 80 | 81 | - **PaperSorter**: Version 1.0+ 82 | - **Python**: 3.9+ 83 | - **PostgreSQL**: 12+ with pgvector extension 84 | - **Dependencies**: See ``setup.py`` for specific version requirements 85 | 86 | Standards and Conventions 87 | ========================= 88 | 89 | Configuration Format 90 | -------------------- 91 | 92 | - **YAML**: Human-readable configuration files 93 | - **Environment Variables**: Override any configuration value 94 | - **Validation**: Schema validation with helpful error messages 95 | 96 | Database Design 97 | --------------- 98 | 99 | - **PostgreSQL**: ACID compliance and advanced features 100 | - **pgvector**: Efficient vector similarity search 101 | - **Migrations**: Version-controlled schema changes 102 | 103 | API Design 104 | ---------- 105 | 106 | - **REST**: Standard HTTP methods and status codes 107 | - **JSON**: Consistent request/response format 108 | - **Authentication**: OAuth 2.0 with multiple providers 109 | 110 | Related Sections 111 | ================ 112 | 113 | - :doc:`../api/index` - API implementation details -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | import os 7 | import sys 8 | sys.path.insert(0, os.path.abspath('..')) 9 | 10 | # -- Project information ----------------------------------------------------- 11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 12 | 13 | project = 'PaperSorter' 14 | copyright = '2024-2025, Seoul National University' 15 | author = 'PaperSorter Team' 16 | release = '1.0.0' 17 | version = '1.0' 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | 'sphinx.ext.autodoc', 24 | 'sphinx.ext.napoleon', 25 | 'sphinx.ext.viewcode', 26 | 'sphinx.ext.intersphinx', 27 | 'sphinx.ext.todo', 28 | 'sphinx_rtd_theme', 29 | 'myst_parser', 30 | 'sphinx_click', 31 | 'sphinx_copybutton', 32 | 'sphinx_tabs.tabs', 33 | ] 34 | 35 | templates_path = ['_templates'] 36 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 37 | 38 | language = 'en' 39 | 40 | # Support for both RST and Markdown 41 | source_suffix = { 42 | '.rst': 'restructuredtext', 43 | '.md': 'markdown', 44 | } 45 | 46 | # -- Options for HTML output ------------------------------------------------- 47 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 48 | 49 | html_theme = 'sphinx_rtd_theme' 50 | html_static_path = ['_static'] 51 | html_logo = None # We'll add this later if needed 52 | html_favicon = None # We'll add this later if needed 53 | 54 | # Read the Docs theme options 55 | html_theme_options = { 56 | 'logo_only': False, 57 | 'display_version': True, 58 | 'prev_next_buttons_location': 'bottom', 59 | 'style_external_links': False, 60 | 'collapse_navigation': False, 61 | 'sticky_navigation': True, 62 | 'navigation_depth': 4, 63 | 
'includehidden': True, 64 | 'titles_only': False, 65 | } 66 | 67 | # -- Options for intersphinx extension --------------------------------------- 68 | # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#configuration 69 | 70 | intersphinx_mapping = { 71 | 'python': ('https://docs.python.org/3', None), 72 | 'numpy': ('https://numpy.org/doc/stable/', None), 73 | 'pandas': ('https://pandas.pydata.org/docs/', None), 74 | 'sklearn': ('https://scikit-learn.org/stable/', None), 75 | } 76 | 77 | # -- Options for todo extension ---------------------------------------------- 78 | # https://www.sphinx-doc.org/en/master/usage/extensions/todo.html#configuration 79 | 80 | todo_include_todos = True 81 | 82 | # -- Options for autodoc ------------------------------------------------------ 83 | autodoc_default_options = { 84 | 'members': True, 85 | 'member-order': 'bysource', 86 | 'special-members': '__init__', 87 | 'undoc-members': True, 88 | 'exclude-members': '__weakref__' 89 | } 90 | 91 | # -- Options for MyST Markdown parser ---------------------------------------- 92 | myst_enable_extensions = [ 93 | "amsmath", 94 | "colon_fence", 95 | "deflist", 96 | "dollarmath", 97 | "fieldlist", 98 | "html_admonition", 99 | "html_image", 100 | "linkify", 101 | "replacements", 102 | "smartquotes", 103 | "strikethrough", 104 | "substitution", 105 | "tasklist", 106 | ] 107 | 108 | myst_heading_anchors = 3 109 | 110 | # -- Options for copy button ------------------------------------------------- 111 | copybutton_prompt_text = r">>> |\.\.\. 
|\$ |In \[\d*\]: | {2,5}\.\.\.: | {5,8}: " 112 | copybutton_prompt_is_regexp = True 113 | -------------------------------------------------------------------------------- /docs/development/database.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Database Integration 3 | ==================== 4 | 5 | PaperSorter now ships with a centralized PostgreSQL access layer located at 6 | ``PaperSorter/db/manager.py``. The :class:`~PaperSorter.db.DatabaseManager` 7 | wraps a thread-safe psycopg2 connection pool and provides convenient context 8 | managers for opening sessions and cursors with consistent settings (pgvector 9 | registration, RealDict cursors, timeouts, and automatic rollbacks). 10 | 11 | Key Features 12 | ============ 13 | 14 | - **Connection pooling**: ``DatabaseManager`` relies on 15 | :class:`~psycopg2.pool.ThreadedConnectionPool` to reuse connections across the 16 | application. 17 | - **pgvector registration**: Every connection registers the pgvector extension 18 | once and caches the result so callers do not need to repeat the boilerplate. 19 | - **Context-managed sessions**: ``db_manager.session()`` yields a 20 | ``DatabaseSession`` object that commits on success and rolls back on failure. 21 | - **Legacy compatibility**: ``db_manager.connect()`` returns a 22 | ``PooledConnection`` wrapper that mimics the old ``psycopg2.connect`` object 23 | so existing code can opt in gradually. 24 | 25 | Web Application Usage 26 | ===================== 27 | 28 | ``create_app`` instantiates a single ``DatabaseManager`` and stores it on the 29 | Flask application config as ``app.config["db_manager"]``. Application code 30 | should always work inside ``db_manager.session()`` blocks rather than calling a 31 | legacy ``get_db_connection`` helper: 32 | 33 | .. 
code-block:: python 34 | 35 | from flask import current_app 36 | 37 | db_manager = current_app.config["db_manager"] 38 | with db_manager.session() as session: 39 | cursor = session.cursor(dict_cursor=True) 40 | cursor.execute("SELECT ...") 41 | rows = cursor.fetchall() 42 | 43 | The session automatically commits when the ``with`` block exits without an 44 | exception. Call ``session.commit()`` explicitly if you need to flush changes 45 | midway through a longer workflow. 46 | 47 | CLI and Task Usage 48 | ================== 49 | 50 | Tasks that previously invoked ``psycopg2.connect`` should construct a manager 51 | from configuration and use sessions to run their queries. For example, both 52 | ``papersorter models`` and ``papersorter predict`` now follow this pattern: 53 | 54 | .. code-block:: python 55 | 56 | from PaperSorter.db import DatabaseManager 57 | 58 | db_manager = DatabaseManager.from_config(db_config, application_name="papersorter-cli-models") 59 | try: 60 | with db_manager.session() as session: 61 | cursor = session.cursor(dict_cursor=True) 62 | cursor.execute("SELECT ...") 63 | # session.commit() when writes are performed 64 | finally: 65 | db_manager.close() 66 | 67 | Within long-running loops, pass the current ``session`` alongside the cursor so 68 | helpers can issue ``session.commit()`` (e.g., after ``execute_batch`` calls). 69 | 70 | Migration Tips 71 | ============== 72 | 73 | - Replace manual ``psycopg2.connect`` calls with ``DatabaseManager.from_config``. 74 | - Wrap database work in ``with db_manager.session():`` and request cursors via 75 | ``session.cursor(dict_cursor=True)`` when row dictionaries are needed. 76 | - Remove explicit ``conn.commit()`` / ``conn.rollback()`` pairs; the session 77 | handles transaction boundaries. Keep explicit ``session.commit()`` invocations 78 | when you intentionally persist work before a long sequence continues. 
79 | - Legacy helpers like ``PaperSorter.feed_database.FeedDatabase`` still manage 80 | their own connections. They can be refactored incrementally to depend on the 81 | manager when practical. 82 | 83 | Adopting the shared manager provides predictable transaction handling, unified 84 | logging, and a single place to evolve database settings across the codebase. 85 | -------------------------------------------------------------------------------- /docs/development/index.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Development Guide 3 | ================= 4 | 5 | Welcome to PaperSorter development! This guide helps contributors, maintainers, and developers who want to extend or modify PaperSorter. 6 | 7 | PaperSorter is built with extensibility in mind, featuring modular architecture that allows for custom feed providers, notification channels, and machine learning models. 8 | 9 | Getting Started 10 | =============== 11 | 12 | Development Environment 13 | ----------------------- 14 | 15 | - Python 3.9+ with virtual environment 16 | - PostgreSQL with pgvector extension 17 | - Code editor with Python support 18 | - Git for version control 19 | 20 | Development Workflow 21 | -------------------- 22 | 23 | 1. Fork and clone the repository 24 | 2. Set up development environment 25 | 3. Create feature branch 26 | 4. Write tests and documentation 27 | 5. Submit pull request 28 | 29 | .. 
toctree:: 30 | :maxdepth: 2 31 | 32 | contributing 33 | architecture 34 | database 35 | testing 36 | plugins 37 | release-process 38 | 39 | Architecture Principles 40 | ======================= 41 | 42 | Modularity 43 | ---------- 44 | 45 | PaperSorter is designed as a collection of loosely coupled modules: 46 | 47 | - **Separation of concerns**: Each module has a single responsibility 48 | - **Dependency injection**: Configuration and dependencies are injected 49 | - **Plugin architecture**: New providers and notifiers can be added easily 50 | 51 | Extensibility 52 | ------------- 53 | 54 | Key extension points: 55 | 56 | - **Feed Providers**: Add support for new content sources 57 | - **Notification Channels**: Implement custom delivery methods 58 | - **ML Models**: Experiment with different recommendation algorithms 59 | - **Web Interface**: Add new API endpoints and UI components 60 | 61 | Code Quality 62 | ============ 63 | 64 | Standards 65 | --------- 66 | 67 | - **PEP 8**: Python code style guidelines 68 | - **Type Hints**: All public APIs include type annotations 69 | - **Documentation**: Comprehensive docstrings and user guides 70 | - **Testing**: Unit tests with good coverage 71 | 72 | Tools 73 | ----- 74 | 75 | - **Black**: Code formatting 76 | - **Flake8**: Linting and style checking 77 | - **MyPy**: Static type checking 78 | - **Pytest**: Testing framework 79 | 80 | Development Commands 81 | ==================== 82 | 83 | .. 
code-block:: bash 84 | 85 | # Setup development environment 86 | python -m venv venv 87 | source venv/bin/activate 88 | pip install -e ".[dev]" 89 | 90 | # Code quality checks 91 | black PaperSorter/ 92 | flake8 PaperSorter/ 93 | mypy PaperSorter/ 94 | 95 | # Run tests 96 | pytest 97 | pytest --cov=PaperSorter 98 | 99 | # Build documentation 100 | cd docs 101 | make html 102 | 103 | Contributing Guidelines 104 | ======================= 105 | 106 | Code Contributions 107 | ------------------ 108 | 109 | - Follow existing code patterns and conventions 110 | - Include tests for new functionality 111 | - Update documentation for user-facing changes 112 | - Keep commits focused and well-described 113 | 114 | Documentation 115 | ------------- 116 | 117 | - API documentation using docstrings 118 | - User guides for new features 119 | - Architecture documentation for significant changes 120 | - Examples and tutorials for complex workflows 121 | 122 | Community 123 | ========= 124 | 125 | - **Issues**: Bug reports and feature requests 126 | - **Discussions**: General questions and ideas 127 | - **Pull Requests**: Code contributions and reviews 128 | - **Wiki**: Community-maintained documentation 129 | 130 | Related Resources 131 | ================= 132 | 133 | - :doc:`../api/index` - Complete API reference 134 | - :doc:`../reference/index` - Technical specifications 135 | - :doc:`../admin-guide/index` - Deployment and operations 136 | -------------------------------------------------------------------------------- /PaperSorter/providers/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | # 23 | 24 | """Base interface for feed providers.""" 25 | 26 | from abc import ABC, abstractmethod 27 | from typing import List, Dict, Optional, Iterator, Any 28 | from datetime import datetime 29 | from dataclasses import dataclass 30 | 31 | 32 | @dataclass 33 | class FeedItem: 34 | """Represents a single feed item/article.""" 35 | 36 | external_id: str 37 | title: str 38 | content: Optional[str] = None 39 | author: Optional[str] = None 40 | origin: str = "" 41 | journal: Optional[str] = None 42 | link: Optional[str] = None 43 | published: datetime = None 44 | 45 | def __post_init__(self): 46 | if self.published is None: 47 | self.published = datetime.now() 48 | 49 | 50 | class FeedProvider(ABC): 51 | """Abstract base class for feed providers.""" 52 | 53 | def __init__(self, config: Dict[str, Any]): 54 | """Initialize the provider with configuration.""" 55 | self.config = config 56 | 57 | @abstractmethod 58 | def get_items( 59 | self, 60 | source: Dict[str, Any], 61 | limit: Optional[int] = None, 62 | since: Optional[datetime] = None, 63 | ) -> Iterator[List[FeedItem]]: 64 | """ 65 | Retrieve feed 
items from a source. 66 | 67 | Args: 68 | source: Source configuration (from feed_sources table) 69 | limit: Maximum number of items to retrieve 70 | since: Only get items published after this date 71 | 72 | Yields: 73 | Lists of FeedItem objects (batched for efficiency) 74 | """ 75 | pass 76 | 77 | @abstractmethod 78 | def update_source_timestamp(self, source_id: int, has_new_items: bool = False): 79 | """ 80 | Update the last_checked timestamp and optionally last_updated for a source. 81 | 82 | Args: 83 | source_id: ID of the source in feed_sources table 84 | has_new_items: Whether new items were found from this source 85 | """ 86 | pass 87 | 88 | @abstractmethod 89 | def get_sources(self, source_type: str) -> List[Dict[str, Any]]: 90 | """ 91 | Get all sources of a specific type that need updating. 92 | 93 | Args: 94 | source_type: Type of sources to retrieve (e.g., 'rss') 95 | 96 | Returns: 97 | List of source dictionaries from feed_sources table 98 | """ 99 | pass 100 | 101 | def validate_source(self, source: Dict[str, Any]) -> bool: 102 | """ 103 | Validate that a source has required fields for this provider. 
104 | 105 | Args: 106 | source: Source configuration to validate 107 | 108 | Returns: 109 | True if valid, False otherwise 110 | """ 111 | return True 112 | -------------------------------------------------------------------------------- /PaperSorter/providers/factory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | # 23 | 24 | """Factory for creating scholarly database providers.""" 25 | 26 | from typing import Dict, Any, Optional 27 | from .scholarly_database import ScholarlyDatabaseProvider 28 | from .semantic_scholar import SemanticScholarProvider 29 | from .openalex import OpenAlexProvider 30 | from ..log import log 31 | 32 | 33 | class ScholarlyDatabaseFactory: 34 | """Factory for creating scholarly database provider instances.""" 35 | 36 | # Available providers 37 | PROVIDERS = { 38 | "semantic_scholar": SemanticScholarProvider, 39 | "semanticscholar": SemanticScholarProvider, # Alias for backward compatibility 40 | "openalex": OpenAlexProvider, 41 | } 42 | 43 | @classmethod 44 | def create_provider( 45 | cls, 46 | provider_name: str, 47 | config: Dict[str, Any] 48 | ) -> Optional[ScholarlyDatabaseProvider]: 49 | """ 50 | Create a scholarly database provider instance. 51 | 52 | Args: 53 | provider_name: Name of the provider (semantic_scholar, openalex) 54 | config: Provider configuration dictionary 55 | 56 | Returns: 57 | Provider instance if successful, None otherwise 58 | """ 59 | # Normalize provider name 60 | provider_name = provider_name.lower().replace("-", "_") 61 | 62 | # Get provider class 63 | provider_class = cls.PROVIDERS.get(provider_name) 64 | if not provider_class: 65 | log.error(f"Unknown scholarly database provider: {provider_name}") 66 | log.info(f"Available providers: {', '.join(cls.PROVIDERS.keys())}") 67 | return None 68 | 69 | # Create provider instance 70 | try: 71 | provider = provider_class(config) 72 | 73 | # Check if provider is configured 74 | if not provider.is_configured(): 75 | log.error(f"{provider.name} is not properly configured") 76 | if provider.requires_api_key: 77 | log.error("API key is required but not provided") 78 | return None 79 | return provider 80 | 81 | except Exception as e: 82 | log.error(f"Failed to create {provider_name} provider: {e}") 83 | return None 84 | 85 | @classmethod 86 | def list_providers(cls) -> 
Dict[str, bool]: 87 | """ 88 | List available providers and their API key requirements. 89 | 90 | Returns: 91 | Dictionary mapping provider names to whether they require API keys 92 | """ 93 | result = {} 94 | for name, provider_class in cls.PROVIDERS.items(): 95 | # Create temporary instance to check requirements 96 | temp = provider_class({}) 97 | result[name] = temp.requires_api_key 98 | return result 99 | 100 | -------------------------------------------------------------------------------- /PaperSorter/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """Centralized configuration loader for PaperSorter. 4 | 5 | This module provides a lightweight, process-wide configuration singleton 6 | loaded from YAML. Prefer importing and calling `get_config()` from any 7 | module that needs configuration values. 8 | 9 | Load precedence: 10 | - Explicit path provided to `get_config(path)` / `reload_config(path)` 11 | - Environment variables: `PAPERSORTER_CONFIG` or `PAPER_SORTER_CONFIG` 12 | - Default path: `./config.yml` 13 | 14 | Usage: 15 | from PaperSorter.config import get_config 16 | cfg = get_config() 17 | db_cfg = cfg.raw.get('db', {}) 18 | """ 19 | 20 | from __future__ import annotations 21 | 22 | import os 23 | import threading 24 | from dataclasses import dataclass 25 | from pathlib import Path 26 | from typing import Any, Dict, Optional 27 | 28 | import yaml 29 | 30 | 31 | _LOCK = threading.RLock() 32 | _CONFIG: Optional["Config"] = None 33 | _CONFIG_PATH: Optional[str] = None 34 | 35 | 36 | @dataclass 37 | class Config: 38 | """Simple configuration holder with convenience accessors.""" 39 | 40 | raw: Dict[str, Any] 41 | 42 | def get(self, path: str, default: Any = None) -> Any: 43 | """Get a nested value using dotted path notation. 
44 | 45 | Example: cfg.get('web.port', 5001) 46 | """ 47 | cur: Any = self.raw 48 | for part in path.split('.'): 49 | if not isinstance(cur, dict) or part not in cur: 50 | return default 51 | cur = cur[part] 52 | return cur 53 | 54 | 55 | def _resolve_config_path(preferred: Optional[str]) -> str: 56 | if preferred: 57 | return str(preferred) 58 | 59 | env = os.environ.get("PAPERSORTER_CONFIG") or os.environ.get("PAPER_SORTER_CONFIG") 60 | if env: 61 | return env 62 | 63 | return "./config.yml" 64 | 65 | 66 | def _load_yaml_config(path: str, explicit: bool) -> Dict[str, Any]: 67 | p = Path(path) 68 | if not p.exists(): 69 | if explicit: 70 | raise FileNotFoundError(f"Configuration file not found: {path}") 71 | # Fallback to empty config when using defaults 72 | return {} 73 | 74 | with p.open("r") as f: 75 | data = yaml.safe_load(f) or {} 76 | if not isinstance(data, dict): 77 | raise ValueError("Configuration root must be a mapping (YAML dict)") 78 | return data 79 | 80 | 81 | def _load_config(path: Optional[str], refresh: bool = False) -> Config: 82 | global _CONFIG, _CONFIG_PATH 83 | with _LOCK: 84 | if _CONFIG is not None and not refresh: 85 | return _CONFIG 86 | 87 | resolved = _resolve_config_path(path) 88 | # Treat as explicit if caller supplied a path or env var is set 89 | explicit = path is not None or os.environ.get("PAPERSORTER_CONFIG") is not None or os.environ.get("PAPER_SORTER_CONFIG") is not None 90 | raw = _load_yaml_config(resolved, explicit=explicit) 91 | 92 | _CONFIG = Config(raw=raw) 93 | _CONFIG_PATH = resolved 94 | return _CONFIG 95 | 96 | 97 | def get_config(path: Optional[str] = None) -> Config: 98 | """Return the process-wide Config instance, loading it if necessary. 99 | 100 | The first explicit path provided will be remembered for subsequent calls. 
101 | """ 102 | if path is not None: 103 | return _load_config(path, refresh=False) 104 | return _load_config(None, refresh=False) 105 | 106 | 107 | def reload_config(path: Optional[str] = None) -> Config: 108 | """Force reload the configuration from the given path or the last one used.""" 109 | # If no path provided, use the last resolved path 110 | target = path if path is not None else _CONFIG_PATH 111 | return _load_config(target, refresh=True) 112 | 113 | 114 | def configured() -> bool: 115 | """Return True if a configuration has been loaded.""" 116 | return _CONFIG is not None 117 | 118 | 119 | def set_config_for_testing(cfg: Config) -> None: 120 | """Override the global configuration (use in tests).""" 121 | global _CONFIG 122 | with _LOCK: 123 | _CONFIG = cfg 124 | 125 | -------------------------------------------------------------------------------- /PaperSorter/tasks/serve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

"""Web server task for PaperSorter."""

from ..log import log, initialize_logging
from ..web import create_app
from ..cli.base import BaseCommand, registry
import argparse


class ServeCommand(BaseCommand):
    """Serve web interface for article labeling."""

    # NOTE: the class docstring and the two attributes below are kept verbatim;
    # presumably BaseCommand/registry surface them in the CLI help output --
    # verify against PaperSorter.cli.base before rewording them.
    name = 'serve'
    help = 'Serve web interface for article labeling and other tasks'

    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Register the command-line options understood by ``serve``.

        Args:
            parser: The argparse parser (or sub-parser) for this command.
        """
        # (flag, add_argument keyword options), registered in this exact order
        # so the generated --help listing keeps its layout.
        option_table = (
            ('--host', {
                'default': '0.0.0.0',
                'help': 'Host to bind to',
            }),
            ('--port', {
                'type': int,
                'default': 5001,
                'help': 'Port to bind to',
            }),
            ('--debug', {
                'action': 'store_true',
                'help': 'Enable debug mode',
            }),
            ('--skip-authentication', {
                'help': 'Skip OAuth authentication and auto-login as specified admin user (DEVELOPMENT ONLY)',
            }),
            ('--demo-mode', {
                'action': 'store_true',
                'help': 'Grant admin privileges to all users (DEMONSTRATION ONLY)',
            }),
        )
        for flag, options in option_table:
            parser.add_argument(flag, **options)

    def handle(self, args: argparse.Namespace, context) -> int:
        """Run the web server with the parsed options.

        Args:
            args: Parsed command-line namespace for this command.
            context: Command context supplied by the caller; not used here.

        Returns:
            0 when ``main`` returns normally, 1 when it raises ``Exception``.
        """
        # Logging is configured before the try block, so a failure here
        # propagates to the caller instead of being reported as a serve error.
        initialize_logging('serve', args.log_file, args.quiet)
        try:
            # Attribute lookups stay inside the try so that a missing option
            # is logged and mapped to exit code 1, as before.
            serve_kwargs = {
                'config': args.config,
                'host': args.host,
                'port': args.port,
                'debug': args.debug,
                'log_file': args.log_file,
                'quiet': args.quiet,
                'skip_authentication': args.skip_authentication,
                'demo_mode': args.demo_mode,
            }
            main(**serve_kwargs)
        except Exception as e:
            log.error(f"Serve failed: {e}")
            return 1
        else:
            return 0

# Register the command
registry.register(ServeCommand) 87 | 88 | 89 | def main(config, host, port, debug, log_file, quiet, skip_authentication, demo_mode=False): 90 | """Serve web interface for article labeling and other tasks.""" 91 | 92 | if skip_authentication: 93 | log.warning( 94 | f"⚠️ AUTHENTICATION BYPASS ENABLED for user '{skip_authentication}' - DEVELOPMENT USE ONLY!" 95 | ) 96 | 97 | if demo_mode: 98 | log.warning( 99 | "⚠️ DEMO MODE ENABLED: All users have admin privileges! - DEMONSTRATION USE ONLY!" 100 | ) 101 | 102 | log.info(f"Starting web server on {host}:{port}") 103 | 104 | app = create_app(config, skip_authentication=skip_authentication, demo_mode=demo_mode) 105 | 106 | # Run the Flask app 107 | app.run(host=host, port=port, debug=debug) 108 | -------------------------------------------------------------------------------- /PaperSorter/services/summarization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """High-level helpers for LLM-backed article summarization.""" 3 | 4 | from __future__ import annotations 5 | 6 | from typing import Sequence 7 | 8 | from ..providers.openai_client import get_openai_client 9 | 10 | SUMMARY_PROMPT_TEMPLATE = """You are an expert scientific literature analyst. Analyze the following collection of research articles and provide a focused summary. 11 | 12 | {articles} 13 | 14 | Start your response directly with the numbered sections below. Do not include any introductory sentences like "Here is my analysis" or "Based on the provided articles". Do not repeat the format instructions (like "2-3 sentences" or "3-4 bullet points") in your output. Begin immediately with: 15 | 16 | 1. **Common Themes**: Identify the main research areas connecting these articles in 2-3 sentences. 17 | 18 | 2. **Key Topics**: List the most significant concepts, methods, or findings that appear across multiple papers as 3-4 bullet points. 19 | 20 | 3. 
**Unique Contributions**: For each article, briefly state what distinguishes it from the others in one sentence. Reference articles using their author-year format (e.g., "Smith 2023 introduces..."). 21 | 22 | 4. **Future Directions**: Based on these papers, provide 2-3 bullet points on the most promising research opportunities. 23 | 24 | Keep your response focused and actionable, using clear Markdown formatting. When referencing specific papers, use the author-year format provided in square brackets for each article.""" 25 | 26 | 27 | class ArticleSummarizer: 28 | """Wraps OpenAI chat completion calls for article summaries.""" 29 | 30 | def __init__( 31 | self, 32 | *, 33 | client, 34 | model: str, 35 | temperature: float = 0.7, 36 | max_tokens: int = 8000, 37 | timeout: float | None = None, 38 | ) -> None: 39 | self._client = client 40 | self._model = model 41 | self._temperature = temperature 42 | self._max_tokens = max_tokens 43 | self._timeout = timeout 44 | 45 | @classmethod 46 | def from_config(cls, config): 47 | api_config = config.get("summarization_api") 48 | if not isinstance(api_config, dict): 49 | return None 50 | 51 | client = get_openai_client("summarization_api", cfg=config, optional=True) 52 | if client is None: 53 | return None 54 | 55 | model = api_config.get("model", "gpt-4o-mini") 56 | temperature = float(api_config.get("temperature", 0.7)) 57 | max_tokens = int(api_config.get("max_tokens", 8000)) 58 | timeout = api_config.get("timeout") 59 | try: 60 | timeout_value = float(timeout) if timeout is not None else None 61 | except (TypeError, ValueError): 62 | timeout_value = None 63 | 64 | return cls( 65 | client=client, 66 | model=model, 67 | temperature=temperature, 68 | max_tokens=max_tokens, 69 | timeout=timeout_value, 70 | ) 71 | 72 | def summarize(self, snippets: Sequence[str]) -> str: 73 | if not snippets: 74 | raise ValueError("No article snippets provided for summarization") 75 | 76 | articles_text = "\n\n---\n\n".join(snippets) 77 | 
prompt = SUMMARY_PROMPT_TEMPLATE.format(articles=articles_text) 78 | 79 | request_kwargs = { 80 | "model": self._model, 81 | "messages": [ 82 | { 83 | "role": "system", 84 | "content": "You are an expert at analyzing and summarizing scientific literature.", 85 | }, 86 | {"role": "user", "content": prompt}, 87 | ], 88 | "temperature": self._temperature, 89 | "max_tokens": self._max_tokens, 90 | } 91 | if self._timeout is not None: 92 | request_kwargs["timeout"] = self._timeout 93 | 94 | response = self._client.chat.completions.create(**request_kwargs) 95 | 96 | message = response.choices[0].message.content 97 | if not message: 98 | raise RuntimeError("Empty response from summarization model") 99 | 100 | if not isinstance(message, str): 101 | message = str(message) 102 | 103 | return message 104 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. PaperSorter documentation master file 2 | 3 | ======================================== 4 | PaperSorter Documentation 5 | ======================================== 6 | 7 | .. image:: https://img.shields.io/badge/python-3.9+-blue.svg 8 | :target: https://www.python.org/downloads/ 9 | :alt: Python Version 10 | 11 | .. image:: https://img.shields.io/badge/license-MIT-green.svg 12 | :target: https://opensource.org/licenses/MIT 13 | :alt: License 14 | 15 | **PaperSorter** is an intelligent academic paper recommendation system that uses machine learning to help researchers stay up-to-date with the latest research in their fields. It automatically fetches papers from RSS feeds, generates embeddings, and uses XGBoost to predict which papers will be most relevant to you. 
16 | 17 | Key Features 18 | ============ 19 | 20 | - 🤖 **Smart Filtering**: Machine learning-based paper recommendations 21 | - 📰 **Multi-Source Support**: RSS/Atom feeds, arXiv, and more 22 | - 🔔 **Flexible Notifications**: Slack, Discord, and email newsletters 23 | - 🎯 **Personalized Models**: Train custom models for different research areas 24 | - 🌐 **Web Interface**: User-friendly labeling and management interface 25 | - 🔍 **Semantic Search**: Find related papers using embedding similarity 26 | - 📄 **Search from PDF**: Select text from PDFs to find similar papers (Paper Connect) 27 | 28 | Quick Start 29 | =========== 30 | 31 | .. code-block:: bash 32 | 33 | # Install PaperSorter 34 | pip install -e . 35 | 36 | # Configure your settings 37 | cp config.example.yml config.yml 38 | # Edit config.yml with your database and API credentials 39 | 40 | # Fetch papers and generate embeddings 41 | papersorter update 42 | 43 | # Train your first model (after labeling ~100 papers) 44 | papersorter train 45 | 46 | # Send notifications 47 | papersorter broadcast 48 | 49 | Documentation Overview 50 | ====================== 51 | 52 | .. toctree:: 53 | :maxdepth: 2 54 | :caption: Getting Started 55 | 56 | getting-started/index 57 | getting-started/installation 58 | getting-started/quickstart 59 | getting-started/first-model 60 | 61 | .. toctree:: 62 | :maxdepth: 2 63 | :caption: User Guide 64 | 65 | user-guide/index 66 | user-guide/configuration 67 | user-guide/feed-sources 68 | user-guide/training-models 69 | user-guide/notifications 70 | user-guide/search-from-pdf 71 | user-guide/web-interface 72 | user-guide/workflows 73 | 74 | .. toctree:: 75 | :maxdepth: 2 76 | :caption: Administrator Guide 77 | 78 | admin-guide/index 79 | admin-guide/deployment 80 | admin-guide/database-setup 81 | admin-guide/backup-restore 82 | admin-guide/monitoring 83 | admin-guide/security 84 | admin-guide/troubleshooting 85 | 86 | .. 
toctree:: 87 | :maxdepth: 2 88 | :caption: CLI Reference 89 | 90 | cli-reference/index 91 | cli-reference/commands 92 | cli-reference/examples 93 | 94 | .. toctree:: 95 | :maxdepth: 2 96 | :caption: API Documentation 97 | 98 | api/index 99 | api/modules 100 | api/database 101 | api/providers 102 | api/notifications 103 | api/web 104 | 105 | .. toctree:: 106 | :maxdepth: 2 107 | :caption: Development 108 | 109 | development/index 110 | development/contributing 111 | development/architecture 112 | development/testing 113 | development/plugins 114 | development/release-process 115 | 116 | .. toctree:: 117 | :maxdepth: 2 118 | :caption: Tutorials 119 | 120 | tutorials/index 121 | tutorials/gmail-setup 122 | tutorials/slack-integration 123 | tutorials/custom-embeddings 124 | tutorials/multi-model 125 | 126 | .. toctree:: 127 | :maxdepth: 2 128 | :caption: Reference 129 | 130 | reference/index 131 | reference/configuration-reference 132 | reference/database-schema 133 | reference/environment-variables 134 | reference/glossary 135 | 136 | .. toctree:: 137 | :maxdepth: 1 138 | :caption: About 139 | 140 | changelog 141 | license 142 | 143 | Indices and Tables 144 | ================== 145 | 146 | * :ref:`genindex` 147 | * :ref:`modindex` 148 | * :ref:`search` 149 | 150 | Need Help? 151 | ========== 152 | 153 | - 📖 Check the documentation guides 154 | - 🐛 Report issues on `GitHub <https://github.com/ChangLabSNU/PaperSorter/issues>`_ 155 | - 💬 Join our community discussions 156 | 157 | License 158 | ======= 159 | 160 | PaperSorter is released under the MIT License. See the LICENSE.txt file for details. 
-------------------------------------------------------------------------------- /PaperSorter/templates/feedback_success.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Feedback Recorded - {{ site_name }}{% endblock %} 4 | 5 | {% block header %}{% endblock %} 6 | 7 | {% block styles %} 8 | 9 | 114 | {% endblock %} 115 | 116 | {% block main_container %} 117 |
118 | 121 | 122 |

Thank You!

123 | 124 |
125 | Your feedback has been recorded. You marked 126 | "{{ feed_title }}" 127 | as {{ feedback_type }}. 128 |
129 | 130 | 138 | 139 | 142 |
143 | {% endblock %} 144 | -------------------------------------------------------------------------------- /PaperSorter/templates/settings.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Settings - {{ site_name }}{% endblock %} 4 | {% block header_title %}Settings{% endblock %} 5 | 6 | {% block header_actions %} 7 | ← Back to Papers 8 | {{ super() }} 9 | {% endblock %} 10 | 11 | {% block styles %} 12 | 103 | {% endblock %} 104 | 105 | {% block content %} 106 |
107 | 108 |
📰
109 |
Feeds
110 |
111 | Manage RSS feed subscriptions 112 |
113 |
114 | 115 | 116 |
📡
117 |
Channels
118 |
119 | Manage broadcast channels for notifications 120 |
121 |
122 | 123 | 124 |
🤖
125 |
Prediction Models
126 |
127 | Configure preference prediction models for paper scoring 128 |
129 |
130 | 131 | 132 |
👥
133 |
Users
134 |
135 | Manage users and their preferences 136 |
137 |
138 |
139 | {% endblock %} 140 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # PaperSorter Docker Environment Configuration 2 | # Copy this file to .env and fill in your values 3 | 4 | # ============================================ 5 | # Database Configuration 6 | # ============================================ 7 | POSTGRES_DB=papersorter 8 | POSTGRES_USER=papersorter 9 | POSTGRES_PASSWORD=changeme # CHANGE THIS! 10 | 11 | # ============================================ 12 | # Application Configuration 13 | # ============================================ 14 | # Flask secret key for session management (generate with: python -c "import secrets; print(secrets.token_hex(32))") 15 | FLASK_SECRET_KEY=your-secret-key-here 16 | 17 | # Site configuration 18 | SITE_NAME=PaperSorter 19 | BASE_URL=http://localhost # Change to https://your-domain.com for production 20 | DOMAIN=localhost # Change to your-domain.com for production 21 | ADMIN_EMAIL=admin@example.com # Used for Let's Encrypt SSL certificates 22 | DEFAULT_TIMEZONE=UTC # Default timezone for new users (e.g., America/New_York, Asia/Seoul) 23 | DEFAULT_DATE_FORMAT=MMM D, YYYY # Default date format (e.g., YYYY-MM-DD, DD/MM/YYYY) 24 | # Comma-separated list of admin emails or ORCID IDs 25 | ADMIN_USERS= # Example: admin@example.com,0000-0002-1825-0097@orcid.org 26 | 27 | # ============================================ 28 | # OAuth Configuration 29 | # ============================================ 30 | # Google OAuth (https://console.cloud.google.com/) 31 | GOOGLE_CLIENT_ID= 32 | GOOGLE_CLIENT_SECRET= 33 | 34 | # GitHub OAuth (https://github.com/settings/developers) 35 | GITHUB_CLIENT_ID= 36 | GITHUB_CLIENT_SECRET= 37 | 38 | # ORCID OAuth (https://orcid.org/developer-tools) 39 | ORCID_CLIENT_ID= 40 | ORCID_CLIENT_SECRET= 41 | 42 | # ============================================ 43 | # API Keys 44 | 
# ============================================ 45 | # Embedding API configuration 46 | EMBEDDING_API_KEY= 47 | EMBEDDING_API_URL=https://api.openai.com/v1 # Or your custom endpoint 48 | EMBEDDING_MODEL=text-embedding-3-large # Model name for embeddings 49 | EMBEDDING_DIMENSIONS= # Optional: dimensions (e.g., 1536 for pgvector HNSW indexing) 50 | 51 | # Summarization API (e.g., Gemini) 52 | SUMMARIZATION_API_KEY= 53 | SUMMARIZATION_API_URL=https://generativelanguage.googleapis.com/v1beta/openai 54 | SUMMARIZATION_MODEL=gemini-2.0-flash-thinking-exp-01-21 # Model for summarization 55 | 56 | # Scholarly Database Provider 57 | SCHOLARLY_PROVIDER=semantic_scholar # Options: semantic_scholar, openalex 58 | MATCH_DATE_TOLERANCE_DAYS=60 # Date tolerance for automatic article matching 59 | 60 | # Semantic Scholar API 61 | SEMANTIC_SCHOLAR_API_KEY= 62 | # Optional: Retry configuration for rate limits (defaults shown) 63 | # SEMANTIC_SCHOLAR_MAX_RETRIES=5 # Number of retries for 429 errors 64 | # SEMANTIC_SCHOLAR_RETRY_BACKOFF_BASE=2 # Exponential backoff base 65 | # SEMANTIC_SCHOLAR_THROTTLE=1 # Seconds between requests 66 | 67 | # OpenAlex (if using instead of Semantic Scholar) 68 | OPENALEX_EMAIL=your-email@domain.com 69 | # Optional: Retry configuration for rate limits (defaults shown) 70 | # OPENALEX_MAX_RETRIES=5 # Number of retries for 429 errors 71 | # OPENALEX_RETRY_BACKOFF_BASE=2 # Exponential backoff base 72 | # OPENALEX_THROTTLE=0.1 # Seconds between requests 73 | 74 | # ============================================ 75 | # Email/SMTP Configuration 76 | # ============================================ 77 | # SMTP provider settings for email notifications 78 | # Option 1: Use a predefined provider (gmail, outlook) 79 | SMTP_PROVIDER=gmail # Options: gmail, outlook, custom 80 | # For Gmail/Outlook, only username and password are needed: 81 | SMTP_USERNAME=your-email@gmail.com 82 | SMTP_PASSWORD= # Use app-specific password, not regular password 83 | 84 | # Option 2: 
Custom SMTP configuration (when SMTP_PROVIDER=custom) 85 | SMTP_HOST=smtp.example.com 86 | SMTP_PORT=587 87 | SMTP_ENCRYPTION=tls # Options: tls, ssl, none 88 | SMTP_TIMEOUT=30 89 | 90 | # Email notification settings 91 | EMAIL_FROM=papersorter@example.com # Sender address for notifications 92 | EMAIL_FROM_NAME=PaperSorter Newsletter # Sender display name 93 | EMAIL_SUBJECT_TEMPLATE=Research Papers Digest - {date:%Y-%m-%d} # Subject line template 94 | 95 | # ============================================ 96 | # Port Configuration (optional) 97 | # ============================================ 98 | HTTP_PORT=80 99 | HTTPS_PORT=443 100 | 101 | # ============================================ 102 | # Resource Limits (optional, for production) 103 | # ============================================ 104 | # Uncomment and adjust for production deployments 105 | # WEB_MEMORY_LIMIT=4G 106 | # WEB_CPU_LIMIT=2 107 | # DB_MEMORY_LIMIT=2G 108 | # DB_CPU_LIMIT=2 -------------------------------------------------------------------------------- /docs/api/modules.rst: -------------------------------------------------------------------------------- 1 | API Modules Reference 2 | ===================== 3 | 4 | This section contains the auto-generated API documentation for PaperSorter modules. 5 | 6 | .. contents:: Module Overview 7 | :local: 8 | :depth: 2 9 | 10 | Core Modules 11 | ------------ 12 | 13 | PaperSorter.feed_database 14 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 15 | 16 | .. automodule:: PaperSorter.feed_database 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | :special-members: __init__ 21 | 22 | PaperSorter.embedding_database 23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 24 | 25 | .. automodule:: PaperSorter.embedding_database 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | :special-members: __init__ 30 | 31 | Provider Modules 32 | ---------------- 33 | 34 | PaperSorter.providers.base 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | .. 
automodule:: PaperSorter.providers.base 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | :special-members: __init__ 42 | 43 | PaperSorter.providers.rss 44 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | .. automodule:: PaperSorter.providers.rss 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | Task Modules 52 | ------------ 53 | 54 | PaperSorter.tasks.update 55 | ~~~~~~~~~~~~~~~~~~~~~~~~ 56 | 57 | .. automodule:: PaperSorter.tasks.update 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | PaperSorter.tasks.train 63 | ~~~~~~~~~~~~~~~~~~~~~~~ 64 | 65 | .. automodule:: PaperSorter.tasks.train 66 | :members: 67 | :undoc-members: 68 | :show-inheritance: 69 | 70 | PaperSorter.tasks.broadcast 71 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 72 | 73 | .. automodule:: PaperSorter.tasks.broadcast 74 | :members: 75 | :undoc-members: 76 | :show-inheritance: 77 | 78 | PaperSorter.tasks.serve 79 | ~~~~~~~~~~~~~~~~~~~~~~~ 80 | 81 | .. automodule:: PaperSorter.tasks.serve 82 | :members: 83 | :undoc-members: 84 | :show-inheritance: 85 | 86 | Web Modules 87 | ----------- 88 | 89 | PaperSorter.web.app 90 | ~~~~~~~~~~~~~~~~~~~ 91 | 92 | .. automodule:: PaperSorter.web.app 93 | :members: 94 | :undoc-members: 95 | :show-inheritance: 96 | 97 | PaperSorter.web.main 98 | ~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. automodule:: PaperSorter.web.main 101 | :members: 102 | :undoc-members: 103 | :show-inheritance: 104 | 105 | PaperSorter.web.auth.models 106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 107 | 108 | .. automodule:: PaperSorter.web.auth.models 109 | :members: 110 | :undoc-members: 111 | :show-inheritance: 112 | 113 | PaperSorter.web.auth.routes 114 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 115 | 116 | .. automodule:: PaperSorter.web.auth.routes 117 | :members: 118 | :undoc-members: 119 | :show-inheritance: 120 | 121 | PaperSorter.web.auth.decorators 122 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 123 | 124 | .. 
automodule:: PaperSorter.web.auth.decorators 125 | :members: 126 | :undoc-members: 127 | :show-inheritance: 128 | 129 | API Endpoints 130 | ------------- 131 | 132 | PaperSorter.web.api.feeds 133 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 134 | 135 | .. automodule:: PaperSorter.web.api.feeds 136 | :members: 137 | :undoc-members: 138 | :show-inheritance: 139 | 140 | PaperSorter.web.api.search 141 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 142 | 143 | .. automodule:: PaperSorter.web.api.search 144 | :members: 145 | :undoc-members: 146 | :show-inheritance: 147 | 148 | PaperSorter.web.api.settings 149 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 150 | 151 | .. automodule:: PaperSorter.web.api.settings 152 | :members: 153 | :undoc-members: 154 | :show-inheritance: 155 | 156 | PaperSorter.web.api.user 157 | ~~~~~~~~~~~~~~~~~~~~~~~~ 158 | 159 | .. automodule:: PaperSorter.web.api.user 160 | :members: 161 | :undoc-members: 162 | :show-inheritance: 163 | 164 | Utility Modules 165 | --------------- 166 | 167 | PaperSorter.utils.email 168 | ~~~~~~~~~~~~~~~~~~~~~~~ 169 | 170 | .. automodule:: PaperSorter.utils.email 171 | :members: 172 | :undoc-members: 173 | :show-inheritance: 174 | 175 | PaperSorter.web.utils.database 176 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 177 | 178 | .. automodule:: PaperSorter.web.utils.database 179 | :members: 180 | :undoc-members: 181 | :show-inheritance: 182 | 183 | Model Classes 184 | ------------- 185 | 186 | PaperSorter.web.models.semantic_scholar 187 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 188 | 189 | .. automodule:: PaperSorter.web.models.semantic_scholar 190 | :members: 191 | :undoc-members: 192 | :show-inheritance: 193 | 194 | Background Jobs 195 | --------------- 196 | 197 | PaperSorter.web.jobs.poster 198 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 199 | 200 | .. 
automodule:: PaperSorter.web.jobs.poster 201 | :members: 202 | :undoc-members: 203 | :show-inheritance: -------------------------------------------------------------------------------- /PaperSorter/static/css/main.css: -------------------------------------------------------------------------------- 1 | /* PaperSorter Main CSS - Import all stylesheets */ 2 | 3 | /* 4 | * CSS Architecture: 5 | * 1. Variables - Design tokens and CSS custom properties 6 | * 2. Base - Reset, typography, and fundamental styles 7 | * 3. Components - Reusable UI components 8 | * 4. Layout - Application-specific layouts 9 | */ 10 | 11 | /* Import CSS Variables (must be first) */ 12 | @import url('variables.css'); 13 | 14 | /* Import Base Styles */ 15 | @import url('base.css'); 16 | 17 | /* Import Component Styles */ 18 | @import url('components.css'); 19 | 20 | /* Import Layout Styles */ 21 | @import url('layout.css'); 22 | 23 | /* =================================== */ 24 | /* Additional Global Overrides */ 25 | /* =================================== */ 26 | 27 | /* Ensure smooth scrolling */ 28 | html { 29 | scroll-behavior: smooth; 30 | } 31 | 32 | /* Focus visible only for keyboard navigation */ 33 | *:focus:not(:focus-visible) { 34 | outline: none; 35 | } 36 | 37 | /* Better text rendering */ 38 | body { 39 | text-rendering: optimizeLegibility; 40 | } 41 | 42 | /* Prevent text selection on UI elements */ 43 | button, 44 | .btn, 45 | .nav-link, 46 | .badge, 47 | .tag { 48 | user-select: none; 49 | } 50 | 51 | /* Ensure images are responsive by default */ 52 | img { 53 | max-width: 100%; 54 | height: auto; 55 | display: block; 56 | } 57 | 58 | /* =================================== */ 59 | /* Print Styles */ 60 | /* =================================== */ 61 | 62 | @media print { 63 | /* Hide navigation and action elements */ 64 | .header, 65 | .nav-links, 66 | .action-bar, 67 | .feed-actions, 68 | .hamburger-menu, 69 | .modal, 70 | .btn, 71 | .pagination { 72 | display: none !important; 
73 | } 74 | 75 | /* Reset backgrounds for print */ 76 | body { 77 | background: white; 78 | color: black; 79 | padding: 0; 80 | } 81 | 82 | .card, 83 | .feed-item { 84 | box-shadow: none; 85 | border: 1px solid #ddd; 86 | page-break-inside: avoid; 87 | } 88 | 89 | /* Ensure links are visible */ 90 | a { 91 | color: black; 92 | text-decoration: underline; 93 | } 94 | 95 | a[href]:after { 96 | content: " (" attr(href) ")"; 97 | font-size: 0.8em; 98 | } 99 | } 100 | 101 | /* =================================== */ 102 | /* Accessibility Improvements */ 103 | /* =================================== */ 104 | 105 | /* Skip to main content link */ 106 | .skip-to-main { 107 | position: absolute; 108 | top: -40px; 109 | left: 0; 110 | background: var(--color-primary); 111 | color: var(--text-white); 112 | padding: var(--spacing-sm) var(--spacing-base); 113 | text-decoration: none; 114 | z-index: var(--z-index-tooltip); 115 | border-radius: var(--radius-base); 116 | } 117 | 118 | .skip-to-main:focus { 119 | top: var(--spacing-sm); 120 | } 121 | 122 | /* Screen reader only text */ 123 | .sr-only { 124 | position: absolute; 125 | width: 1px; 126 | height: 1px; 127 | padding: 0; 128 | margin: -1px; 129 | overflow: hidden; 130 | clip: rect(0, 0, 0, 0); 131 | white-space: nowrap; 132 | border: 0; 133 | } 134 | 135 | /* High contrast mode support */ 136 | @media (prefers-contrast: high) { 137 | .card, 138 | .feed-item, 139 | .btn, 140 | .form-control { 141 | border: 2px solid; 142 | } 143 | } 144 | 145 | /* Reduced motion support */ 146 | @media (prefers-reduced-motion: reduce) { 147 | *, 148 | *::before, 149 | *::after { 150 | animation-duration: 0.01ms !important; 151 | animation-iteration-count: 1 !important; 152 | transition-duration: 0.01ms !important; 153 | scroll-behavior: auto !important; 154 | } 155 | } 156 | 157 | /* =================================== */ 158 | /* Dark Mode Preparation */ 159 | /* =================================== */ 160 | 161 | /* 162 | * Dark mode 
styles are prepared but not active. 163 | * They will be activated when data-theme="dark" is set on body element. 164 | * This structure allows for easy dark mode implementation in the future. 165 | */ 166 | 167 | [data-theme="dark"] { 168 | /* Dark mode overrides will be added here */ 169 | } 170 | 171 | /* =================================== */ 172 | /* Browser-Specific Fixes */ 173 | /* =================================== */ 174 | 175 | /* Firefox */ 176 | @-moz-document url-prefix() { 177 | select.form-control { 178 | text-indent: 0.01px; 179 | text-overflow: ''; 180 | } 181 | } 182 | 183 | /* Edge and IE */ 184 | @supports (-ms-ime-align: auto) { 185 | select.form-control { 186 | padding-right: var(--spacing-xl); 187 | } 188 | } 189 | 190 | /* Safari */ 191 | @supports (-webkit-appearance: none) { 192 | input[type="search"]::-webkit-search-decoration, 193 | input[type="search"]::-webkit-search-cancel-button, 194 | input[type="search"]::-webkit-search-results-button, 195 | input[type="search"]::-webkit-search-results-decoration { 196 | -webkit-appearance: none; 197 | } 198 | } -------------------------------------------------------------------------------- /PaperSorter/cli/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2024-2025 Seoul National University 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

"""Base command class and registry for PaperSorter CLI."""

import argparse
from abc import ABC, abstractmethod
from typing import Dict, Type, Optional, Any


class BaseCommand(ABC):
    """Base class for all CLI commands.

    Subclasses set ``name`` (the registry key; underscores become hyphens
    on the command line) and ``help`` (the one-line subcommand summary),
    and implement ``add_arguments`` and ``handle``.
    """

    # Both default to None; the registry refuses commands without a name.
    name: Optional[str] = None
    help: Optional[str] = None

    @abstractmethod
    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Add command-specific arguments to the parser."""

    @abstractmethod
    def handle(self, args: argparse.Namespace, context: Any) -> int:
        """
        Execute the command.

        Args:
            args: Parsed command-line arguments
            context: Command context with config and utilities

        Returns:
            Exit code (0 for success)
        """

    def add_common_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Add common arguments shared by all commands.

        Registers ``--config``/``-c`` (default ``./config.yml``),
        ``--log-file`` and ``-q``/``--quiet`` on *parser*.
        """
        parser.add_argument(
            '--config', '-c',
            default='./config.yml',
            help='Database configuration file'
        )
        parser.add_argument(
            '--log-file',
            help='Log file path'
        )
        parser.add_argument(
            '-q', '--quiet',
            action='store_true',
            help='Suppress log output'
        )


class CommandRegistry:
    """Registry for managing CLI commands.

    Commands are stored by their ``name`` attribute and instantiated
    lazily, once, the first time they are requested.
    """

    def __init__(self):
        # Registered command classes (already-built instances are tolerated).
        self._commands: Dict[str, Type[BaseCommand]] = {}
        # Lazily created command objects, keyed like ``_commands``.
        self._instances: Dict[str, BaseCommand] = {}

    def register(self, command_class: Type[BaseCommand]) -> None:
        """Register a command class (or an already-built command instance).

        Args:
            command_class: The command to register; its ``name`` attribute
                is used as the registry key.

        Raises:
            ValueError: If the command has no (or an empty) ``name``.
        """
        if not command_class.name:
            # Instances have no ``__name__``; fall back to their type's name
            # so a nameless instance raises ValueError, not AttributeError.
            label = getattr(
                command_class, '__name__', type(command_class).__name__
            )
            raise ValueError(f"Command {label} must have a name")
        self._commands[command_class.name] = command_class
        # Drop any instance cached for a previous registration of this name,
        # otherwise get_command() would keep returning the replaced command.
        self._instances.pop(command_class.name, None)

    def get_command(self, name: str) -> Optional[BaseCommand]:
        """Get a command instance by name.

        Returns:
            The cached (or newly created) command instance, or ``None`` when
            no command is registered under *name*.
        """
        if name not in self._instances:
            registered = self._commands.get(name)
            if registered is None:
                return None
            # Check if it's already an instance; otherwise instantiate it.
            if isinstance(registered, BaseCommand):
                self._instances[name] = registered
            else:
                self._instances[name] = registered()
        return self._instances[name]

    def create_subparsers(self, parser: argparse.ArgumentParser) -> None:
        """Create subparsers for all registered commands.

        Each command gets a subparser (in name order) carrying the common
        arguments, its own arguments, and a ``command_handler`` default that
        points at the command instance.
        """
        subparsers = parser.add_subparsers(
            dest='command',
            help='Available commands',
            metavar=''
        )

        for name in sorted(self._commands):
            command = self.get_command(name)

            # Replace underscores with hyphens in command names for CLI
            cli_name = name.replace('_', '-')

            subparser = subparsers.add_parser(
                cli_name,
                help=command.help,
                formatter_class=argparse.RawDescriptionHelpFormatter
            )

            # Add common arguments
            command.add_common_arguments(subparser)

            # Add command-specific arguments
            command.add_arguments(subparser)

            # Store the command instance for later execution
            subparser.set_defaults(command_handler=command)

    def list_commands(self) -> list:
        """Return a list of registered command names."""
        return sorted(self._commands)


# Global registry instance
registry = CommandRegistry()