├── .dockerignore ├── .env.example ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── feature_request.md └── workflows │ ├── docker-build-push.yml │ ├── manualPush.yml │ ├── release-to-discord.yml │ └── stale.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.rag ├── LICENSE ├── OPENAPI └── openapi.json ├── PRIVACY_POLICY.md ├── RAG-DEV-GUIDE.md ├── README.md ├── SECURITY.md ├── config └── config.js ├── dashboard.png ├── docker-compose.yml ├── docs ├── README.md ├── chat.png ├── favicon.ico ├── hero.png ├── history.png ├── index.html ├── playground.png ├── ppai_icon.png └── settings.png ├── ecosystem.config.js ├── eslint.config.mjs ├── icon.png ├── icon.webp ├── jsdoc_standards.md ├── main.py ├── models └── document.js ├── package-lock.json ├── package-lock.json.bak ├── package.json ├── paperless-ai-chrome.zip ├── ppairag.png ├── prettierrc.json ├── preview.png ├── public ├── css │ ├── chat.css │ ├── dashboard.css │ ├── settings.css │ └── setup.css ├── favicon.ico └── js │ ├── chat.js │ ├── dashboard.js │ ├── history.js │ ├── manual.js │ ├── playground-analyzer.js │ ├── playground.js │ ├── settings.js │ └── setup.js ├── rag_ready.png ├── requirements.txt ├── routes ├── auth.js ├── rag.js └── setup.js ├── schemas.js ├── server.js ├── services ├── aiServiceFactory.js ├── azureService.js ├── chatService.js ├── customService.js ├── debugService.js ├── documentsService.js ├── loggerService.js ├── manualService.js ├── ollamaService.js ├── openaiService.js ├── paperlessService.js ├── ragService.js ├── serviceUtils.js └── setupService.js ├── setup.png ├── start-services.sh ├── swagger.js └── views ├── chat.ejs ├── dashboard.ejs ├── debug.ejs ├── history.ejs ├── index.ejs ├── layout.ejs ├── login.ejs ├── manual.ejs ├── manual.ejs.bak ├── playground.ejs ├── rag.ejs ├── settings.ejs ├── setup.ejs └── template.ejs /.dockerignore: -------------------------------------------------------------------------------- 1 | 
node_modules 2 | npm-debug.log 3 | data 4 | .git 5 | .gitignore 6 | .env 7 | *.md 8 | api-test.ps1 9 | openai_debug.log 10 | prompt.bak 11 | data_bak/ 12 | data_bak2/ 13 | data_*/ 14 | preview.png 15 | .env.bak 16 | data bak/ 17 | .env* 18 | docker-compose-dev.yml 19 | public/images/ 20 | delete_all.js 21 | documents.json 22 | apitest.js 23 | logs/* 24 | api_correspondent.js 25 | prompt.txt 26 | api_test_r1.js 27 | openrouter.js 28 | /chromadb 29 | rag_config.conf 30 | indexed.conf 31 | indexing_complete.flag 32 | RAGZ_README.md 33 | /chromadb 34 | /chromadb/* 35 | rag_config.conf 36 | indexed.conf 37 | main.py.bak 38 | __pycache__/ 39 | 40 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | PAPERLESS_AI_INITIAL_SETUP=yes 2 | PAPERLESS_API_URL=http://localhost:8000/api 3 | PAPERLESS_API_TOKEN=xxxxxxxxxxxxxxxxxxxx 4 | PAPERLESS_USERNAME=clusterzx 5 | AI_PROVIDER=custom 6 | OPENAI_API_KEY= 7 | OPENAI_MODEL= 8 | OLLAMA_API_URL=http://localhost:11434 9 | OLLAMA_MODEL=llama3.2 10 | SCAN_INTERVAL=*/30 * * * * 11 | SYSTEM_PROMPT=`You are a personalized document analyzer. Your task is to analyze documents and extract relevant information.\n\nAnalyze the document content and extract the following information into a structured JSON object:\n\n1. title: Create a concise, meaningful title for the document\n2. correspondent: Identify the sender/institution but do not include addresses\n3. tags: Select up to 4 relevant thematic tags\n4. document_date: Extract the document date (format: YYYY-MM-DD)\n5. language: Determine the document language (e.g. 
"de" or "en")\n \nImportant rules for the analysis:\n\nFor tags:\n- FIRST check the existing tags before suggesting new ones\n- Use only relevant categories\n- Maximum 4 tags per document, less if sufficient (at least 1)\n- Avoid generic or too specific tags\n- Use only the most important information for tag creation\n- The output language is the one used in the document! IMPORTANT!\n\nFor the title:\n- Short and concise, NO ADDRESSES\n- Contains the most important identification features\n- For invoices/orders, mention invoice/order number if available\n- The output language is the one used in the document! IMPORTANT!\n\nFor the correspondent:\n- Identify the sender or institution\n When generating the correspondent, always create the shortest possible form of the company name (e.g. "Amazon" instead of "Amazon EU SARL, German branch")\n\nFor the document date:\n- Extract the date of the document\n- Use the format YYYY-MM-DD\n- If multiple dates are present, use the most relevant one\n\nFor the language:\n- Determine the document language\n- Use language codes like "de" for German or "en" for English\n- If the language is not clear, use "und" as a placeholder 12 | ` 13 | PROCESS_PREDEFINED_DOCUMENTS=yes 14 | TAGS=pre-process 15 | ADD_AI_PROCESSED_TAG=no 16 | AI_PROCESSED_TAG_NAME=ai-processed 17 | USE_PROMPT_TAGS=no 18 | PROMPT_TAGS= 19 | USE_EXISTING_DATA=no 20 | API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxx 21 | CUSTOM_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx 22 | CUSTOM_BASE_URL=https://api.deepseek.com/v1 23 | CUSTOM_MODEL=deepseek-chat 24 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [clusterzx] 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: "🐞 Bug Report" 2 | 
description: "Report a bug or unexpected behavior" 3 | title: "[Bug]: " 4 | labels: ["bug", "triage"] 5 | assignees: 6 | - "" 7 | projects: ["clusterzx/paperless-ai"] 8 | 9 | body: 10 | - type: markdown 11 | attributes: 12 | value: | 13 | ## 🐞 Bug Report 14 | Thanks for taking the time to report an issue! Please fill in as much detail as possible to help us investigate and fix it. 15 | 16 | **⚠️ IMPORTANT: Issues that are missing required information or do not follow this template will be deleted without notice. Please ensure all required fields are completed.** 17 | 18 | - type: input 19 | id: bug_summary 20 | attributes: 21 | label: "🔍 Bug Summary" 22 | description: "Briefly describe the issue." 23 | placeholder: "A short, clear summary of the bug..." 24 | validations: 25 | required: true 26 | 27 | - type: textarea 28 | id: bug_description 29 | attributes: 30 | label: "📖 Description" 31 | description: "Provide a detailed description of the bug, including any observations." 32 | placeholder: "Explain the issue, expected behavior, and actual behavior." 33 | validations: 34 | required: true 35 | 36 | - type: textarea 37 | id: reproduction_steps 38 | attributes: 39 | label: "🔄 Steps to Reproduce" 40 | description: "How do we reproduce the issue?" 41 | placeholder: | 42 | 1. Go to '...' 43 | 2. Click on '...' 44 | 3. Scroll down to '...' 45 | 4. See error 46 | validations: 47 | required: true 48 | 49 | - type: textarea 50 | id: expected_behavior 51 | attributes: 52 | label: "✅ Expected Behavior" 53 | description: "What should happen instead?" 54 | placeholder: "Describe the expected outcome." 55 | validations: 56 | required: true 57 | 58 | - type: textarea 59 | id: actual_behavior 60 | attributes: 61 | label: "❌ Actual Behavior" 62 | description: "What actually happens?" 63 | placeholder: "Describe what you see instead of the expected behavior." 
64 | validations: 65 | required: true 66 | 67 | - type: input 68 | id: paperless_ai_version 69 | attributes: 70 | label: "🏷️ Paperless-AI Version" 71 | description: "What version of Paperless-AI are you using? You can find this in your settings or docker-compose file." 72 | placeholder: "e.g. v1.2.3, latest, commit hash, etc." 73 | validations: 74 | required: true 75 | 76 | - type: textarea 77 | id: docker_logs 78 | attributes: 79 | label: "📜 Docker Logs" 80 | description: "Upload Docker logs (or paste relevant log snippets)." 81 | placeholder: "Drag & drop files here or paste logs." 82 | render: shell 83 | validations: 84 | required: true 85 | 86 | - type: textarea 87 | id: paperless_ngx_logs 88 | attributes: 89 | label: "📜 Paperless-ngx Logs" 90 | description: "Upload Paperless-ngx logs (or paste relevant log snippets)." 91 | placeholder: "Drag & drop files here or paste logs." 92 | render: shell 93 | 94 | - type: textarea 95 | id: screenshots 96 | attributes: 97 | label: "🖼️ Screenshots of your settings page" 98 | description: "Attach screenshots of your current paperless-ai settings." 99 | placeholder: "Drag & drop images or provide a link." 100 | 101 | - type: dropdown 102 | id: operating_system 103 | attributes: 104 | label: "🖥️ Desktop Environment" 105 | description: "Select the OS where the issue occurs." 106 | options: 107 | - "Windows" 108 | - "macOS" 109 | - "Linux" 110 | - "Other" 111 | validations: 112 | required: true 113 | 114 | - type: input 115 | id: os_version 116 | attributes: 117 | label: "💻 OS Version" 118 | description: "Enter your OS version." 119 | placeholder: "e.g. Windows 11, macOS 14, Ubuntu 22.04" 120 | 121 | - type: dropdown 122 | id: browser 123 | attributes: 124 | label: "🌐 Browser" 125 | description: "Which browser are you using (if applicable)?" 
126 | options: 127 | - "Chrome" 128 | - "Safari" 129 | - "Firefox" 130 | - "Edge" 131 | - "Other" 132 | 133 | - type: input 134 | id: browser_version 135 | attributes: 136 | label: "🔢 Browser Version" 137 | description: "Enter your browser version (if applicable)." 138 | placeholder: "e.g. 120.0.6099.199" 139 | 140 | - type: input 141 | id: smartphone_browser 142 | attributes: 143 | label: "🌐 Mobile Browser" 144 | description: "Specify the browser you used on mobile (if applicable)." 145 | placeholder: "e.g. Safari, Chrome, Firefox" 146 | 147 | - type: checkboxes 148 | id: additional_info 149 | attributes: 150 | label: "📝 Additional Information" 151 | description: "Select any that apply." 152 | options: 153 | - label: "I have checked existing issues and this is not a duplicate" 154 | required: true 155 | - label: "I have tried debugging this issue on my own" 156 | - label: "I can provide a fix and submit a PR" 157 | - label: "I am sure that this problem is affecting everyone, not only me" 158 | - label: "I have provided all required information above" 159 | required: true 160 | 161 | - type: textarea 162 | id: extra_notes 163 | attributes: 164 | label: "📌 Extra Notes" 165 | description: "Anything else you'd like to add?" 166 | placeholder: "Additional comments or findings..." 167 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/docker-build-push.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [published] # Only triggers when a release is published 4 | schedule: 5 | - cron: "0 0 * * *" # Nightly build 6 | jobs: 7 | docker: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v3 12 | with: 13 | fetch-depth: 0 14 | 15 | - name: Set up QEMU 16 | uses: docker/setup-qemu-action@v2 17 | 18 | - name: Set up Docker Buildx 19 | uses: docker/setup-buildx-action@v2 20 | 21 | - name: Log in to Docker Hub 22 | uses: docker/login-action@v2 23 | with: 24 | username: ${{ secrets.DOCKER_USERNAME }} 25 | password: ${{ secrets.DOCKER_PASSWORD }} 26 | 27 | - name: Login to GitHub Container Registry 28 | uses: docker/login-action@v2 29 | with: 30 | registry: ghcr.io 31 | username: ${{ github.repository_owner }} 32 | password: ${{ secrets.GHCR_PAT }} 33 | 34 | - name: Extract metadata 35 | id: meta 36 | uses: docker/metadata-action@v4 37 | with: 38 | images: | 39 | ${{ secrets.DOCKER_USERNAME }}/paperless-ai 40 | ghcr.io/${{ github.repository_owner }}/paperless-ai 41 | tags: | 42 | type=schedule,pattern=nightly 43 | type=semver,pattern={{version}},enable=${{ github.event_name == 'release' }} 44 | type=raw,value=latest,enable=${{ github.event_name == 'release' }} 45 | 46 | - name: Build and push 47 | uses: docker/build-push-action@v4 48 | with: 49 | context: . 
50 | push: true 51 | platforms: linux/amd64,linux/arm64 52 | tags: ${{ steps.meta.outputs.tags }} 53 | labels: ${{ steps.meta.outputs.labels }} 54 | -------------------------------------------------------------------------------- /.github/workflows/manualPush.yml: -------------------------------------------------------------------------------- 1 | name: Manual Docker Build and Push 2 | 3 | on: 4 | workflow_dispatch: # Allows manual triggering via the GitHub UI 5 | inputs: 6 | tag: 7 | description: 'Custom tag (optional, defaults to latest)' 8 | required: false 9 | default: 'latest' 10 | type: string 11 | 12 | jobs: 13 | docker: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v3 18 | with: 19 | fetch-depth: 0 20 | 21 | - name: Set up QEMU 22 | uses: docker/setup-qemu-action@v2 23 | 24 | - name: Set up Docker Buildx 25 | uses: docker/setup-buildx-action@v2 26 | 27 | - name: Log in to Docker Hub 28 | uses: docker/login-action@v2 29 | with: 30 | username: ${{ secrets.DOCKER_USERNAME }} 31 | password: ${{ secrets.DOCKER_PASSWORD }} 32 | 33 | - name: Login to GitHub Container Registry 34 | uses: docker/login-action@v2 35 | with: 36 | registry: ghcr.io 37 | username: ${{ github.repository_owner }} 38 | password: ${{ secrets.GHCR_PAT }} 39 | 40 | - name: Extract metadata 41 | id: meta 42 | uses: docker/metadata-action@v4 43 | with: 44 | images: | 45 | ${{ secrets.DOCKER_USERNAME }}/paperless-ai 46 | ghcr.io/${{ github.repository_owner }}/paperless-ai 47 | tags: | 48 | type=raw,value=${{ inputs.tag }} 49 | type=sha,prefix={{branch}}- 50 | 51 | - name: Build and push 52 | uses: docker/build-push-action@v4 53 | with: 54 | context: . 
55 | push: true 56 | platforms: linux/amd64,linux/arm64 57 | tags: ${{ steps.meta.outputs.tags }} 58 | labels: ${{ steps.meta.outputs.labels }} 59 | 60 | - name: Summary 61 | run: | 62 | echo "## 🚀 Docker Build Complete" >> $GITHUB_STEP_SUMMARY 63 | echo "Successfully built and pushed Docker images with tag: **${{ inputs.tag }}**" >> $GITHUB_STEP_SUMMARY 64 | echo "" >> $GITHUB_STEP_SUMMARY 65 | echo "### Images pushed to:" >> $GITHUB_STEP_SUMMARY 66 | echo "- Docker Hub: \`${{ secrets.DOCKER_USERNAME }}/paperless-ai:${{ inputs.tag }}\`" >> $GITHUB_STEP_SUMMARY 67 | echo "- GitHub Container Registry: \`ghcr.io/${{ github.repository_owner }}/paperless-ai:${{ inputs.tag }}\`" >> $GITHUB_STEP_SUMMARY 68 | -------------------------------------------------------------------------------- /.github/workflows/release-to-discord.yml: -------------------------------------------------------------------------------- 1 | name: Notify Discord on Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | discord_notification: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Send Discord Notification 12 | uses: sarisia/actions-status-discord@v1.15.3 13 | with: 14 | webhook: ${{ secrets.DISCORD_WEBHOOK_URL }} 15 | username: "GitHub Bot 🤖" 16 | avatar_url: "https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" 17 | content: | 18 | **🚀 New Release Published!** 19 | 📌 **Repository:** `${{ github.repository }}` 20 | 🏷️ **Version:** `${{ github.event.release.tag_name }}` 21 | 📜 **Description:** 22 | ``` 23 | ${{ github.event.release.body }} 24 | ``` 25 | 🔗 **[View Release](${{ github.event.release.html_url }})** 26 | embeds: | 27 | [ 28 | { 29 | "title": "🎉 New Release: ${{ github.event.release.tag_name }}", 30 | "description": "${{ github.event.release.body }}", 31 | "url": "${{ github.event.release.html_url }}", 32 | "color": 16776960, 33 | "footer": { 34 | "text": "GitHub Actions", 35 | "icon_url": 
"https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" 36 | }, 37 | "timestamp": "${{ github.event.release.published_at }}" 38 | } 39 | ] 40 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Mark stale issues and pull requests 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" # Runs daily at midnight 6 | 7 | jobs: 8 | stale: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Close stale issues and pull requests 13 | uses: actions/stale@v8 14 | with: 15 | repo-token: ${{ secrets.stale_bot }} 16 | stale-issue-message: "This issue has been marked as stale due to inactivity. Please respond to keep it open." 17 | stale-pr-message: "This pull request has been marked as stale due to inactivity. Please update it to keep it open." 18 | close-issue-message: "This issue has been closed due to lack of response." 19 | close-pr-message: "This pull request has been closed due to lack of response." 
20 | days-before-stale: 7 # Days before an issue or PR is marked as stale 21 | days-before-close: 3 # Days before a stale issue or PR is closed 22 | stale-issue-label: "stale" # Label added to stale issues 23 | exempt-issue-labels: "wontfix,bug" # Labels that exempt issues from being marked as stale 24 | only-issues: true # Only affect issues, not PRs 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env.bak 3 | .dockerignore 4 | data/ 5 | node_modules/ 6 | api-test.ps1 7 | openai_debug.log 8 | prompt.bak 9 | data bak/ 10 | data_bak/ 11 | data_*/ 12 | .env* 13 | docker-compose-dev.yml 14 | public/images/ 15 | delete_all.js 16 | documents.json 17 | apitest.js 18 | logs/* 19 | api_correspondent.js 20 | prompt.txt 21 | api_test_r1.js 22 | openrouter.js 23 | /chromadb 24 | rag_config.conf 25 | indexed.conf 26 | indexing_complete.flag 27 | RAGZ_README.md 28 | /chromadb 29 | /chromadb/* 30 | rag_config.conf 31 | indexed.conf 32 | main.py.bak 33 | indexing_complete.flag 34 | __pycache__/ 35 | main.pyy* 36 | rag.ejs.old 37 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | 1. Fork the repository 4 | 2. Create your feature branch (`git checkout -b feature/AmazingFeature`) 5 | 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) 6 | 4. Push to the branch (`git push origin feature/AmazingFeature`) 7 | 5. 
Open a Pull Request 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a slim Node.js (LTS) image as base 2 | FROM node:22-slim 3 | 4 | WORKDIR /app 5 | 6 | # Install system dependencies and clean up in single layer 7 | RUN apt-get update && \ 8 | apt-get install -y --no-install-recommends \ 9 | python3 \ 10 | python3-pip \ 11 | python3-dev \ 12 | python3-venv \ 13 | make \ 14 | g++ \ 15 | curl \ 16 | wget && \ 17 | apt-get clean && \ 18 | rm -rf /var/lib/apt/lists/* 19 | 20 | # Install PM2 process manager globally 21 | RUN npm install pm2 -g 22 | 23 | # Install Python dependencies for RAG service in a virtual environment 24 | COPY requirements.txt /app/ 25 | RUN python3 -m venv /app/venv 26 | ENV PATH="/app/venv/bin:$PATH" 27 | RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt 28 | 29 | # Copy package files for dependency installation 30 | COPY package*.json ./ 31 | 32 | # Install node dependencies with clean install 33 | RUN npm ci --only=production && npm cache clean --force 34 | 35 | # Copy application source code 36 | COPY . . 
37 | 38 | # Make startup script executable 39 | RUN chmod +x start-services.sh 40 | 41 | # Configure persistent data volume 42 | VOLUME ["/app/data"] 43 | 44 | # Configure application port - the actual port is determined by PAPERLESS_AI_PORT 45 | EXPOSE ${PAPERLESS_AI_PORT:-3000} 46 | 47 | # Add health check with dynamic port 48 | HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ 49 | CMD curl -f http://localhost:${PAPERLESS_AI_PORT:-3000}/health || exit 1 50 | 51 | # Set production environment 52 | ENV NODE_ENV=production 53 | 54 | # Start both Node.js and Python services using our script 55 | CMD ["./start-services.sh"] 56 | -------------------------------------------------------------------------------- /Dockerfile.rag: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | WORKDIR /app 4 | 5 | # Install system dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | build-essential \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | # Copy requirements file 11 | COPY requirements.txt /app/ 12 | 13 | # Install Python dependencies 14 | RUN pip install --no-cache-dir -r requirements.txt 15 | 16 | # Copy Python code 17 | COPY main.py /app/ 18 | 19 | # Create necessary directories 20 | RUN mkdir -p /app/data /app/data/chromadb 21 | 22 | # Expose port for FastAPI 23 | EXPOSE 8000 24 | 25 | # Run the application with auto-initialization 26 | CMD ["python", "main.py", "--host", "0.0.0.0", "--port", "8000", "--initialize"] 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 clusterzx 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PRIVACY_POLICY.md: -------------------------------------------------------------------------------- 1 | # Privacy Policy for Paperless-AI Chat Extension 2 | 3 | Last updated: 16.01.2025 4 | 5 | ## 1. General Information 6 | 7 | The Paperless-AI Chat Extension ("the Extension") is a browser extension designed to enhance document interaction in Paperless-ngx through AI-powered chat functionality. We are committed to protecting your privacy and personal data. 8 | 9 | ## 2. Data Controller 10 | 11 | Email: clusterz[at]protonmail.com 12 | 13 | ## 3. Data Collection and Processing 14 | 15 | ### 3.1 Stored Data 16 | The Extension stores the following data locally in your browser: 17 | - URL of your Paperless-ngx installation 18 | - URL of your Paperless-AI server 19 | - API key for the Paperless-AI service 20 | 21 | This data is stored exclusively in the Chrome Storage Sync API and is only accessible by the Extension. 
22 | 23 | ### 3.2 Document Content Processing 24 | - The Extension only accesses document content when you actively use the chat function for a specific document 25 | - Document contents are transmitted exclusively to your configured Paperless-AI server 26 | - No document content is transmitted to third parties 27 | 28 | ### 3.3 Chat History 29 | - Chat histories are only temporarily held in browser memory 30 | - This data is deleted when closing the chat window 31 | - No permanent storage of chat histories occurs in the Extension 32 | 33 | ## 4. Data Transmission 34 | 35 | The Extension transmits data exclusively to: 36 | - Your self-hosted Paperless-ngx installation 37 | - Your self-configured Paperless-AI server 38 | 39 | No data is transmitted to the Extension developers or other third parties. 40 | 41 | ## 5. Permissions 42 | 43 | The Extension requires the following browser permissions: 44 | - "storage": For saving your configuration settings 45 | - "activeTab": For integrating chat functionality into the Paperless-ngx interface 46 | - "host_permissions": For communication with your Paperless-ngx and Paperless-AI servers 47 | 48 | ## 6. Data Security 49 | 50 | - All communication with your servers is encrypted via HTTPS 51 | - The API key is securely stored in the Chrome Storage system 52 | - The Extension implements best practices for handling sensitive data 53 | 54 | ## 7. Your Rights 55 | 56 | You have the right to: 57 | - Uninstall the Extension at any time 58 | - Delete your stored settings 59 | - Cease using the Extension at any time 60 | 61 | Under GDPR, you also have the following rights: 62 | - Right to access your personal data 63 | - Right to rectification 64 | - Right to erasure ("right to be forgotten") 65 | - Right to restrict processing 66 | - Right to data portability 67 | - Right to object 68 | 69 | ## 8. 
Changes to Privacy Policy 70 | 71 | We reserve the right to modify this privacy policy when necessary, in compliance with applicable data protection regulations. The current version can always be found at https://github.com/clusterzx/paperless-ai/blob/main/PRIVACY_POLICY.md. 72 | 73 | ## 9. Contact 74 | 75 | If you have any questions about data protection, you can contact us at any time: 76 | clusterz[at]protonmail.com 77 | 78 | ## 10. Consent 79 | 80 | By installing and using the Extension, you agree to this privacy policy. You can withdraw your consent at any time by uninstalling the Extension. 81 | 82 | ## 11. Technical Details 83 | 84 | ### 11.1 Data Storage Location 85 | All configuration data is stored locally in your browser using Chrome's secure storage APIs. No data is stored on our servers. 86 | 87 | ### 11.2 Data Processing 88 | - Document content is processed only when explicitly requested through the chat interface 89 | - Processing occurs on your configured Paperless-AI server 90 | - No content caching or storage occurs within the Extension 91 | 92 | ### 11.3 Security Measures 93 | - All API communications use HTTPS encryption 94 | - API keys are stored using Chrome's secure storage system 95 | - No logging or tracking of user activities 96 | - No analytics or tracking code is included in the Extension 97 | 98 | ## 12. Children's Privacy 99 | 100 | The Extension is not intended for use by children under the age of 13. We do not knowingly collect or process data from children under 13 years of age. 101 | 102 | ## 13. International Data Transfers 103 | 104 | As the Extension operates entirely within your browser and communicates only with servers you configure, no international data transfers occur through our services. 
105 | -------------------------------------------------------------------------------- /RAG-DEV-GUIDE.md: -------------------------------------------------------------------------------- 1 | # Using the RAG Service in Development Mode 2 | 3 | This guide explains how to run the Paperless-AI application with the RAG service in a local development environment without Docker. 4 | 5 | ## Understanding the Architecture 6 | 7 | The integration consists of two main components: 8 | 9 | 1. **Python RAG Service (main.py)**: Handles document indexing, search, and context retrieval 10 | 2. **Node.js Integration**: Manages the UI, communicates with the Python service, and uses LLMs to generate responses 11 | 12 | In production, both services run in the same Docker container, but for development, you can run them separately. 13 | 14 | ## Prerequisites 15 | 16 | - Node.js 16+ for the main Paperless-AI application 17 | - Python 3.10+ for the RAG service 18 | - A running Paperless-NGX instance (for document access) 19 | 20 | ## Option 1: Run Both Services Together (Recommended) 21 | 22 | 1. Make sure you have all dependencies installed: 23 | 24 | ```bash 25 | # Install Node.js dependencies 26 | npm install 27 | 28 | # Install Python dependencies 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | 2. Configure your `.env` file in the `data` directory with your Paperless-NGX credentials: 33 | 34 | ``` 35 | PAPERLESS_API_URL=https://your-paperless-ngx-instance 36 | PAPERLESS_API_TOKEN=your-api-token 37 | ``` 38 | 39 | **Note:** The Python service will also read the existing API settings from this file (PAPERLESS_API_URL). 40 | 41 | 3. Run both services using the provided script: 42 | 43 | ```bash 44 | # Make the script executable first (Linux/macOS) 45 | chmod +x start-services.sh 46 | 47 | # Run the services 48 | ./start-services.sh 49 | ``` 50 | 51 | ## Option 2: Run Services Separately 52 | 53 | ### Step 1: Set Up the Python RAG Service 54 | 55 | 1. 
Install Python dependencies: 56 | 57 | ```bash 58 | pip install -r requirements.txt 59 | ``` 60 | 61 | 2. Start the Python RAG service: 62 | 63 | ```bash 64 | python main.py --host 127.0.0.1 --port 8000 --initialize 65 | ``` 66 | 67 | The `--initialize` flag will build the document index on startup. 68 | 69 | ### Step 2: Configure the Paperless-AI Application 70 | 71 | 1. Set the environment variables for the Node.js application: 72 | 73 | For Windows (Command Prompt): 74 | ```cmd 75 | set RAG_SERVICE_URL=http://localhost:8000 76 | set RAG_SERVICE_ENABLED=true 77 | ``` 78 | 79 | For Windows (PowerShell): 80 | ```powershell 81 | $env:RAG_SERVICE_URL="http://localhost:8000" 82 | $env:RAG_SERVICE_ENABLED="true" 83 | ``` 84 | 85 | For Linux/macOS: 86 | ```bash 87 | export RAG_SERVICE_URL=http://localhost:8000 88 | export RAG_SERVICE_ENABLED=true 89 | ``` 90 | 91 | 2. Start the Paperless-AI application in development mode (the nodemon development server is defined as the `test` script in `package.json`): 92 | 93 | ```bash 94 | npm run test 95 | ``` 96 | 97 | ## Accessing the RAG Interface 98 | 99 | Open your browser and navigate to: 100 | 101 | ``` 102 | http://localhost:3000/rag 103 | ``` 104 | 105 | You should see the RAG interface where you can ask questions about your documents. 106 | 107 | ## Troubleshooting 108 | 109 | ### Environment Variables 110 | 111 | - The Python service looks for these variables in this order: 112 | - For API URL: `PAPERLESS_API_URL`, then `PAPERLESS_URL`, then `PAPERLESS_NGX_URL`, then `PAPERLESS_HOST` 113 | - For API Token: `PAPERLESS_TOKEN`, then `PAPERLESS_API_TOKEN`, then `PAPERLESS_APIKEY` 114 | 115 | - If you're using different variable names in your existing `.env` file, the Python service should still find them. 116 | 117 | ### Common Issues 118 | 119 | - **Missing Documents**: Check that the indexing has completed. You can check the status at `http://localhost:8000/indexing/status`. 120 | - **Connection Errors**: Ensure your Paperless-NGX credentials are correct and the instance is accessible. 
121 | - **Port Conflicts**: If port 8000 is already in use, specify a different port with the `--port` parameter and update the `RAG_SERVICE_URL` environment variable accordingly. 122 | 123 | ## Development Workflow 124 | 125 | When making changes to the codebase: 126 | 127 | 1. **Python RAG Service Changes**: 128 | - Edit `main.py` 129 | - Restart the Python service to apply changes 130 | 131 | 2. **Paperless-AI Integration Changes**: 132 | - Edit Node.js files (like `services/ragService.js` or `routes/rag.js`) 133 | - If using nodemon (with `npm run test`), changes should be applied automatically 134 | - For UI changes to `views/rag.ejs`, refresh the browser 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GitHub commit activity](https://img.shields.io/github/commit-activity/t/clusterzx/paperless-ai) ![Docker Pulls](https://img.shields.io/docker/pulls/clusterzx/paperless-ai) ![GitHub User's stars](https://img.shields.io/github/stars/clusterzx) ![GitHub License](https://img.shields.io/github/license/clusterzx/paperless-ai?cacheSeconds=1) 2 | 3 | ## Discord: 4 | ### [https://discord.gg/AvNekAfK38](https://discord.gg/AvNekAfK38) 5 | 6 | # Paperless-AI 7 | 8 | An automated document analyzer for Paperless-ngx that uses the OpenAI API, Ollama, and all OpenAI API-compatible services to automatically analyze and tag your documents. \ 9 | It features: Automode, Manual Mode, Ollama and OpenAI, a Chat function to query your documents with AI, a modern and intuitive web interface. \ 10 | \ 11 | **The following services and OpenAI API-compatible services have been successfully tested:** 12 | - Ollama 13 | - OpenAI 14 | - DeepSeek.ai 15 | - OpenRouter.ai 16 | - Perplexity.ai 17 | - Together.ai 18 | - VLLM 19 | - LiteLLM 20 | - Fastchat 21 | - Gemini (Google) 22 | - ... 
and there are possibly many more 23 | 24 | > 🚀 **New Feature Announcement** 25 | > **Paperless-AI now includes a powerful, integrated RAG-powered Chat interface!** 26 | > Introducing a whole new way to interact with your Paperless-NGX archive: instead of browsing, filtering, or guessing which tags to search for — just ask. 27 | > Thanks to Retrieval-Augmented Generation (RAG), you can now search semantically across the full content of your documents and get human-like answers instantly. 28 | 29 | > 🔍 **No more guessing. Just ask.** 30 | > Want to know _“When did I receive my electricity contract?”_, _“How much did I pay for the last car repair?”_ or _“Which documents mention my health insurance?”_ — Paperless-AI will find it for you, even if you don’t remember the exact title, sender, or date. 31 | 32 | > 💡 **What does RAG bring to Paperless-NGX?** 33 | > - True full-text understanding of your documents 34 | > - Context-aware responses — beyond keyword search 35 | > - Useful when dealing with large or chaotic document archives 36 | > - Saves time, avoids frustration, and unlocks insights you may have forgotten you had stored 37 | > - Blazingly fast answers backed by your own trusted data 38 | 39 | ![RAG_CHAT_DEMO](https://raw.githubusercontent.com/clusterzx/paperless-ai/refs/heads/main/ppairag.png) 40 | 41 | > ⚠️ **Important Note**: If you're installing Paperless-AI for the **first time**, please **restart the container after completing the setup routine** (where you enter your API keys and preferences). This ensures that all services initialize correctly and your RAG index is built properly. 42 | > ➕ This step is **not required when updating** an existing installation. 43 | 44 | 45 | ![PPAI_SHOWCASE3](https://github.com/user-attachments/assets/1fc9f470-6e45-43e0-a212-b8fa6225e8dd) 46 | 47 | 48 | ## Features 49 | 50 | ### Automated Document Management 51 | - **Automatic Scanning**: Identifies and processes new documents within Paperless-ngx. 
52 | - **AI-Powered Analysis**: Leverages OpenAI API and Ollama (Mistral, Llama, Phi 3, Gemma 2) for precise document analysis. 53 | - **Metadata Assignment**: Automatically assigns titles, tags, document_type and correspondent details. 54 | 55 | ### Advanced Customization Options 56 | - **Predefined Processing Rules**: Specify which documents to process based on existing tags. *(Optional)* 🆕 57 | - **Selective Tag Assignment**: Use only selected tags for processing. *(Disables the prompt dialog)* 🆕 58 | - **Custom Tagging**: Assign a specific tag (of your choice) to AI-processed documents for easy identification. 🆕 59 | 60 | ### Manual Mode 61 | - **AI-Assisted Analysis**: Manually analyze documents with AI support in a modern web interface. *(Accessible via the `/manual` endpoint)* 🆕 62 | 63 | ### Interactive Chat Functionality 64 | - **Document Querying**: Ask questions about your documents and receive accurate, AI-generated answers. 🆕 65 | 66 | ## Installation 67 | 68 | Visit the Wiki for installation:\ 69 | [Click here for Installation](https://github.com/clusterzx/paperless-ai/wiki/2.-Installation) 70 | ------------------------------------------- 71 | 72 | 73 | ## Docker Support 74 | 75 | The application comes with full Docker support: 76 | 77 | - Automatic container restart on failure 78 | - Health monitoring 79 | - Volume persistence for database 80 | - Resource management 81 | - Graceful shutdown handling 82 | 83 | ## Development 84 | 85 | To run the application locally without Docker: 86 | 87 | 1. Install dependencies: 88 | ```bash 89 | npm install 90 | ``` 91 | 92 | 2. Start the development server: 93 | ```bash 94 | npm run test 95 | ``` 96 | 97 | ## Contributing 98 | 99 | 1. Fork the repository 100 | 2. Create your feature branch (`git checkout -b feature/AmazingFeature`) 101 | 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) 102 | 4. Push to the branch (`git push origin feature/AmazingFeature`) 103 | 5. 
Open a Pull Request 104 | 105 | ## License 106 | 107 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 108 | 109 | ## Acknowledgments 110 | 111 | - [Paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) for the amazing document management system 112 | - OpenAI API 113 | - The Express.js and Node.js communities for their excellent tools 114 | 115 | ## Support 116 | 117 | If you encounter any issues or have questions: 118 | 119 | 1. Check the [Issues](https://github.com/clusterzx/paperless-ai/issues) section 120 | 2. Create a new issue if yours isn't already listed 121 | 3. Provide detailed information about your setup and the problem 122 | 123 | ## Roadmap (DONE) 124 | 125 | - [x] Support for custom AI models 126 | - [x] Support for multiple language analysis 127 | - [x] Advanced tag matching algorithms 128 | - [x] Custom rules for document processing 129 | - [x] Enhanced web interface with statistics 130 | 131 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | | Version | Supported | 6 | | ------- | ------------------ | 7 | | 2.5.2 | :white_check_mark: | 8 | | 2.5.0 | :white_check_mark: | 9 | | 1.9.x | :x: | 10 | | < 1.9 | :x: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | If you find a security vulnerability please open an issue. 
15 | -------------------------------------------------------------------------------- /config/config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const currentDir = decodeURIComponent(process.cwd()); 3 | const envPath = path.join(currentDir, 'data', '.env'); 4 | console.log('Loading .env from:', envPath); // Debug log 5 | require('dotenv').config({ path: envPath }); 6 | 7 | // Helper function to parse boolean-like env vars 8 | const parseEnvBoolean = (value, defaultValue = 'yes') => { 9 | if (!value) return defaultValue; 10 | return value.toLowerCase() === 'true' || value === '1' || value.toLowerCase() === 'yes' ? 'yes' : 'no'; 11 | }; 12 | 13 | // Initialize limit functions with defaults 14 | const limitFunctions = { 15 | activateTagging: parseEnvBoolean(process.env.ACTIVATE_TAGGING, 'yes'), 16 | activateCorrespondents: parseEnvBoolean(process.env.ACTIVATE_CORRESPONDENTS, 'yes'), 17 | activateDocumentType: parseEnvBoolean(process.env.ACTIVATE_DOCUMENT_TYPE, 'yes'), 18 | activateTitle: parseEnvBoolean(process.env.ACTIVATE_TITLE, 'yes'), 19 | activateCustomFields: parseEnvBoolean(process.env.ACTIVATE_CUSTOM_FIELDS, 'yes') 20 | }; 21 | 22 | console.log('Loaded environment variables:', { 23 | PAPERLESS_API_URL: process.env.PAPERLESS_API_URL, 24 | PAPERLESS_API_TOKEN: '******', 25 | LIMIT_FUNCTIONS: limitFunctions 26 | }); 27 | 28 | module.exports = { 29 | PAPERLESS_AI_VERSION: '3.0.4', 30 | CONFIGURED: false, 31 | disableAutomaticProcessing: process.env.DISABLE_AUTOMATIC_PROCESSING || 'no', 32 | predefinedMode: process.env.PROCESS_PREDEFINED_DOCUMENTS, 33 | tokenLimit: process.env.TOKEN_LIMIT || 128000, 34 | responseTokens: process.env.RESPONSE_TOKENS || 1000, 35 | addAIProcessedTag: process.env.ADD_AI_PROCESSED_TAG || 'no', 36 | addAIProcessedTags: process.env.AI_PROCESSED_TAG_NAME || 'ai-processed', 37 | paperless: { 38 | apiUrl: process.env.PAPERLESS_API_URL, 39 | apiToken: 
process.env.PAPERLESS_API_TOKEN 40 | }, 41 | openai: { 42 | apiKey: process.env.OPENAI_API_KEY 43 | }, 44 | ollama: { 45 | apiUrl: process.env.OLLAMA_API_URL || 'http://localhost:11434', 46 | model: process.env.OLLAMA_MODEL || 'llama3.2' 47 | }, 48 | custom: { 49 | apiUrl: process.env.CUSTOM_BASE_URL || '', 50 | apiKey: process.env.CUSTOM_API_KEY || '', 51 | model: process.env.CUSTOM_MODEL || '' 52 | }, 53 | azure: { 54 | apiKey: process.env.AZURE_API_KEY || '', 55 | endpoint: process.env.AZURE_ENDPOINT || '', 56 | deploymentName: process.env.AZURE_DEPLOYMENT_NAME || '', 57 | apiVersion: process.env.AZURE_API_VERSION || '2023-05-15' 58 | }, 59 | customFields: process.env.CUSTOM_FIELDS || '', 60 | aiProvider: process.env.AI_PROVIDER || 'openai', 61 | scanInterval: process.env.SCAN_INTERVAL || '*/30 * * * *', 62 | useExistingData: process.env.USE_EXISTING_DATA || 'no', 63 | // Add limit functions to config 64 | limitFunctions: { 65 | activateTagging: limitFunctions.activateTagging, 66 | activateCorrespondents: limitFunctions.activateCorrespondents, 67 | activateDocumentType: limitFunctions.activateDocumentType, 68 | activateTitle: limitFunctions.activateTitle, 69 | activateCustomFields: limitFunctions.activateCustomFields 70 | }, 71 | specialPromptPreDefinedTags: `You are a document analysis AI. You will analyze the document. 72 | You take the main information to associate tags with the document. 73 | You will also find the correspondent of the document (Sender not receiver). Also you find a meaningful and short title for the document. 74 | You are given a list of tags: ${process.env.PROMPT_TAGS} 75 | Only use the tags from the list and try to find the best fitting tags. 76 | You do not ask for additional information, you only use the information given in the document. 77 | 78 | Return the result EXCLUSIVELY as a JSON object. 
The Tags and Title MUST be in the language that is used in the document.: 79 | { 80 | "title": "xxxxx", 81 | "correspondent": "xxxxxxxx", 82 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"], 83 | "document_date": "YYYY-MM-DD", 84 | "language": "en/de/es/..." 85 | }`, 86 | mustHavePrompt: ` Return the result EXCLUSIVELY as a JSON object. The Tags, Title and Document_Type MUST be in the language that is used in the document.: 87 | IMPORTANT: The custom_fields are optional and can be left out if not needed, only try to fill out the values if you find a matching information in the document. 88 | Do not change the value of field_name, only fill out the values. If the field is about money only add the number without currency and always use a . for decimal places. 89 | { 90 | "title": "xxxxx", 91 | "correspondent": "xxxxxxxx", 92 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"], 93 | "document_type": "Invoice/Contract/...", 94 | "document_date": "YYYY-MM-DD", 95 | "language": "en/de/es/...", 96 | %CUSTOMFIELDS% 97 | }`, 98 | }; -------------------------------------------------------------------------------- /dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/dashboard.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | paperless-ai: 3 | image: clusterzx/paperless-ai 4 | container_name: paperless-ai 5 | network_mode: bridge 6 | restart: unless-stopped 7 | cap_drop: 8 | - ALL 9 | security_opt: 10 | - no-new-privileges=true 11 | environment: 12 | - PUID=1000 13 | - PGID=1000 14 | - PAPERLESS_AI_PORT=${PAPERLESS_AI_PORT:-3000} 15 | - RAG_SERVICE_URL=http://localhost:8000 16 | - RAG_SERVICE_ENABLED=true 17 | ports: 18 | - "3000:${PAPERLESS_AI_PORT:-3000}" 19 | volumes: 20 | - 
paperless-ai_data:/app/data 21 | 22 | volumes: 23 | paperless-ai_data: 24 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/chat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/chat.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/favicon.ico -------------------------------------------------------------------------------- /docs/hero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/hero.png -------------------------------------------------------------------------------- /docs/history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/history.png -------------------------------------------------------------------------------- /docs/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/playground.png -------------------------------------------------------------------------------- /docs/ppai_icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/ppai_icon.png -------------------------------------------------------------------------------- /docs/settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/settings.png -------------------------------------------------------------------------------- /ecosystem.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | apps: [{ 3 | name: 'paperless-ai', 4 | script: 'server.js', 5 | instances: 1, 6 | autorestart: true, 7 | watch: false, 8 | max_memory_restart: '1G', 9 | env: { 10 | NODE_ENV: 'production' 11 | }, 12 | exp_backoff_restart_delay: 100 13 | }] 14 | }; -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import globals from "globals"; 2 | import pluginJs from "@eslint/js"; 3 | import prettier from "eslint-config-prettier"; 4 | 5 | /** @type {import('eslint').Linter.Config[]} */ 6 | export default [ 7 | { 8 | files: ["**/*.js"], 9 | languageOptions: { 10 | sourceType: "commonjs", 11 | globals: { 12 | ...globals.browser, 13 | ...globals.node, 14 | }, 15 | }, 16 | }, 17 | pluginJs.configs.recommended, 18 | prettier, // Prettier integriert 19 | ]; -------------------------------------------------------------------------------- /icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/icon.png -------------------------------------------------------------------------------- /icon.webp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/icon.webp -------------------------------------------------------------------------------- /jsdoc_standards.md: -------------------------------------------------------------------------------- 1 | # JSDoc/Swagger Documentation Standards for Paperless-AI API 2 | 3 | The following detailed standard is what all API route documentation should adhere to: 4 | 5 | ## 1. Basic Structure and Format 6 | 7 | Every route in the API must be documented with a JSDoc comment block using the `@swagger` tag following the OpenAPI 3.0.0 specification. The documentation should be placed immediately before the route handler function. 8 | 9 | ```javascript 10 | /** 11 | * @swagger 12 | * /path/to/endpoint: 13 | * method: 14 | * // Documentation content 15 | */ 16 | router.method('/path/to/endpoint', async (req, res) => { 17 | ``` 18 | 19 | ## 2. Core Documentation Elements 20 | 21 | ### 2.1 Route Path Definition 22 | 23 | - The route path must match exactly the path defined in the Express route handler 24 | - Path parameters should be defined using curly braces: `/path/{paramName}` 25 | - Trailing slashes should be avoided for consistency 26 | 27 | ### 2.2 HTTP Method 28 | 29 | - The HTTP method (get, post, put, delete) should be indented under the path 30 | - Only one method should be defined per documentation block 31 | - Multiple methods for the same path should be documented separately 32 | 33 | ### 2.3 Summary and Description 34 | 35 | - Every endpoint must have a clear, concise `summary` field (single line) 36 | - A more detailed `description` field using the pipe symbol (`|`) for multi-line content 37 | - The description should: 38 | - Explain the purpose of the endpoint in 2-3 sentences 39 | - Describe key functionality and behaviors 40 | - Note any important side effects or dependencies 41 | - Use proper grammar and complete sentences 42 | - For complex endpoints, include usage 
examples or explanations of how the endpoint works in the larger application context 43 | 44 | Example: 45 | ```javascript 46 | /** 47 | * @swagger 48 | * /api/example: 49 | * get: 50 | * summary: Brief description of what this endpoint does 51 | * description: | 52 | * Detailed explanation of the endpoint functionality. 53 | * This should cover what the endpoint does, how it works, 54 | * and any important behaviors users should know about. 55 | * 56 | * Use multiple paragraphs for complex explanations. 57 | */ 58 | ``` 59 | 60 | ## 3. Tags and Categorization 61 | 62 | ### 3.1 Tag Requirements 63 | 64 | - Each endpoint must be assigned to at least one tag, often multiple tags 65 | - Tags must come from the predefined list of application tags defined in the `tags` section 66 | - Multiple tags should be used when an endpoint serves multiple purposes 67 | - Common tag combinations include: 68 | - `[Navigation, X]` for UI page routes 69 | - `[API, X]` for data API endpoints 70 | - `[System, Authentication]` for security-related endpoints 71 | 72 | ### 3.2 Defined Tags 73 | 74 | The application uses the following tags for categorization: 75 | - Authentication - User authentication and authorization endpoints 76 | - Documents - Document management and processing endpoints 77 | - History - Document processing history and tracking 78 | - Navigation - General navigation endpoints for the web interface 79 | - System - Configuration, health checks, and administrative functions 80 | - Chat - Document chat functionality 81 | - Setup - Application setup and configuration 82 | - Metadata - Endpoints for managing document metadata 83 | - API - General API endpoints (usually combined with other tags) 84 | 85 | ## 4. 
Security Requirements 86 | 87 | ### 4.1 Security Definitions 88 | 89 | - Each protected endpoint must include appropriate security requirements 90 | - The application supports two authentication methods: 91 | - `BearerAuth` - JWT-based authentication for web app users 92 | - `ApiKeyAuth` - API key authentication for programmatic access 93 | 94 | ### 4.2 Security Requirement Format 95 | 96 | Security requirements should be specified in the standard format: 97 | ```javascript 98 | * security: 99 | * - BearerAuth: [] 100 | * - ApiKeyAuth: [] 101 | ``` 102 | 103 | ### 4.3 Security Notices 104 | 105 | - For endpoints that modify security settings (like key regeneration), include explicit security notices 106 | - Format these as bold text in the description using Markdown: `**Security Notice**: Important information.` 107 | 108 | ## 5. Parameters Documentation 109 | 110 | ### 5.1 Path Parameters 111 | 112 | Path parameters should be documented with: 113 | - Parameter name matching the path definition 114 | - Schema type (integer, string, etc.) 
115 | - Required flag (almost always true for path parameters) 116 | - Description of the parameter purpose 117 | - Example value 118 | 119 | ```javascript 120 | * parameters: 121 | * - in: path 122 | * name: id 123 | * required: true 124 | * schema: 125 | * type: integer 126 | * description: The resource ID 127 | * example: 123 128 | ``` 129 | 130 | ### 5.2 Query Parameters 131 | 132 | Query parameters follow a similar format but include: 133 | - Default values where applicable 134 | - Enumerated values if the parameter has a restricted set of options 135 | 136 | ```javascript 137 | * parameters: 138 | * - in: query 139 | * name: limit 140 | * schema: 141 | * type: integer 142 | * default: 10 143 | * description: Maximum number of records to return 144 | ``` 145 | 146 | ### 5.3 Request Body 147 | 148 | For POST/PUT endpoints, document the request body with: 149 | - Required flag 150 | - Content type (usually application/json) 151 | - Schema definition including: 152 | - Required properties list 153 | - Property definitions with types 154 | - Property descriptions 155 | - Example values 156 | 157 | ```javascript 158 | * requestBody: 159 | * required: true 160 | * content: 161 | * application/json: 162 | * schema: 163 | * type: object 164 | * required: 165 | * - propertyName 166 | * properties: 167 | * propertyName: 168 | * type: string 169 | * description: Description of the property 170 | * example: "Example value" 171 | ``` 172 | 173 | ## 6. 
Response Documentation 174 | 175 | ### 6.1 Response Status Codes 176 | 177 | Each endpoint must document all possible response status codes: 178 | - 200/201 for successful operations 179 | - 400 for invalid requests 180 | - 401 for authentication failures 181 | - 403 for authorization failures 182 | - 404 for resource not found 183 | - 500 for server errors 184 | - Any other status code the endpoint might return 185 | 186 | ### 6.2 Response Content 187 | 188 | For each status code, document: 189 | - Description of what the status code means in this specific context 190 | - Content type of the response 191 | - Schema definition of the response body 192 | - For complex responses, use schema references to components 193 | 194 | ```javascript 195 | * responses: 196 | * 200: 197 | * description: Detailed description of successful response 198 | * content: 199 | * application/json: 200 | * schema: 201 | * $ref: '#/components/schemas/ResponseSchema' 202 | ``` 203 | 204 | ### 6.3 Streaming Responses 205 | 206 | For streaming endpoints (like chat), document: 207 | - The streaming nature of the response 208 | - The format of each chunk 209 | - Examples of the stream events 210 | 211 | ```javascript 212 | * 200: 213 | * description: | 214 | * Response streaming started. Each event contains a message chunk. 215 | * content: 216 | * text/event-stream: 217 | * schema: 218 | * type: string 219 | * example: | 220 | * data: {"chunk":"Example response chunk"} 221 | * 222 | * data: {"done":true} 223 | ``` 224 | 225 | ## 7. 
Schema Definitions and References 226 | 227 | ### 7.1 Schema Components 228 | 229 | - Complex object schemas should be defined as components in a central schema file 230 | - These components should be referenced using `$ref` syntax 231 | - Common schemas like Error responses should always use references 232 | 233 | ```javascript 234 | * schema: 235 | * $ref: '#/components/schemas/Error' 236 | ``` 237 | 238 | ### 7.2 Inline Schemas 239 | 240 | - Simple response schemas can be defined inline 241 | - Include: 242 | - Object type 243 | - Properties with types and descriptions 244 | - Example values for each property 245 | 246 | ```javascript 247 | * schema: 248 | * type: object 249 | * properties: 250 | * success: 251 | * type: boolean 252 | * description: Whether the operation succeeded 253 | * example: true 254 | ``` 255 | 256 | ### 7.3 Array Schemas 257 | 258 | Arrays should specify the item type, either as a reference or inline schema: 259 | 260 | ```javascript 261 | * schema: 262 | * type: array 263 | * items: 264 | * $ref: '#/components/schemas/Item' 265 | ``` 266 | 267 | ## 8. Documentation Style and Formatting 268 | 269 | ### 8.1 Indentation and Formatting 270 | 271 | - Consistent indentation using 2 spaces 272 | - Proper nesting of OpenAPI elements 273 | - Clear separation between different documentation sections 274 | 275 | ### 8.2 Naming Conventions 276 | 277 | - Use camelCase for property names in schemas 278 | - Use snake_case for query parameter names 279 | - Use descriptive names for all elements 280 | 281 | ### 8.3 Example Values 282 | 283 | - Every property should include a realistic example value 284 | - Examples should demonstrate typical usage 285 | - For enums, example should be one of the allowed values 286 | 287 | ## 9. 
Special Documentation Types 288 | 289 | ### 9.1 Page Routes (Navigation) 290 | 291 | For routes that render HTML pages: 292 | - Tag with [Navigation] and relevant feature tag 293 | - Document the purpose of the page 294 | - Note any data dependencies 295 | 296 | ### 9.2 API Data Endpoints 297 | 298 | For pure data API endpoints: 299 | - Tag with [API] and relevant feature tag 300 | - Document the data structure comprehensively 301 | - Include pagination details if applicable 302 | 303 | ### 9.3 Authentication Endpoints 304 | 305 | For authentication-related endpoints: 306 | - Tag with [Authentication] 307 | - Include detailed security considerations 308 | - Document token/session behaviors 309 | 310 | ## 10. Documentation Quality Standards 311 | 312 | ### 10.1 Completeness 313 | 314 | - No undocumented parameters or responses 315 | - All possible response codes covered 316 | - All security requirements specified 317 | 318 | ### 10.2 Accuracy 319 | 320 | - Documentation must match actual implementation 321 | - Examples must be valid for the described schema 322 | - Security requirements must reflect actual restrictions 323 | 324 | ### 10.3 Consistency 325 | 326 | - Similar endpoints should follow similar documentation patterns 327 | - Standard responses (like errors) should be documented identically 328 | - Terminology should be consistent across all endpoints 329 | 330 | This comprehensive standard ensures that all API documentation in the Paperless-AI application is thorough, consistent, and user-friendly, providing developers with all the information they need to use the API effectively. 
331 | 332 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "paperless-ai", 3 | "version": "1.0.0", 4 | "description": "AI based Tag, correspondent and meta data generation", 5 | "main": "server.js", 6 | "scripts": { 7 | "test": "nodemon server.js" 8 | }, 9 | "keywords": [ 10 | "paperless-ngx", 11 | "ai", 12 | "paperless", 13 | "artificial", 14 | "intelligence" 15 | ], 16 | "nodemonConfig": { 17 | "ignore": [ 18 | "test/*", 19 | "docs/*", 20 | "node_modules/*", 21 | ".git/*", 22 | "*.log", 23 | "public/*", 24 | "views/*", 25 | "document.json", 26 | "OPENAPI/openapi.json" 27 | ] 28 | }, 29 | "author": "Clusterzx", 30 | "license": "MIT", 31 | "dependencies": { 32 | "axios": "^1.8.2", 33 | "bcryptjs": "^3.0.2", 34 | "better-sqlite3": "^11.8.1", 35 | "body-parser": "^1.20.3", 36 | "cheerio": "^1.0.0", 37 | "cookie-parser": "^1.4.7", 38 | "cors": "^2.8.5", 39 | "date-fns": "^4.1.0", 40 | "dockerode": "^4.0.6", 41 | "dotenv": "^16.4.7", 42 | "ejs": "^3.1.10", 43 | "express": "^4.21.2", 44 | "jsonwebtoken": "^9.0.2", 45 | "node-cron": "^3.0.3", 46 | "nodemon": "^3.1.9", 47 | "openai": "^4.86.2", 48 | "rimraf": "^6.0.1", 49 | "sqlite3": "^5.1.7", 50 | "swagger-jsdoc": "^6.2.8", 51 | "swagger-ui-express": "^5.0.1", 52 | "tiktoken": "^1.0.20" 53 | }, 54 | "devDependencies": { 55 | "@eslint/js": "^9.22.0", 56 | "eslint": "^9.22.0", 57 | "eslint-config-prettier": "^10.1.1", 58 | "eslint-plugin-jsdoc": "^50.6.3", 59 | "globals": "^16.0.0", 60 | "prettier": "^3.5.3" 61 | }, 62 | "pnpm": { 63 | "onlyBuiltDependencies": [ 64 | "@scarf/scarf", 65 | "better-sqlite3" 66 | ] 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /paperless-ai-chrome.zip: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/paperless-ai-chrome.zip -------------------------------------------------------------------------------- /ppairag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/ppairag.png -------------------------------------------------------------------------------- /prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "singleQuote": true, 4 | "tabWidth": 2, 5 | "trailingComma": "es5" 6 | } -------------------------------------------------------------------------------- /preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/preview.png -------------------------------------------------------------------------------- /public/css/chat.css: -------------------------------------------------------------------------------- 1 | /* Theme Variables */ 2 | :root[data-theme="light"] { 3 | --bg-primary: #ffffff; 4 | --bg-secondary: #f8fafc; 5 | --text-primary: #0f172a; 6 | --text-secondary: #475569; 7 | --accent-primary: #3b82f6; 8 | --border-color: #e2e8f0; 9 | --chat-bg: #ffffff; 10 | --message-bg: #f1f5f9; 11 | --user-message-bg: #eff6ff; 12 | } 13 | 14 | :root[data-theme="dark"] { 15 | --bg-primary: #0f172a; 16 | --bg-secondary: #1e293b; 17 | --text-primary: #f8fafc; 18 | --text-secondary: #e2e8f0; 19 | --accent-primary: #60a5fa; 20 | --border-color: #334155; 21 | --chat-bg: #1e293b; 22 | --message-bg: #334155; 23 | --user-message-bg: #1e3a8a; 24 | } 25 | 26 | body { 27 | background-color: var(--bg-secondary); 28 | color: var(--text-primary); 29 | min-height: 100vh; 30 | transition: background-color 0.3s ease; 31 | font-family: Arial, sans-serif; 32 
| margin: 20px; 33 | line-height: 1.6; 34 | } 35 | 36 | .chat-container { 37 | max-width: 1600px; 38 | margin: 2rem auto; 39 | padding: 0 1rem; 40 | display: flex; 41 | flex-direction: column; 42 | gap: 1.5rem; 43 | } 44 | 45 | .document-select-card { 46 | background: var(--chat-bg); 47 | border-radius: 0.5rem; 48 | padding: 1.5rem; 49 | box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1); 50 | border: 1px solid var(--border-color); 51 | } 52 | 53 | .card-title { 54 | font-size: 1.25rem; 55 | font-weight: 600; 56 | margin-bottom: 1rem; 57 | color: var(--text-primary); 58 | } 59 | 60 | .select-input { 61 | width: 100%; 62 | padding: 0.5rem; 63 | border-radius: 0.375rem; 64 | border: 1px solid var(--border-color); 65 | background: var(--bg-primary); 66 | color: var(--text-primary); 67 | } 68 | 69 | .chat-interface { 70 | background: var(--chat-bg); 71 | border-radius: 0.5rem; 72 | border: 1px solid var(--border-color); 73 | display: flex; 74 | flex-direction: column; 75 | height: 70vh; 76 | } 77 | 78 | .initial-state { 79 | display: flex; 80 | align-items: center; 81 | justify-content: center; 82 | height: 100%; 83 | color: var(--text-secondary); 84 | } 85 | 86 | .chat-history { 87 | flex-grow: 1; 88 | overflow-y: auto; 89 | padding: 1.5rem; 90 | display: flex; 91 | flex-direction: column; 92 | gap: 1rem; 93 | } 94 | 95 | .message { 96 | max-width: 80%; 97 | padding: 1rem; 98 | border-radius: 0.5rem; 99 | background: var(--message-bg); 100 | } 101 | 102 | .user-message { 103 | background: var(--user-message-bg); 104 | align-self: flex-end; 105 | } 106 | 107 | .message-form { 108 | border-top: 1px solid var(--border-color); 109 | padding: 1rem; 110 | display: flex; 111 | gap: 0.5rem; 112 | } 113 | 114 | .message-input { 115 | flex-grow: 1; 116 | padding: 0.75rem; 117 | border-radius: 0.375rem; 118 | border: 1px solid var(--border-color); 119 | background: var(--bg-primary); 120 | color: var(--text-primary); 121 | resize: none; 122 | min-height: 20px; 123 | max-height: 150px; 124 
| } 125 | 126 | .send-button { 127 | padding: 0.75rem 1.5rem; 128 | background: var(--accent-primary); 129 | color: white; 130 | border: none; 131 | border-radius: 0.375rem; 132 | cursor: pointer; 133 | transition: opacity 0.2s ease; 134 | } 135 | 136 | .send-button:hover { 137 | opacity: 0.9; 138 | } 139 | 140 | .theme-toggle { 141 | position: fixed; 142 | top: 1rem; 143 | right: 1rem; 144 | padding: 0.5rem; 145 | border-radius: 0.5rem; 146 | background: var(--bg-primary); 147 | border: 1px solid var(--border-color); 148 | cursor: pointer; 149 | transition: background-color 0.3s ease; 150 | } 151 | 152 | .theme-icon { 153 | width: 1.5rem; 154 | height: 1.5rem; 155 | color: var(--text-primary); 156 | } 157 | 158 | .hidden { 159 | display: none !important; 160 | } 161 | 162 | @media (max-width: 768px) { 163 | .chat-container { 164 | margin: 1rem; 165 | } 166 | 167 | .message { 168 | max-width: 90%; 169 | } 170 | } -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/public/favicon.ico -------------------------------------------------------------------------------- /public/js/chat.js: -------------------------------------------------------------------------------- 1 | let currentDocumentId = null; 2 | 3 | // Initialize marked with options for code highlighting 4 | marked.setOptions({ 5 | highlight: function(code, lang) { 6 | if (lang && hljs.getLanguage(lang)) { 7 | return hljs.highlight(code, { language: lang }).value; 8 | } 9 | return hljs.highlightAuto(code).value; 10 | }, 11 | breaks: true, 12 | gfm: true 13 | }); 14 | 15 | // Load saved theme on page load 16 | document.addEventListener('DOMContentLoaded', () => { 17 | const savedTheme = localStorage.getItem('theme') || 'light'; 18 | setTheme(savedTheme); 19 | setupTextareaAutoResize(); 20 
| }); 21 | 22 | async function initializeChat(documentId) { 23 | try { 24 | const response = await fetch(`/chat/init/${documentId}`); 25 | if (!response.ok) throw new Error('Failed to initialize chat'); 26 | const data = await response.json(); 27 | 28 | document.getElementById('initialState').classList.add('hidden'); 29 | document.getElementById('chatHistory').classList.remove('hidden'); 30 | document.getElementById('messageForm').classList.remove('hidden'); 31 | document.getElementById('documentId').value = documentId; 32 | document.getElementById('chatHistory').innerHTML = ''; 33 | 34 | currentDocumentId = documentId; 35 | 36 | addMessage('Chat initialized for document: ' + data.documentTitle, false); 37 | } catch (error) { 38 | console.error('Error initializing chat:', error); 39 | showError('Failed to initialize chat'); 40 | } 41 | } 42 | 43 | async function sendMessage(message) { 44 | try { 45 | const response = await fetch('/chat/message', { 46 | method: 'POST', 47 | headers: { 48 | 'Content-Type': 'application/json', 49 | }, 50 | body: JSON.stringify({ 51 | documentId: currentDocumentId, 52 | message: message 53 | }) 54 | }); 55 | 56 | if (!response.ok) throw new Error('Failed to send message'); 57 | 58 | // Create message container for streaming response 59 | const containerDiv = document.createElement('div'); 60 | containerDiv.className = 'message-container assistant'; 61 | 62 | const messageDiv = document.createElement('div'); 63 | messageDiv.className = 'message assistant'; 64 | containerDiv.appendChild(messageDiv); 65 | 66 | document.getElementById('chatHistory').appendChild(containerDiv); 67 | 68 | let markdown = ''; 69 | const reader = response.body.getReader(); 70 | const decoder = new TextDecoder(); 71 | 72 | while (true) { 73 | const { done, value } = await reader.read(); 74 | if (done) break; 75 | 76 | const text = decoder.decode(value); 77 | const lines = text.split('\n'); 78 | 79 | for (const line of lines) { 80 | if (line.startsWith('data: ')) 
{ 81 | const data = line.slice(6); 82 | if (data === '[DONE]') continue; 83 | 84 | try { 85 | const parsed = JSON.parse(data); 86 | if (parsed.content) { 87 | markdown += parsed.content; 88 | messageDiv.innerHTML = marked.parse(markdown); 89 | 90 | // Apply syntax highlighting to any code blocks 91 | messageDiv.querySelectorAll('pre code').forEach((block) => { 92 | hljs.highlightBlock(block); 93 | }); 94 | 95 | // Scroll to bottom 96 | const chatHistory = document.getElementById('chatHistory'); 97 | chatHistory.scrollTop = chatHistory.scrollHeight; 98 | } 99 | } catch (e) { 100 | console.error('Error parsing SSE data:', e); 101 | } 102 | } 103 | } 104 | } 105 | 106 | return null; // No need to return response as it's handled in streaming 107 | } catch (error) { 108 | console.error('Error sending message:', error); 109 | throw error; 110 | } 111 | } 112 | 113 | function addMessage(message, isUser = true) { 114 | const containerDiv = document.createElement('div'); 115 | containerDiv.className = `message-container ${isUser ? 'user' : 'assistant'}`; 116 | 117 | const messageDiv = document.createElement('div'); 118 | messageDiv.className = `message ${isUser ? 'user' : 'assistant'}`; 119 | 120 | if (isUser) { 121 | messageDiv.innerHTML = `

${escapeHtml(message)}

`; 122 | } else { 123 | let messageContent = message; 124 | try { 125 | if (typeof message === 'string' && message.trim().startsWith('{')) { 126 | const jsonResponse = JSON.parse(message); 127 | messageContent = jsonResponse.reply || jsonResponse.message || message; 128 | } 129 | } catch (e) { 130 | console.log('Message is not JSON, using as is'); 131 | } 132 | 133 | messageDiv.innerHTML = marked.parse(messageContent); 134 | messageDiv.querySelectorAll('pre code').forEach((block) => { 135 | hljs.highlightBlock(block); 136 | }); 137 | } 138 | 139 | containerDiv.appendChild(messageDiv); 140 | const chatHistory = document.getElementById('chatHistory'); 141 | chatHistory.appendChild(containerDiv); 142 | chatHistory.scrollTop = chatHistory.scrollHeight; 143 | } 144 | 145 | function showError(message) { 146 | const errorDiv = document.createElement('div'); 147 | errorDiv.className = 'message-container assistant'; 148 | errorDiv.innerHTML = ` 149 |
150 |

Error: ${escapeHtml(message)}

151 |
152 | `; 153 | document.getElementById('chatHistory').appendChild(errorDiv); 154 | } 155 | 156 | function escapeHtml(unsafe) { 157 | return unsafe 158 | .replace(/&/g, "&amp;") 159 | .replace(/</g, "&lt;") 160 | .replace(/>/g, "&gt;") 161 | .replace(/"/g, "&quot;") 162 | .replace(/'/g, "&#039;"); 163 | } 164 | 165 | function toggleTheme() { 166 | const currentTheme = document.body.getAttribute('data-theme'); 167 | const newTheme = currentTheme === 'light' ? 'dark' : 'light'; 168 | setTheme(newTheme); 169 | } 170 | 171 | function setTheme(theme) { 172 | const body = document.body; 173 | const lightIcon = document.getElementById('lightIcon'); 174 | const darkIcon = document.getElementById('darkIcon'); 175 | 176 | body.setAttribute('data-theme', theme); 177 | localStorage.setItem('theme', theme); 178 | 179 | if (theme === 'dark') { 180 | lightIcon.classList.add('hidden'); 181 | darkIcon.classList.remove('hidden'); 182 | } else { 183 | lightIcon.classList.remove('hidden'); 184 | darkIcon.classList.add('hidden'); 185 | } 186 | } 187 | 188 | function setupTextareaAutoResize() { 189 | const textarea = document.getElementById('messageInput'); 190 | 191 | function adjustHeight() { 192 | textarea.style.height = 'auto'; 193 | textarea.style.height = (textarea.scrollHeight) + 'px'; 194 | } 195 | 196 | textarea.addEventListener('input', adjustHeight); 197 | textarea.addEventListener('keydown', (e) => { 198 | if (e.key === 'Enter' && !e.shiftKey) { 199 | e.preventDefault(); 200 | document.getElementById('messageForm').dispatchEvent(new Event('submit')); 201 | } 202 | }); 203 | } 204 | 205 | document.getElementById('documentSelect').addEventListener('change', function() { 206 | const documentId = this.value; 207 | if (documentId) { 208 | initializeChat(documentId); 209 | } 210 | }); 211 | 212 | document.addEventListener("DOMContentLoaded", function () { 213 | const documentSelect = document.getElementById('documentSelect'); 214 | const documentId = documentSelect.value; 215 | 216 | if (documentId) { 217 | 
initializeChat(documentId); 218 | } 219 | }); 220 | 221 | document.getElementById('messageInput').addEventListener('keydown', async (e) => { 222 | if (e.key === 'Enter' && !e.shiftKey) { 223 | e.preventDefault(); 224 | await submitForm(); 225 | } 226 | }); 227 | 228 | async function submitForm() { 229 | const messageInput = document.getElementById('messageInput'); 230 | const message = messageInput.value.trim(); 231 | 232 | if (!message) return; 233 | 234 | try { 235 | // Show user message immediately 236 | addMessage(message, true); 237 | 238 | // Clear input and reset height 239 | messageInput.value = ''; 240 | messageInput.style.height = 'auto'; 241 | 242 | // Send message and handle streaming response 243 | await sendMessage(message); 244 | } catch { 245 | showError('Failed to send message'); 246 | } 247 | } -------------------------------------------------------------------------------- /public/js/dashboard.js: -------------------------------------------------------------------------------- 1 | // Theme Management 2 | class ThemeManager { 3 | constructor() { 4 | this.themeToggle = document.getElementById('themeToggle'); 5 | this.initialize(); 6 | } 7 | 8 | initialize() { 9 | const savedTheme = localStorage.getItem('theme') || 'light'; 10 | this.setTheme(savedTheme); 11 | console.log('Theme initialized'); 12 | this.themeToggle.addEventListener('click', () => this.toggleTheme()); 13 | } 14 | 15 | setTheme(theme) { 16 | document.documentElement.setAttribute('data-theme', theme); 17 | localStorage.setItem('theme', theme); 18 | 19 | const icon = this.themeToggle.querySelector('i'); 20 | icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun'; 21 | } 22 | 23 | toggleTheme() { 24 | const currentTheme = document.documentElement.getAttribute('data-theme'); 25 | const newTheme = currentTheme === 'light' ? 
'dark' : 'light'; 26 | this.setTheme(newTheme); 27 | console.log('Theme toggled to: ' + newTheme); 28 | } 29 | } 30 | 31 | // Chart Initialization 32 | class ChartManager { 33 | constructor() { 34 | this.initializeDocumentChart(); 35 | } 36 | 37 | initializeDocumentChart() { 38 | const { documentCount, processedCount } = window.dashboardData; 39 | const unprocessedCount = documentCount - processedCount; 40 | 41 | const ctx = document.getElementById('documentChart').getContext('2d'); 42 | new Chart(ctx, { 43 | type: 'doughnut', 44 | data: { 45 | labels: ['AI Processed', 'Unprocessed'], 46 | datasets: [{ 47 | data: [processedCount, unprocessedCount], 48 | backgroundColor: [ 49 | '#3b82f6', // blue-500 50 | '#e2e8f0' // gray-200 51 | ], 52 | borderWidth: 0, 53 | spacing: 2 54 | }] 55 | }, 56 | options: { 57 | responsive: true, 58 | maintainAspectRatio: false, 59 | cutout: '70%', 60 | plugins: { 61 | legend: { 62 | display: false 63 | }, 64 | tooltip: { 65 | callbacks: { 66 | label: function(context) { 67 | const value = context.raw; 68 | const total = processedCount + unprocessedCount; 69 | const percentage = ((value / total) * 100).toFixed(1); 70 | return `${value} (${percentage}%)`; 71 | } 72 | } 73 | } 74 | } 75 | } 76 | }); 77 | } 78 | } 79 | 80 | // Modal Management 81 | class ModalManager { 82 | constructor() { 83 | this.modal = document.getElementById('detailsModal'); 84 | this.modalTitle = this.modal.querySelector('.modal-title'); 85 | this.modalContent = this.modal.querySelector('.modal-data'); 86 | this.modalLoader = this.modal.querySelector('.modal-loader'); 87 | this.initializeEventListeners(); 88 | } 89 | 90 | initializeEventListeners() { 91 | // Close button click 92 | this.modal.querySelector('.modal-close').addEventListener('click', () => this.hideModal()); 93 | 94 | // Overlay click 95 | this.modal.querySelector('.modal-overlay').addEventListener('click', () => this.hideModal()); 96 | 97 | // Escape key press 98 | document.addEventListener('keydown', 
(e) => { 99 | if (e.key === 'Escape' && this.modal.classList.contains('show')) { 100 | this.hideModal(); 101 | } 102 | }); 103 | } 104 | 105 | showModal(title) { 106 | this.modalTitle.textContent = title; 107 | this.modalContent.innerHTML = ''; 108 | this.modal.classList.remove('hidden'); // Fix: Remove 'hidden' class 109 | this.modal.classList.add('show'); 110 | document.body.style.overflow = 'hidden'; 111 | } 112 | 113 | hideModal() { 114 | this.modal.classList.remove('show'); 115 | this.modal.classList.add('hidden'); // Fix: Add 'hidden' class back 116 | document.body.style.overflow = ''; 117 | } 118 | 119 | showLoader() { 120 | this.modalLoader.classList.remove('hidden'); 121 | this.modalContent.classList.add('hidden'); 122 | } 123 | 124 | hideLoader() { 125 | this.modalLoader.classList.add('hidden'); 126 | this.modalContent.classList.remove('hidden'); 127 | } 128 | 129 | setContent(content) { 130 | this.modalContent.innerHTML = content; 131 | } 132 | } 133 | 134 | // Make showTagDetails and showCorrespondentDetails globally available 135 | window.showTagDetails = async function() { 136 | window.modalManager.showModal('Tag Overview'); 137 | window.modalManager.showLoader(); 138 | 139 | try { 140 | const response = await fetch('/api/tagsCount'); 141 | const tags = await response.json(); 142 | 143 | let content = '
'; 144 | tags.forEach(tag => { 145 | content += ` 146 |
147 | ${tag.name} 148 | ${tag.document_count || 0} documents 149 |
150 | `; 151 | }); 152 | content += '
'; 153 | 154 | window.modalManager.setContent(content); 155 | } catch (error) { 156 | console.error('Error loading tags:', error); 157 | window.modalManager.setContent('
Error loading tags. Please try again later.
'); 158 | } finally { 159 | window.modalManager.hideLoader(); 160 | } 161 | } 162 | 163 | window.showCorrespondentDetails = async function() { 164 | window.modalManager.showModal('Correspondent Overview'); 165 | window.modalManager.showLoader(); 166 | 167 | try { 168 | const response = await fetch('/api/correspondentsCount'); 169 | const correspondents = await response.json(); 170 | 171 | let content = '
'; 172 | correspondents.forEach(correspondent => { 173 | content += ` 174 |
175 | ${correspondent.name} 176 | ${correspondent.document_count || 0} documents 177 |
178 | `; 179 | }); 180 | content += '
'; 181 | 182 | window.modalManager.setContent(content); 183 | } catch (error) { 184 | console.error('Error loading correspondents:', error); 185 | window.modalManager.setContent('
Error loading correspondents. Please try again later.
'); 186 | } finally { 187 | window.modalManager.hideLoader(); 188 | } 189 | } 190 | 191 | // Navigation Management 192 | class NavigationManager { 193 | constructor() { 194 | this.sidebarLinks = document.querySelectorAll('.sidebar-link'); 195 | this.initialize(); 196 | } 197 | 198 | initialize() { 199 | this.sidebarLinks.forEach(link => { 200 | link.addEventListener('click', (e) => { 201 | // Nur für Links ohne echtes Ziel preventDefault aufrufen 202 | if (link.getAttribute('href') === '#') { 203 | e.preventDefault(); 204 | } 205 | this.setActiveLink(link); 206 | }); 207 | }); 208 | } 209 | 210 | setActiveLink(activeLink) { 211 | this.sidebarLinks.forEach(link => { 212 | link.classList.remove('active'); 213 | }); 214 | activeLink.classList.add('active'); 215 | } 216 | } 217 | 218 | // API Functions 219 | async function showTagDetails() { 220 | modalManager.showModal('Tag Overview'); 221 | modalManager.showLoader(); 222 | 223 | try { 224 | const response = await fetch('/api/tags'); 225 | const tags = await response.json(); 226 | 227 | let content = '
'; 228 | tags.forEach(tag => { 229 | content += ` 230 |
231 | ${tag.name} 232 | ${tag.document_count || 0} documents 233 |
234 | `; 235 | }); 236 | content += '
'; 237 | 238 | modalManager.setContent(content); 239 | } catch (error) { 240 | console.error('Error loading tags:', error); 241 | modalManager.setContent('
Error loading tags. Please try again later.
'); 242 | } finally { 243 | modalManager.hideLoader(); 244 | } 245 | } 246 | 247 | async function showCorrespondentDetails() { 248 | modalManager.showModal('Correspondent Overview'); 249 | modalManager.showLoader(); 250 | 251 | try { 252 | const response = await fetch('/api/correspondents'); 253 | const correspondents = await response.json(); 254 | 255 | let content = '
'; 256 | correspondents.forEach(correspondent => { 257 | content += ` 258 |
259 | ${correspondent.name} 260 | ${correspondent.document_count || 0} documents 261 |
262 | `; 263 | }); 264 | content += '
'; 265 | 266 | modalManager.setContent(content); 267 | } catch (error) { 268 | console.error('Error loading correspondents:', error); 269 | modalManager.setContent('
Error loading correspondents. Please try again later.
'); 270 | } finally { 271 | modalManager.hideLoader(); 272 | } 273 | } 274 | 275 | // Initialize everything when DOM is loaded 276 | document.addEventListener('DOMContentLoaded', () => { 277 | window.themeManager = new ThemeManager(); 278 | window.navigationManager = new NavigationManager(); 279 | window.chartManager = new ChartManager(); 280 | window.modalManager = new ModalManager(); 281 | }); -------------------------------------------------------------------------------- /public/js/history.js: -------------------------------------------------------------------------------- 1 | // Theme Management 2 | class ThemeManager { 3 | constructor() { 4 | this.themeToggle = document.getElementById('themeToggle'); 5 | this.initialize(); 6 | } 7 | 8 | initialize() { 9 | const savedTheme = localStorage.getItem('theme') || 'light'; 10 | this.setTheme(savedTheme); 11 | this.themeToggle?.addEventListener('click', () => this.toggleTheme()); 12 | } 13 | 14 | setTheme(theme) { 15 | document.documentElement.setAttribute('data-theme', theme); 16 | localStorage.setItem('theme', theme); 17 | const icon = this.themeToggle.querySelector('i'); 18 | if (icon) { 19 | icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun'; 20 | } 21 | } 22 | 23 | toggleTheme() { 24 | const currentTheme = document.documentElement.getAttribute('data-theme'); 25 | const newTheme = currentTheme === 'light' ? 
'dark' : 'light'; 26 | this.setTheme(newTheme); 27 | } 28 | } 29 | 30 | class HistoryManager { 31 | constructor() { 32 | this.confirmModal = document.getElementById('confirmModal'); 33 | this.confirmModalAll = document.getElementById('confirmModalAll'); 34 | this.selectAll = document.getElementById('selectAll'); 35 | this.table = null; // Will be initialized in initializeDataTable 36 | this.initialize(); 37 | } 38 | 39 | initialize() { 40 | this.table = this.initializeDataTable(); 41 | this.initializeModals(); 42 | this.initializeResetButtons(); 43 | this.initializeFilters(); 44 | this.initializeSelectAll(); 45 | } 46 | 47 | initializeDataTable() { 48 | return $('#historyTable').DataTable({ 49 | serverSide: true, 50 | processing: true, 51 | ajax: { 52 | url: '/api/history', 53 | data: (d) => { 54 | d.tag = $('#tagFilter').val(); 55 | d.correspondent = $('#correspondentFilter').val(); 56 | } 57 | }, 58 | columns: [ 59 | { 60 | data: 'document_id', 61 | render: (data) => ``, 62 | orderable: false, 63 | width: '40px' 64 | }, 65 | { 66 | data: 'document_id', 67 | width: '60px' 68 | }, 69 | { 70 | data: 'title', 71 | render: (data, type, row) => { 72 | if (type === 'display') { 73 | return ` 74 |
${data}
75 |
Modified: ${new Date(row.created_at).toLocaleString()}
76 | `; 77 | } 78 | return data; 79 | } 80 | }, 81 | { 82 | data: 'tags', 83 | render: (data, type) => { 84 | if (type === 'display') { 85 | if (!data?.length) return 'No tags'; 86 | return data.map(tag => 87 | `${tag.name}` 88 | ).join(' '); 89 | } 90 | return data?.map(t => t.name).join(', ') || ''; 91 | } 92 | }, 93 | { data: 'correspondent' }, 94 | { 95 | data: null, 96 | render: (data) => ` 97 |
98 | 102 | 106 |
107 | `, 108 | orderable: false, 109 | width: '150px' 110 | } 111 | ], 112 | order: [[2, 'desc']], 113 | pageLength: 10, 114 | dom: '<"flex flex-col sm:flex-row justify-between items-center mb-4"<"flex-1"f><"flex-none"l>>rtip', 115 | language: { 116 | search: "Search documents:", 117 | lengthMenu: "Show _MENU_ entries", 118 | info: "Showing _START_ to _END_ of _TOTAL_ documents", 119 | infoEmpty: "Showing 0 to 0 of 0 documents", 120 | infoFiltered: "(filtered from _MAX_ total documents)" 121 | }, 122 | drawCallback: () => { 123 | // Update "Select All" checkbox state after table redraw 124 | this.updateSelectAllState(); 125 | // Reattach event listeners to checkboxes 126 | this.attachCheckboxListeners(); 127 | } 128 | }); 129 | } 130 | 131 | initializeModals() { 132 | // Modal close handlers 133 | [this.confirmModal, this.confirmModalAll].forEach(modal => { 134 | if (!modal) return; 135 | 136 | // Close on overlay click 137 | modal.querySelector('.modal-overlay')?.addEventListener('click', () => { 138 | this.hideModal(modal); 139 | }); 140 | 141 | // Close on X button click 142 | modal.querySelector('.modal-close')?.addEventListener('click', () => { 143 | this.hideModal(modal); 144 | }); 145 | 146 | // Close on Cancel button click 147 | modal.querySelector('[id^="cancel"]')?.addEventListener('click', () => { 148 | this.hideModal(modal); 149 | }); 150 | }); 151 | 152 | // Close on Escape key 153 | document.addEventListener('keydown', (e) => { 154 | if (e.key === 'Escape') { 155 | this.hideModal(this.confirmModal); 156 | this.hideModal(this.confirmModalAll); 157 | } 158 | }); 159 | 160 | // Reset action handlers 161 | document.getElementById('confirmReset')?.addEventListener('click', async () => { 162 | const selectedDocs = this.getSelectedDocuments(); 163 | const success = await this.resetDocuments(selectedDocs); 164 | if (success) { 165 | this.hideModal(this.confirmModal); 166 | } 167 | }); 168 | 169 | 
document.getElementById('confirmResetAll')?.addEventListener('click', async () => { 170 | const success = await this.resetAllDocuments(); 171 | if (success) { 172 | this.hideModal(this.confirmModalAll); 173 | } 174 | }); 175 | } 176 | 177 | initializeResetButtons() { 178 | // Reset Selected button 179 | document.getElementById('resetSelectedBtn')?.addEventListener('click', () => { 180 | const selectedDocs = this.getSelectedDocuments(); 181 | if (selectedDocs.length === 0) { 182 | alert('Please select at least one document to reset.'); 183 | return; 184 | } 185 | this.showModal(this.confirmModal); 186 | }); 187 | 188 | // Reset All button 189 | document.getElementById('resetAllBtn')?.addEventListener('click', () => { 190 | this.showModal(this.confirmModalAll); 191 | }); 192 | } 193 | 194 | initializeFilters() { 195 | $('#tagFilter, #correspondentFilter').on('change', () => { 196 | this.table.ajax.reload(); 197 | }); 198 | } 199 | 200 | initializeSelectAll() { 201 | if (!this.selectAll) return; 202 | 203 | // Handle "Select All" checkbox 204 | this.selectAll.addEventListener('change', () => { 205 | const isChecked = this.selectAll.checked; 206 | const checkboxes = document.querySelectorAll('.doc-select'); 207 | checkboxes.forEach(checkbox => { 208 | checkbox.checked = isChecked; 209 | }); 210 | }); 211 | 212 | // Initial state check 213 | this.updateSelectAllState(); 214 | } 215 | 216 | attachCheckboxListeners() { 217 | const checkboxes = document.querySelectorAll('.doc-select'); 218 | checkboxes.forEach(checkbox => { 219 | // Remove existing listeners to prevent duplicates 220 | checkbox.removeEventListener('change', this.handleCheckboxChange); 221 | // Add new listener 222 | checkbox.addEventListener('change', () => this.handleCheckboxChange()); 223 | }); 224 | } 225 | 226 | handleCheckboxChange() { 227 | this.updateSelectAllState(); 228 | } 229 | 230 | updateSelectAllState() { 231 | if (!this.selectAll) return; 232 | 233 | const checkboxes = 
document.querySelectorAll('.doc-select'); 234 | const checkedBoxes = document.querySelectorAll('.doc-select:checked'); 235 | 236 | // Update "Select All" checkbox state 237 | this.selectAll.checked = checkboxes.length > 0 && checkboxes.length === checkedBoxes.length; 238 | 239 | // Update indeterminate state 240 | this.selectAll.indeterminate = checkedBoxes.length > 0 && checkedBoxes.length < checkboxes.length; 241 | } 242 | 243 | showModal(modal) { 244 | if (modal) { 245 | modal.classList.remove('hidden'); 246 | modal.classList.add('show'); 247 | } 248 | } 249 | 250 | hideModal(modal) { 251 | if (modal) { 252 | modal.classList.remove('show'); 253 | modal.classList.add('hidden'); 254 | } 255 | } 256 | 257 | getSelectedDocuments() { 258 | return Array.from(document.querySelectorAll('.doc-select:checked')) 259 | .map(checkbox => checkbox.value); 260 | } 261 | 262 | async resetDocuments(ids) { 263 | try { 264 | const response = await fetch('/api/reset-documents', { 265 | method: 'POST', 266 | headers: { 'Content-Type': 'application/json' }, 267 | body: JSON.stringify({ ids }) 268 | }); 269 | 270 | if (!response.ok) { 271 | throw new Error('Failed to reset documents'); 272 | } 273 | 274 | await this.table.ajax.reload(); 275 | return true; 276 | } catch (error) { 277 | console.error('Error resetting documents:', error); 278 | alert('Failed to reset documents. Please try again.'); 279 | return false; 280 | } 281 | } 282 | 283 | async resetAllDocuments() { 284 | try { 285 | const response = await fetch('/api/reset-all-documents', { 286 | method: 'POST', 287 | headers: { 'Content-Type': 'application/json' } 288 | }); 289 | 290 | if (!response.ok) { 291 | throw new Error('Failed to reset all documents'); 292 | } 293 | 294 | await this.table.ajax.reload(); 295 | return true; 296 | } catch (error) { 297 | console.error('Error resetting all documents:', error); 298 | alert('Failed to reset all documents. 
Please try again.'); 299 | return false; 300 | } 301 | } 302 | } 303 | 304 | // Initialize when DOM is loaded 305 | document.addEventListener('DOMContentLoaded', () => { 306 | window.themeManager = new ThemeManager(); 307 | window.historyManager = new HistoryManager(); 308 | }); -------------------------------------------------------------------------------- /public/js/manual.js: -------------------------------------------------------------------------------- 1 | // Theme Management 2 | class ThemeManager { 3 | constructor() { 4 | this.themeToggle = document.getElementById('themeToggle'); 5 | this.initialize(); 6 | } 7 | 8 | initialize() { 9 | const savedTheme = localStorage.getItem('theme') || 'light'; 10 | this.setTheme(savedTheme); 11 | 12 | this.themeToggle.addEventListener('click', () => this.toggleTheme()); 13 | } 14 | 15 | setTheme(theme) { 16 | document.documentElement.setAttribute('data-theme', theme); 17 | localStorage.setItem('theme', theme); 18 | 19 | const icon = this.themeToggle.querySelector('i'); 20 | icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun'; 21 | } 22 | 23 | toggleTheme() { 24 | const currentTheme = document.documentElement.getAttribute('data-theme'); 25 | const newTheme = currentTheme === 'light' ? 
// Chart Initialization
/**
 * Draws the "AI Processed vs. Unprocessed" doughnut chart.
 *
 * Reads `documentCount` and `processedCount` from `window.dashboardData` and
 * renders into the canvas with id `documentChart` using the global `Chart`
 * constructor.
 */
class ChartManager {
  constructor() {
    this.initializeDocumentChart();
  }

  /**
   * Build the chart configuration and instantiate the chart.
   */
  initializeDocumentChart() {
    const { documentCount, processedCount } = window.dashboardData;

    // Clamp at zero: if the processed counter is ahead of the document
    // counter, the chart must not receive a negative slice.
    const unprocessedCount = Math.max(0, documentCount - processedCount);
    const total = processedCount + unprocessedCount;

    const ctx = document.getElementById('documentChart').getContext('2d');
    new Chart(ctx, {
      type: 'doughnut',
      data: {
        labels: ['AI Processed', 'Unprocessed'],
        datasets: [{
          data: [processedCount, unprocessedCount],
          backgroundColor: [
            '#3b82f6', // blue-500
            '#e2e8f0'  // gray-200
          ],
          borderWidth: 0,
          spacing: 2
        }]
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        cutout: '70%',
        plugins: {
          legend: {
            display: false
          },
          tooltip: {
            callbacks: {
              label(context) {
                const value = context.raw;
                // With no documents at all the share is 0, not NaN.
                const percentage = total > 0
                  ? ((value / total) * 100).toFixed(1)
                  : '0.0';
                return `${value} (${percentage}%)`;
              }
            }
          }
        }
      }
    });
  }
}
this.modal.classList.contains('show')) { 99 | this.hideModal(); 100 | } 101 | }); 102 | } 103 | 104 | showModal(title) { 105 | this.modalTitle.textContent = title; 106 | this.modalContent.innerHTML = ''; 107 | this.modal.classList.remove('hidden'); // Fix: Remove 'hidden' class 108 | this.modal.classList.add('show'); 109 | document.body.style.overflow = 'hidden'; 110 | } 111 | 112 | hideModal() { 113 | this.modal.classList.remove('show'); 114 | this.modal.classList.add('hidden'); // Fix: Add 'hidden' class back 115 | document.body.style.overflow = ''; 116 | } 117 | 118 | showLoader() { 119 | this.modalLoader.classList.remove('hidden'); 120 | this.modalContent.classList.add('hidden'); 121 | } 122 | 123 | hideLoader() { 124 | this.modalLoader.classList.add('hidden'); 125 | this.modalContent.classList.remove('hidden'); 126 | } 127 | 128 | setContent(content) { 129 | this.modalContent.innerHTML = content; 130 | } 131 | } 132 | 133 | // Make showTagDetails and showCorrespondentDetails globally available 134 | window.showTagDetails = async function() { 135 | window.modalManager.showModal('Tag Overview'); 136 | window.modalManager.showLoader(); 137 | 138 | try { 139 | const response = await fetch('/api/tagsCount'); 140 | const tags = await response.json(); 141 | 142 | let content = '
'; 143 | tags.forEach(tag => { 144 | content += ` 145 |
146 | ${tag.name} 147 | ${tag.document_count || 0} documents 148 |
149 | `; 150 | }); 151 | content += '
'; 152 | 153 | window.modalManager.setContent(content); 154 | } catch (error) { 155 | console.error('Error loading tags:', error); 156 | window.modalManager.setContent('
Error loading tags. Please try again later.
'); 157 | } finally { 158 | window.modalManager.hideLoader(); 159 | } 160 | } 161 | 162 | window.showCorrespondentDetails = async function() { 163 | window.modalManager.showModal('Correspondent Overview'); 164 | window.modalManager.showLoader(); 165 | 166 | try { 167 | const response = await fetch('/api/correspondentsCount'); 168 | const correspondents = await response.json(); 169 | 170 | let content = '
'; 171 | correspondents.forEach(correspondent => { 172 | content += ` 173 |
174 | ${correspondent.name} 175 | ${correspondent.document_count || 0} documents 176 |
177 | `; 178 | }); 179 | content += '
'; 180 | 181 | window.modalManager.setContent(content); 182 | } catch (error) { 183 | console.error('Error loading correspondents:', error); 184 | window.modalManager.setContent('
Error loading correspondents. Please try again later.
// Navigation Management
/**
 * Keeps the sidebar's "active" highlight on the most recently clicked link.
 *
 * All elements matching `.sidebar-link` are collected once at construction
 * time and each receives a click handler.
 */
class NavigationManager {
  constructor() {
    this.sidebarLinks = document.querySelectorAll('.sidebar-link');
    this.initialize();
  }

  /**
   * Register one click handler per sidebar link.
   */
  initialize() {
    for (const link of this.sidebarLinks) {
      const onClick = (event) => {
        // Only suppress navigation for placeholder links without a real target.
        const isPlaceholder = link.getAttribute('href') === '#';
        if (isPlaceholder) {
          event.preventDefault();
        }
        this.setActiveLink(link);
      };
      link.addEventListener('click', onClick);
    }
  }

  /**
   * Move the `active` class to the given link.
   * @param {Element} activeLink - The link that should be highlighted.
   */
  setActiveLink(activeLink) {
    for (const candidate of this.sidebarLinks) {
      candidate.classList.remove('active');
    }
    activeLink.classList.add('active');
  }
}
'; 227 | tags.forEach(tag => { 228 | content += ` 229 |
230 | ${tag.name} 231 | ${tag.document_count || 0} documents 232 |
233 | `; 234 | }); 235 | content += '
'; 236 | 237 | modalManager.setContent(content); 238 | } catch (error) { 239 | console.error('Error loading tags:', error); 240 | modalManager.setContent('
Error loading tags. Please try again later.
'); 241 | } finally { 242 | modalManager.hideLoader(); 243 | } 244 | } 245 | 246 | async function showCorrespondentDetails() { 247 | modalManager.showModal('Correspondent Overview'); 248 | modalManager.showLoader(); 249 | 250 | try { 251 | const response = await fetch('/api/correspondents'); 252 | const correspondents = await response.json(); 253 | 254 | let content = '
'; 255 | correspondents.forEach(correspondent => { 256 | content += ` 257 |
258 | ${correspondent.name} 259 | ${correspondent.document_count || 0} documents 260 |
261 | `; 262 | }); 263 | content += '
'; 264 | 265 | modalManager.setContent(content); 266 | } catch (error) { 267 | console.error('Error loading correspondents:', error); 268 | modalManager.setContent('
Error loading correspondents. Please try again later.
'); 269 | } finally { 270 | modalManager.hideLoader(); 271 | } 272 | } 273 | 274 | // Initialize everything when DOM is loaded 275 | document.addEventListener('DOMContentLoaded', () => { 276 | window.themeManager = new ThemeManager(); 277 | window.navigationManager = new NavigationManager(); 278 | window.chartManager = new ChartManager(); 279 | window.modalManager = new ModalManager(); 280 | }); -------------------------------------------------------------------------------- /public/js/playground.js: -------------------------------------------------------------------------------- 1 | // Theme Management für Playground 2 | class ThemeManager { 3 | constructor() { 4 | this.themeToggle = document.getElementById('themeToggle'); 5 | this.initialize(); 6 | } 7 | 8 | initialize() { 9 | const savedTheme = localStorage.getItem('theme') || 'light'; 10 | this.setTheme(savedTheme); 11 | 12 | this.themeToggle.addEventListener('click', () => this.toggleTheme()); 13 | 14 | // Add logo to no-invert class 15 | const logo = document.querySelector('.sidebar-header img'); 16 | if (logo) { 17 | logo.classList.add('no-invert'); 18 | } 19 | } 20 | 21 | setTheme(theme) { 22 | document.documentElement.setAttribute('data-theme', theme); 23 | localStorage.setItem('theme', theme); 24 | 25 | const icon = this.themeToggle.querySelector('i'); 26 | icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun'; 27 | } 28 | 29 | toggleTheme() { 30 | const currentTheme = document.documentElement.getAttribute('data-theme'); 31 | const newTheme = currentTheme === 'light' ? 
// JWT secret key - should be moved to environment variables.
// NOTE(review): when JWT_SECRET is unset, tokens are signed/verified with this
// publicly known fallback, so anyone can forge a session. Left unchanged here
// to avoid breaking existing deployments; set JWT_SECRET in production.
const JWT_SECRET = process.env.JWT_SECRET || 'your-secret-key';

/**
 * Work out who is calling, using the credentials both middlewares accept.
 *
 * Order of precedence:
 *   1. `x-api-key` header equal to `process.env.API_KEY`
 *   2. JWT from the `jwt` cookie, else from `Authorization: <scheme> <token>`
 *
 * `req.cookies` is read with optional chaining: it only exists when a cookie
 * parser ran earlier, and the original `req.cookies.jwt` threw a TypeError
 * (even for valid API-key requests) when it had not.
 *
 * @param {object} req - Express request.
 * @returns {{user: object, reason: null} | {user: null, reason: 'missing' | 'invalid'}}
 */
function resolveRequestUser(req) {
  const apiKey = req.headers['x-api-key'];
  if (apiKey && apiKey === process.env.API_KEY) {
    return { user: { apiKey: true }, reason: null };
  }

  const token = req.cookies?.jwt || req.headers.authorization?.split(' ')[1];
  if (!token) {
    return { user: null, reason: 'missing' };
  }

  try {
    return { user: jwt.verify(token, JWT_SECRET), reason: null };
  } catch (error) {
    // Bad signature, malformed token or expired token.
    return { user: null, reason: 'invalid' };
  }
}

/**
 * API middleware: answers with JSON errors.
 * 401 when no credentials were supplied, 403 when the token is rejected.
 */
const authenticateJWT = (req, res, next) => {
  const { user, reason } = resolveRequestUser(req);

  if (user) {
    req.user = user;
    return next();
  }

  if (reason === 'missing') {
    return res.status(401).json({ message: 'Authentication required' });
  }
  return res.status(403).json({ message: 'Invalid or expired token' });
};

/**
 * Page middleware: sends unauthenticated browsers to `/login`.
 * A rejected token additionally clears the stale `jwt` cookie.
 */
const isAuthenticated = (req, res, next) => {
  const { user, reason } = resolveRequestUser(req);

  if (user) {
    req.user = user;
    return next();
  }

  if (reason === 'invalid') {
    res.clearCookie('jwt');
  }
  return res.redirect('/login');
};
result = await ragService.askQuestion(question); 42 | res.json(result); 43 | } catch (error) { 44 | console.error('Error in /api/rag/ask:', error); 45 | res.status(500).json({ error: error.message || 'Internal server error' }); 46 | } 47 | }); 48 | 49 | /** 50 | * Start document indexing 51 | */ 52 | router.post('/index', async (req, res) => { 53 | try { 54 | const { force = false } = req.body; 55 | const result = await ragService.indexDocuments(force); 56 | res.json(result); 57 | } catch (error) { 58 | console.error('Error in /api/rag/index:', error); 59 | res.status(500).json({ error: error.message || 'Internal server error' }); 60 | } 61 | }); 62 | 63 | /** 64 | * Get indexing status 65 | */ 66 | router.get('/index/status', async (req, res) => { 67 | try { 68 | const status = await ragService.getIndexingStatus(); 69 | res.json(status); 70 | } catch (error) { 71 | console.error('Error in /api/rag/index/status:', error); 72 | res.status(500).json({ error: error.message || 'Internal server error' }); 73 | } 74 | }); 75 | 76 | /** 77 | * Check if updates are needed 78 | */ 79 | router.get('/index/check', async (req, res) => { 80 | try { 81 | const result = await ragService.checkForUpdates(); 82 | res.json(result); 83 | } catch (error) { 84 | console.error('Error in /api/rag/index/check:', error); 85 | res.status(500).json({ error: error.message || 'Internal server error' }); 86 | } 87 | }); 88 | 89 | /** 90 | * Get RAG service status 91 | */ 92 | router.get('/status', async (req, res) => { 93 | try { 94 | const status = await ragService.checkStatus(); 95 | const aiStatus = await ragService.getAIStatus(); 96 | // Combine RAG and AI status 97 | status.ai_status = aiStatus.status; 98 | status.ai_model = aiStatus.model; 99 | // console.log('RAG Status:', status); 100 | // console.log('AI Status:', aiStatus); 101 | res.json(status); 102 | } catch (error) { 103 | console.error('Error in /api/rag/status:', error); 104 | res.status(500).json({ error: error.message || 
/**
 * Picks the AI backend singleton that corresponds to `config.aiProvider`.
 */
class AIServiceFactory {
  /**
   * @returns {object} The service for 'ollama', 'custom' or 'azure'; the
   *   OpenAI service for 'openai' and for every other (or missing) value.
   */
  static getService() {
    // Built per call so the lookup always sees the current module bindings.
    const registry = new Map([
      ['ollama', ollamaService],
      ['custom', customService],
      ['azure', azureService],
      ['openai', openaiService]
    ]);

    const provider = config.aiProvider;
    return registry.has(provider) ? registry.get(provider) : openaiService;
  }
}
/**
 * Keeps one in-memory conversation per Paperless document and streams AI
 * answers to the browser as Server-Sent Events.
 *
 * State lives only in this process: `chats` maps a document id to
 * `{ messages, documentTitle }`, where `messages` is the chat-completions
 * message array (system prompt first).
 */
class ChatService {
  constructor() {
    this.chats = new Map(); // Stores chat histories: documentId -> { messages, documentTitle }
    this.tempDir = path.join(os.tmpdir(), 'paperless-chat');

    // Create temporary directory if it doesn't exist
    if (!fs.existsSync(this.tempDir)) {
      fs.mkdirSync(this.tempDir, { recursive: true });
    }
  }

  /**
   * Build the temp-file path for a download, guaranteed to stay inside
   * `tempDir`.
   *
   * The filename comes from Paperless metadata and the id from the caller, so
   * only the last path segment of `<id>_<filename>` is kept; otherwise a name
   * such as `../../x` would be written outside the temp directory.
   *
   * @param {string} documentId
   * @param {string} originalFilename
   * @returns {string} Absolute path below `this.tempDir`.
   */
  _tempFilePath(documentId, originalFilename) {
    const leaf = path.basename(`${documentId}_${originalFilename}`);
    const safeLeaf = leaf === '' || leaf === '.' || leaf === '..' ? 'document' : leaf;
    return path.join(this.tempDir, safeLeaf);
  }

  /**
   * Downloads the original file from Paperless
   * @param {string} documentId - The ID of the document
   * @returns {Promise<{filePath: string, filename: string, mimeType: string}>}
   */
  async downloadDocument(documentId) {
    try {
      const document = await PaperlessService.getDocument(documentId);
      const tempFilePath = this._tempFilePath(documentId, document.original_filename);

      // cleanup() removes the directory, so make sure it is (still) there.
      await fs.promises.mkdir(this.tempDir, { recursive: true });

      // Create download stream
      const response = await PaperlessService.client.get(`/documents/${documentId}/download/`, {
        responseType: 'stream'
      });

      // Save file temporarily
      await pipeline(
        response.data,
        fs.createWriteStream(tempFilePath)
      );

      return {
        filePath: tempFilePath,
        filename: document.original_filename,
        mimeType: document.mime_type
      };
    } catch (error) {
      console.error(`Error downloading document ${documentId}:`, error);
      throw error;
    }
  }

  /**
   * Initializes a new chat for a document
   *
   * The document text becomes part of the system prompt. If the text cannot
   * be fetched directly, the original file is downloaded and read as UTF-8.
   *
   * @param {string} documentId - The ID of the document
   * @returns {Promise<{documentTitle: string, initialized: boolean}>}
   */
  async initializeChat(documentId) {
    try {
      // Get document information
      const document = await PaperlessService.getDocument(documentId);
      let documentContent;

      try {
        documentContent = await PaperlessService.getDocumentContent(documentId);
      } catch (error) {
        console.warn('Could not get direct document content, trying file download...', error);
        const { filePath } = await this.downloadDocument(documentId);
        try {
          documentContent = await fs.promises.readFile(filePath, 'utf8');
        } finally {
          // The download was only needed for its text; do not let temp files
          // accumulate. Failure to delete is not worth failing the chat for.
          await fs.promises.unlink(filePath).catch(() => {});
        }
      }

      // Create initial system prompt
      const messages = [
        {
          role: "system",
          content: `You are a helpful assistant for the document "${document.title}".
          Use the following document content as context for your responses.
          If you don't know something or it's not in the document, please say so honestly.

          Document content:
          ${documentContent}`
        }
      ];

      this.chats.set(documentId, {
        messages,
        documentTitle: document.title
      });

      return {
        documentTitle: document.title,
        initialized: true
      };
    } catch (error) {
      console.error(`Error initializing chat for document ${documentId}:`, error);
      throw error;
    }
  }

  /**
   * Translate the configured provider into OpenAI-SDK constructor options and
   * the model name to request. All four providers are reached through the
   * OpenAI SDK; they differ only in these values.
   *
   * @param {string} aiProvider - Value of `process.env.AI_PROVIDER`.
   * @returns {{clientOptions: object, model: string} | null} `null` for an
   *   unknown or unset provider.
   */
  _resolveStreamTarget(aiProvider) {
    const env = process.env;
    switch (aiProvider) {
      case 'openai':
        return {
          clientOptions: { apiKey: env.OPENAI_API_KEY },
          model: env.OPENAI_MODEL || 'gpt-4'
        };
      case 'custom':
        return {
          clientOptions: { baseURL: env.CUSTOM_BASE_URL, apiKey: env.CUSTOM_API_KEY },
          model: env.CUSTOM_MODEL
        };
      case 'azure':
        return {
          clientOptions: {
            apiKey: env.AZURE_API_KEY,
            baseURL: `${env.AZURE_ENDPOINT}/openai/deployments/${env.AZURE_DEPLOYMENT_NAME}`,
            defaultQuery: { 'api-version': env.AZURE_API_VERSION }
          },
          model: env.AZURE_DEPLOYMENT_NAME
        };
      case 'ollama':
        return {
          clientOptions: {
            baseURL: `${env.OLLAMA_API_URL}/v1`,
            apiKey: 'ollama' // Ollama doesn't require a real API key but the SDK requires some value
          },
          model: env.OLLAMA_MODEL
        };
      default:
        return null;
    }
  }

  /**
   * Append a user message to the document's chat, stream the assistant's
   * answer to `res` as SSE (`data: {"content": ...}` events followed by
   * `data: [DONE]`), and store the full answer in the history.
   *
   * Never rejects: failures are reported to the client as a
   * `data: {"error": ...}` event and the response is ended.
   *
   * @param {string} documentId
   * @param {string} userMessage
   * @param {object} res - HTTP response (Express/Node) to stream into.
   */
  async sendMessageStream(documentId, userMessage, res) {
    try {
      // Set headers for SSE first, so that even an early failure (e.g. the
      // document cannot be loaded) reaches the client as a proper SSE event.
      if (!res.headersSent) {
        res.setHeader('Content-Type', 'text/event-stream');
        res.setHeader('Cache-Control', 'no-cache');
        res.setHeader('Connection', 'keep-alive');
      }

      if (!this.chats.has(documentId)) {
        await this.initializeChat(documentId);
      }

      const chatData = this.chats.get(documentId);
      chatData.messages.push({
        role: "user",
        content: userMessage
      });

      const aiProvider = process.env.AI_PROVIDER;
      const target = this._resolveStreamTarget(aiProvider);
      if (!target) {
        throw new Error('AI Provider not configured');
      }

      if (aiProvider === 'openai') {
        // Make sure OpenAIService is initialized
        OpenAIService.initialize();
      }

      // Always create a new client instance for this request
      const client = new OpenAI(target.clientOptions);
      const completion = await client.chat.completions.create({
        model: target.model,
        messages: chatData.messages,
        stream: true,
      });

      let fullResponse = '';
      for await (const chunk of completion) {
        const content = chunk.choices[0]?.delta?.content || '';
        if (content) {
          fullResponse += content;
          res.write(`data: ${JSON.stringify({ content })}\n\n`);
        }
      }

      // Add the complete response to chat history
      chatData.messages.push({
        role: "assistant",
        content: fullResponse
      });
      this.chats.set(documentId, chatData);

      // End the stream
      res.write('data: [DONE]\n\n');
      res.end();

    } catch (error) {
      console.error(`Error in sendMessageStream:`, error);
      res.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
      res.end();
    }
  }

  /**
   * @param {string} documentId
   * @returns {Array<{role: string, content: string}>} The stored messages, or
   *   an empty array when no chat exists for the document.
   */
  getChatHistory(documentId) {
    const chatData = this.chats.get(documentId);
    return chatData ? chatData.messages : [];
  }

  /**
   * @param {string} documentId
   * @returns {boolean} Whether a chat has been initialized for the document.
   */
  chatExists(documentId) {
    return this.chats.has(documentId);
  }

  /**
   * Forget the stored conversation of one document.
   * @param {string} documentId
   * @returns {Promise<boolean>} `true` if a chat existed and was removed.
   */
  async deleteChat(documentId) {
    return this.chats.delete(documentId);
  }

  /**
   * Drop all chats and remove the temp directory. Errors are logged, not
   * thrown. (The original called a `deleteChat` method that did not exist, so
   * with any active chat it threw before ever reaching the directory removal.)
   */
  async cleanup() {
    try {
      // Snapshot the keys: deleteChat mutates the map we are walking.
      for (const documentId of [...this.chats.keys()]) {
        await this.deleteChat(documentId);
      }
      // `force` makes a missing directory a no-op; recursive rmdir is deprecated.
      await fs.promises.rm(this.tempDir, { recursive: true, force: true });
    } catch (error) {
      console.error('Error cleaning up ChatService:', error);
    }
  }
}
CustomOpenAIService { 15 | constructor() { 16 | this.client = null; 17 | this.tokenizer = null; 18 | } 19 | 20 | initialize() { 21 | if (!this.client && config.aiProvider === 'custom') { 22 | this.client = new OpenAI({ 23 | baseURL: config.custom.apiUrl, 24 | apiKey: config.custom.apiKey 25 | }); 26 | } 27 | } 28 | 29 | async analyzeDocument(content, existingTags = [], existingCorrespondentList = [], id) { 30 | const cachePath = path.join('./public/images', `${id}.png`); 31 | try { 32 | this.initialize(); 33 | const now = new Date(); 34 | const timestamp = now.toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' }); 35 | 36 | if (!this.client) { 37 | throw new Error('Custom OpenAI client not initialized'); 38 | } 39 | 40 | // Handle thumbnail caching 41 | try { 42 | await fs.access(cachePath); 43 | console.log('[DEBUG] Thumbnail already cached'); 44 | } catch (err) { 45 | console.log('Thumbnail not cached, fetching from Paperless'); 46 | 47 | const thumbnailData = await paperlessService.getThumbnailImage(id); 48 | 49 | if (!thumbnailData) { 50 | console.warn('Thumbnail nicht gefunden'); 51 | } 52 | 53 | await fs.mkdir(path.dirname(cachePath), { recursive: true }); 54 | await fs.writeFile(cachePath, thumbnailData); 55 | } 56 | 57 | // Format existing tags 58 | const existingTagsList = existingTags 59 | .map(tag => tag.name) 60 | .join(', '); 61 | 62 | 63 | let systemPrompt = ''; 64 | let promptTags = ''; 65 | const model = config.custom.model; 66 | // Get system prompt and model 67 | if(process.env.USE_EXISTING_DATA === 'yes') { 68 | systemPrompt = ` 69 | Prexisting tags: ${existingTagsList}\n\n 70 | Prexisiting correspondent: ${existingCorrespondentList}\n\n 71 | ` + process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt; 72 | promptTags = ''; 73 | } else { 74 | systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt; 75 | promptTags = ''; 76 | } 77 | if (process.env.USE_PROMPT_TAGS === 'yes') { 78 | promptTags = 
process.env.PROMPT_TAGS; 79 | systemPrompt = ` 80 | Take these tags and try to match one or more to the document content.\n\n 81 | ` + config.specialPromptPreDefinedTags; 82 | } 83 | 84 | // Calculate total prompt tokens including all components 85 | const totalPromptTokens = await calculateTotalPromptTokens( 86 | systemPrompt, 87 | process.env.USE_PROMPT_TAGS === 'yes' ? [promptTags] : [] 88 | ); 89 | 90 | // Calculate available tokens 91 | const maxTokens = Number(config.tokenLimit); // Model's maximum context length 92 | const reservedTokens = totalPromptTokens + Number(config.responseTokens); 93 | const availableTokens = maxTokens - reservedTokens; 94 | 95 | // Truncate content if necessary 96 | const truncatedContent = await truncateToTokenLimit(content, availableTokens); 97 | 98 | // Make API request 99 | const response = await this.client.chat.completions.create({ 100 | model: model, 101 | messages: [ 102 | { 103 | role: "system", 104 | content: systemPrompt 105 | }, 106 | { 107 | role: "user", 108 | content: truncatedContent 109 | } 110 | ], 111 | ...(model !== 'o3-mini' && { temperature: 0.3 }), 112 | }); 113 | 114 | // Handle response 115 | if (!response?.choices?.[0]?.message?.content) { 116 | throw new Error('Invalid API response structure'); 117 | } 118 | 119 | // Log token usage 120 | console.log(`[DEBUG] [${timestamp}] OpenAI request sent`); 121 | console.log(`[DEBUG] [${timestamp}] Total tokens: ${response.usage.total_tokens}`); 122 | 123 | const usage = response.usage; 124 | const mappedUsage = { 125 | promptTokens: usage.prompt_tokens, 126 | completionTokens: usage.completion_tokens, 127 | totalTokens: usage.total_tokens 128 | }; 129 | 130 | let jsonContent = response.choices[0].message.content; 131 | jsonContent = jsonContent.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim(); 132 | 133 | let parsedResponse; 134 | try { 135 | parsedResponse = JSON.parse(jsonContent); 136 | } catch (error) { 137 | console.error('Failed to parse JSON 
response:', error); 138 | throw new Error('Invalid JSON response from API'); 139 | } 140 | 141 | // Validate response structure 142 | if (!parsedResponse || !Array.isArray(parsedResponse.tags) || typeof parsedResponse.correspondent !== 'string') { 143 | throw new Error('Invalid response structure: missing tags array or correspondent string'); 144 | } 145 | 146 | return { 147 | document: parsedResponse, 148 | metrics: mappedUsage, 149 | truncated: truncatedContent.length < content.length 150 | }; 151 | } catch (error) { 152 | console.error('Failed to analyze document:', error); 153 | return { 154 | document: { tags: [], correspondent: null }, 155 | metrics: null, 156 | error: error.message 157 | }; 158 | } 159 | } 160 | 161 | 162 | async analyzePlayground(content, prompt) { 163 | const musthavePrompt = ` 164 | Return the result EXCLUSIVELY as a JSON object. The Tags and Title MUST be in the language that is used in the document.: 165 | { 166 | "title": "xxxxx", 167 | "correspondent": "xxxxxxxx", 168 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"], 169 | "document_date": "YYYY-MM-DD", 170 | "language": "en/de/es/..." 
171 | }`; 172 | 173 | try { 174 | this.initialize(); 175 | const now = new Date(); 176 | const timestamp = now.toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' }); 177 | 178 | if (!this.client) { 179 | throw new Error('OpenAI client not initialized - missing API key'); 180 | } 181 | 182 | // Calculate total prompt tokens including musthavePrompt 183 | const totalPromptTokens = await calculateTotalPromptTokens( 184 | prompt + musthavePrompt // Combined system prompt 185 | ); 186 | 187 | // Calculate available tokens 188 | const maxTokens = Number(config.tokenLimit); 189 | const reservedTokens = totalPromptTokens + Number(config.responseTokens); 190 | const availableTokens = maxTokens - reservedTokens; 191 | 192 | // Truncate content if necessary 193 | const truncatedContent = await truncateToTokenLimit(content, availableTokens); 194 | 195 | // Make API request 196 | const response = await this.client.chat.completions.create({ 197 | model: config.custom.model, 198 | messages: [ 199 | { 200 | role: "system", 201 | content: prompt + musthavePrompt 202 | }, 203 | { 204 | role: "user", 205 | content: truncatedContent 206 | } 207 | ], 208 | temperature: 0.3, 209 | }); 210 | 211 | // Handle response 212 | if (!response?.choices?.[0]?.message?.content) { 213 | throw new Error('Invalid API response structure'); 214 | } 215 | 216 | // Log token usage 217 | console.log(`[DEBUG] [${timestamp}] OpenAI request sent`); 218 | console.log(`[DEBUG] [${timestamp}] Total tokens: ${response.usage.total_tokens}`); 219 | 220 | const usage = response.usage; 221 | const mappedUsage = { 222 | promptTokens: usage.prompt_tokens, 223 | completionTokens: usage.completion_tokens, 224 | totalTokens: usage.total_tokens 225 | }; 226 | 227 | console.log(mappedUsage); 228 | 229 | let jsonContent = response.choices[0].message.content; 230 | jsonContent = jsonContent.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim(); 231 | 232 | let parsedResponse; 233 | try { 234 | parsedResponse 
= JSON.parse(jsonContent); 235 | } catch (error) { 236 | console.error('Failed to parse JSON response:', error); 237 | throw new Error('Invalid JSON response from API'); 238 | } 239 | 240 | // Validate response structure 241 | if (!parsedResponse || !Array.isArray(parsedResponse.tags) || typeof parsedResponse.correspondent !== 'string') { 242 | throw new Error('Invalid response structure: missing tags array or correspondent string'); 243 | } 244 | 245 | return { 246 | document: parsedResponse, 247 | metrics: mappedUsage, 248 | truncated: truncatedContent.length < content.length 249 | }; 250 | } catch (error) { 251 | console.error('Failed to analyze document:', error); 252 | return { 253 | document: { tags: [], correspondent: null }, 254 | metrics: null, 255 | error: error.message 256 | }; 257 | } 258 | } 259 | 260 | /** 261 | * Generate text based on a prompt 262 | * @param {string} prompt - The prompt to generate text from 263 | * @returns {Promise} - The generated text 264 | */ 265 | async generateText(prompt) { 266 | try { 267 | this.initialize(); 268 | 269 | if (!this.client) { 270 | throw new Error('Custom OpenAI client not initialized - missing API key'); 271 | } 272 | 273 | const model = config.custom.model; 274 | 275 | const response = await this.client.chat.completions.create({ 276 | model: model, 277 | messages: [ 278 | { 279 | role: "user", 280 | content: prompt 281 | } 282 | ], 283 | temperature: 0.7, 284 | max_tokens: 128000 285 | }); 286 | 287 | if (!response?.choices?.[0]?.message?.content) { 288 | throw new Error('Invalid API response structure'); 289 | } 290 | 291 | return response.choices[0].message.content; 292 | } catch (error) { 293 | console.error('Error generating text with Custom OpenAI:', error); 294 | throw error; 295 | } 296 | } 297 | 298 | async checkStatus() { 299 | try { 300 | this.initialize(); 301 | 302 | if (!this.client) { 303 | throw new Error('Custom OpenAI client not initialized - missing API key'); 304 | } 305 | 306 | const 
/**
 * Reduce a failed request to a small JSON string that is safe to hand back to
 * a debug route.
 *
 * `JSON.stringify(error)` is not used here: for a plain `Error` it produces
 * `{}`, and for an axios error `toJSON()` includes the request `config`, and
 * with it the `Authorization` header that carries the Paperless token.
 *
 * @param {Error} error - The error caught around the axios call.
 * @returns {string} JSON with `message` and, when present, `code` and HTTP `status`.
 */
const describeRequestError = (error) =>
  JSON.stringify({
    message: error.message,
    code: error.code,
    status: error.response?.status
  });

/**
 * Fetch the raw `/documents/` listing from the Paperless API (debug helper).
 *
 * @returns {Promise<Object|string>} The response body on success; on failure a
 *   JSON string describing the error (never a rejection).
 */
const getDocuments = async () => {
  try {
    const response = await axios.get(`${paperless_api}/documents/`, {
      headers: {
        'Authorization': `Token ${paperless_token}`,
        'Content-Type': 'application/json'
      }
    });
    return response.data;
  } catch (error) {
    console.error('Paperless validation error:', error.message);
    return describeRequestError(error);
  }
};

/**
 * Fetch the raw `/tags/` listing from the Paperless API (debug helper).
 *
 * @returns {Promise<Object|string>} The response body on success; on failure a
 *   JSON string describing the error (never a rejection).
 */
const getTags = async () => {
  try {
    const response = await axios.get(`${paperless_api}/tags/`, {
      headers: {
        'Authorization': `Token ${paperless_token}`,
        'Content-Type': 'application/json'
      }
    });
    return response.data;
  } catch (error) {
    console.error('Paperless validation error:', error.message);
    return describeRequestError(error);
  }
};
/**
 * Combines the Paperless document list with id -> name lookup tables for tags
 * and correspondents. The lookup tables are fetched once and kept in memory
 * for the lifetime of the instance.
 */
class DocumentsService {
  constructor() {
    this.tagCache = new Map();
    this.correspondentCache = new Map();
  }

  /**
   * @returns {Promise<Object>} Plain object mapping tag id to tag name.
   *   Tags are loaded from `paperlessService.getTags()` only while the cache is empty.
   */
  async getTagNames() {
    if (this.tagCache.size === 0) {
      const tags = await paperlessService.getTags();
      tags.forEach(tag => {
        this.tagCache.set(tag.id, tag.name);
      });
    }
    return Object.fromEntries(this.tagCache);
  }

  /**
   * @returns {Promise<Object>} Plain object mapping correspondent id to name.
   *   Loaded from `paperlessService.listCorrespondentsNames()` only while the cache is empty.
   */
  async getCorrespondentNames() {
    if (this.correspondentCache.size === 0) {
      const correspondents = await paperlessService.listCorrespondentsNames();
      correspondents.forEach(corr => {
        this.correspondentCache.set(corr.id, corr.name);
      });
    }
    return Object.fromEntries(this.correspondentCache);
  }

  /**
   * Load documents together with the tag/correspondent lookup tables.
   *
   * @returns {Promise<{documents: Array, tagNames: Object, correspondentNames: Object, paperlessUrl: string}>}
   *   `documents` is sorted in place by `created`, newest first. `paperlessUrl`
   *   is `PAPERLESS_API_URL` without its trailing `/api` segment.
   */
  async getDocumentsWithMetadata() {
    const [documents, tagNames, correspondentNames] = await Promise.all([
      paperlessService.getDocuments(),
      this.getTagNames(),
      this.getCorrespondentNames()
    ]);

    // Sort documents by created date (newest first)
    documents.sort((a, b) => new Date(b.created) - new Date(a.created));

    // Strip only a trailing "/api" (optionally followed by "/"). A plain
    // replace('/api', '') removes the FIRST match, which mangles hosts such as
    // "http://api.example.com/api", and it throws when the variable is unset.
    const paperlessUrl = (process.env.PAPERLESS_API_URL || '').replace(/\/api\/?$/, '');

    return {
      documents,
      tagNames,
      correspondentNames,
      paperlessUrl
    };
  }
}
DocumentsService(); -------------------------------------------------------------------------------- /services/loggerService.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const util = require('util'); 3 | const path = require('path'); 4 | 5 | class Logger { 6 | constructor(options = {}) { 7 | this.logFile = options.logFile || 'application.log'; 8 | this.logDir = options.logDir || 'logs'; 9 | this.timestamp = options.timestamp !== false; 10 | this.format = options.format || 'txt'; 11 | this.maxFileSize = options.maxFileSize || 1024 * 1024 * 10; // Standard: 10MB 12 | 13 | if (!fs.existsSync(this.logDir)) { 14 | fs.mkdirSync(this.logDir, { recursive: true }); 15 | } 16 | 17 | this.logPath = path.join(this.logDir, this.logFile); 18 | 19 | // Initialisiere Log-Datei 20 | this.initLogFile(); 21 | 22 | this.originalConsole = { 23 | log: console.log, 24 | error: console.error, 25 | warn: console.warn, 26 | info: console.info, 27 | debug: console.debug 28 | }; 29 | 30 | this.overrideConsoleMethods(); 31 | } 32 | 33 | initLogFile() { 34 | // Prüfe ob die Datei die maximale Größe überschreitet 35 | if (this.checkFileSize()) { 36 | // Lösche die alte Datei 37 | try { 38 | fs.unlinkSync(this.logPath); 39 | } catch (error) { 40 | // Ignoriere Fehler wenn Datei nicht existiert 41 | } 42 | } 43 | 44 | // Initialisiere HTML-Datei wenn nötig 45 | if (this.format === 'html') { 46 | this.initHtmlFile(); 47 | } 48 | } 49 | 50 | checkFileSize() { 51 | if (fs.existsSync(this.logPath)) { 52 | const stats = fs.statSync(this.logPath); 53 | return stats.size >= this.maxFileSize; 54 | } 55 | return false; 56 | } 57 | 58 | initHtmlFile() { 59 | const htmlHeader = ` 60 | 61 | 62 | 63 | 64 | Application Logs 65 | 113 | 132 | 133 | 134 |
135 | `; 136 | 137 | if (!fs.existsSync(this.logPath) || fs.statSync(this.logPath).size === 0) { 138 | fs.writeFileSync(this.logPath, htmlHeader); 139 | } 140 | } 141 | 142 | getTimestamp() { 143 | return new Date().toISOString(); 144 | } 145 | 146 | formatLogMessage(type, args) { 147 | const msg = util.format(...args); 148 | if (this.format === 'html') { 149 | const timestamp = this.timestamp ? 150 | `[${this.getTimestamp()}]` : ''; 151 | return `
152 | ${timestamp} 153 | [${type.toUpperCase()}] 154 | ${this.escapeHtml(msg)} 155 |
\n`; 156 | } else { 157 | return this.timestamp ? 158 | `[${this.getTimestamp()}] [${type.toUpperCase()}] ${msg}\n` : 159 | `[${type.toUpperCase()}] ${msg}\n`; 160 | } 161 | } 162 | 163 | escapeHtml(unsafe) { 164 | return unsafe 165 | .replace(/&/g, "&") 166 | .replace(//g, ">") 168 | .replace(/"/g, """) 169 | .replace(/'/g, "'") 170 | .replace(/\n/g, "
") 171 | .replace(/\s/g, " "); 172 | } 173 | 174 | writeToFile(message) { 175 | // Prüfe Dateigröße vor dem Schreiben 176 | if (this.checkFileSize()) { 177 | // Lösche die alte Datei 178 | fs.unlinkSync(this.logPath); 179 | 180 | // Bei HTML-Format müssen wir den Header neu schreiben 181 | if (this.format === 'html') { 182 | this.initHtmlFile(); 183 | } 184 | } 185 | 186 | fs.appendFileSync(this.logPath, message); 187 | } 188 | 189 | overrideConsoleMethods() { 190 | console.log = (...args) => { 191 | const logMessage = this.formatLogMessage('info', args); 192 | this.originalConsole.log(...args); 193 | this.writeToFile(logMessage); 194 | }; 195 | 196 | console.error = (...args) => { 197 | const logMessage = this.formatLogMessage('error', args); 198 | this.originalConsole.error(...args); 199 | this.writeToFile(logMessage); 200 | }; 201 | 202 | console.warn = (...args) => { 203 | const logMessage = this.formatLogMessage('warn', args); 204 | this.originalConsole.warn(...args); 205 | this.writeToFile(logMessage); 206 | }; 207 | 208 | console.info = (...args) => { 209 | const logMessage = this.formatLogMessage('info', args); 210 | this.originalConsole.info(...args); 211 | this.writeToFile(logMessage); 212 | }; 213 | 214 | console.debug = (...args) => { 215 | const logMessage = this.formatLogMessage('debug', args); 216 | this.originalConsole.debug(...args); 217 | this.writeToFile(logMessage); 218 | }; 219 | } 220 | 221 | closeHtmlFile() { 222 | if (this.format === 'html') { 223 | const htmlFooter = `
/**
 * Runs a one-off ("manual") document analysis against a provider chosen per
 * call. Every `_analyze*` method asks the model for a JSON object and returns
 * it once it has a `tags` array and a `correspondent` string.
 */
class ManualService {
  constructor() {
    if (config.aiProvider === 'custom') {
      this.openai = new OpenAI({
        apiKey: config.custom.apiKey,
        // The SDK option is spelled `baseURL` (as used in setupService);
        // `baseUrl` is ignored and requests would go to the default endpoint.
        baseURL: config.custom.apiUrl
      });
    } else if (config.aiProvider === 'azure') {
      this.openai = new AzureOpenAI({
        apiKey: config.azure.apiKey,
        endpoint: config.azure.endpoint,
        deploymentName: config.azure.deploymentName,
        apiVersion: config.azure.apiVersion
      });
    } else {
      this.openai = new OpenAI({ apiKey: config.openai.apiKey });
    }

    // Created unconditionally: analyzeDocument() takes the provider per call,
    // so 'ollama' may be requested whatever config.aiProvider says.
    this.ollama = axios.create({
      timeout: 300000
    });
  }

  /**
   * Dispatch to the provider-specific analysis.
   *
   * @param {string} content - Document text.
   * @param {Array<{name: string}>} existingTags - Currently unused by the provider methods.
   * @param {string} provider - 'openai' | 'ollama' | 'custom' | 'azure'.
   * @returns {Promise<Object>} Parsed model answer; `{ tags: [], correspondent: null }`
   *   for an unknown provider. The provider promise is returned without being
   *   awaited, so a rejection from `_analyzeOllama` reaches the caller.
   */
  async analyzeDocument(content, existingTags, provider) {
    try {
      switch (provider) {
        case 'openai':
          return this._analyzeOpenAI(content, existingTags);
        case 'ollama':
          return this._analyzeOllama(content, existingTags);
        case 'custom':
          return this._analyzeCustom(content, existingTags);
        case 'azure':
          return this._analyzeAzure(content, existingTags);
        default:
          throw new Error('Invalid provider');
      }
    } catch (error) {
      console.error('Error analyzing document:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Remove Markdown code fences that models tend to wrap around JSON.
   * @param {string} rawText
   * @returns {string}
   */
  _stripCodeFences(rawText) {
    return String(rawText).replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
  }

  /**
   * Parse and validate a model answer.
   *
   * @param {string} rawText - Model output, with or without code fences.
   * @returns {Object} Parsed object with a `tags` array and `correspondent` string.
   * @throws {Error} 'Invalid JSON response from API' or 'Invalid response structure'.
   */
  _parseResponse(rawText) {
    const jsonContent = this._stripCodeFences(rawText);

    let parsedResponse;
    try {
      parsedResponse = JSON.parse(jsonContent);
    } catch (error) {
      console.error('Failed to parse JSON response:', error);
      throw new Error('Invalid JSON response from API');
    }

    if (!parsedResponse || !Array.isArray(parsedResponse.tags) || typeof parsedResponse.correspondent !== 'string') {
      throw new Error('Invalid response structure');
    }

    return parsedResponse;
  }

  /**
   * Best-effort append of a model answer to ./logs/response.txt. A logging
   * failure is only warned about; it never affects the analysis result.
   * @param {string} jsonContent
   */
  _logResponse(jsonContent) {
    try {
      // Required locally: this file's import block does not load `fs`.
      const fs = require('fs');
      fs.appendFile('./logs/response.txt', jsonContent, (err) => {
        if (err) {
          console.warn('Could not write response log:', err.message);
        }
      });
    } catch (error) {
      console.warn('Could not write response log:', error.message);
    }
  }

  /**
   * Analyze with the OpenAI client, using OPENAI_MODEL and SYSTEM_PROMPT from
   * the environment.
   * @returns {Promise<Object>} Parsed answer, or `{ tags: [], correspondent: null }` on any failure.
   */
  async _analyzeOpenAI(content, existingTags) {
    try {
      const model = process.env.OPENAI_MODEL;
      const systemPrompt = process.env.SYSTEM_PROMPT;
      await writePromptToFile(systemPrompt, content);
      const response = await this.openai.chat.completions.create({
        model: model,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: content }
        ],
        // temperature is left out for 'o3-mini'
        ...(model !== 'o3-mini' && { temperature: 0.3 }),
      });

      const jsonContent = this._stripCodeFences(response.choices[0].message.content);
      const parsedResponse = this._parseResponse(jsonContent);
      this._logResponse(jsonContent);

      return parsedResponse;
    } catch (error) {
      console.error('Failed to analyze document with OpenAI:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze with the Azure client; AZURE_DEPLOYMENT_NAME is sent as `model`.
   * @returns {Promise<Object>} Parsed answer, or `{ tags: [], correspondent: null }` on any failure.
   */
  async _analyzeAzure(content, existingTags) {
    try {
      const systemPrompt = process.env.SYSTEM_PROMPT;
      await writePromptToFile(systemPrompt, content);
      const response = await this.openai.chat.completions.create({
        model: process.env.AZURE_DEPLOYMENT_NAME,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: content }
        ],
        temperature: 0.3,
      });

      const jsonContent = this._stripCodeFences(response.choices[0].message.content);
      const parsedResponse = this._parseResponse(jsonContent);
      this._logResponse(jsonContent);

      return parsedResponse;
    } catch (error) {
      console.error('Failed to analyze document with OpenAI:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze with the custom OpenAI-compatible endpoint and `config.custom.model`.
   * @returns {Promise<Object>} Parsed answer, or `{ tags: [], correspondent: null }` on any failure.
   */
  async _analyzeCustom(content, existingTags) {
    try {
      const systemPrompt = process.env.SYSTEM_PROMPT;
      const model = config.custom.model;
      const response = await this.openai.chat.completions.create({
        model: model,
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: content }
        ],
        ...(model !== 'o3-mini' && { temperature: 0.3 }),
      });

      return this._parseResponse(response.choices[0].message.content);
    } catch (error) {
      console.error('Failed to analyze document with OpenAI:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze with Ollama's `/api/generate` endpoint.
   *
   * Unlike the other providers this method rethrows: a timeout becomes a
   * user-facing message, any other error is passed through.
   *
   * @returns {Promise<Object>} Parsed answer.
   * @throws {Error} On timeout, transport failure, or an unusable answer.
   */
  async _analyzeOllama(content, existingTags) {
    try {
      const systemPrompt = process.env.SYSTEM_PROMPT || '';
      const documentText = content || '';
      // The document has to be part of the request, otherwise the model only
      // ever sees the instructions.
      const prompt = `${systemPrompt}\n\n${documentText}`;

      const expectedResponseTokens = 1024;
      // Rough estimate: about four characters per token
      const promptTokenCount = Math.ceil(prompt.length / 4);
      const numCtx = Math.min(promptTokenCount + expectedResponseTokens, Number(config.tokenLimit));

      console.log('Prompt Token Count:', promptTokenCount);
      console.log('Expected Response Tokens:', expectedResponseTokens);
      console.log('Dynamic calculated num_ctx:', numCtx);

      const response = await this.ollama.post(`${config.ollama.apiUrl}/api/generate`, {
        model: config.ollama.model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.7,
          top_p: 0.9,
          repeat_penalty: 1.1,
          num_ctx: numCtx,
        }
      });

      if (!response.data || !response.data.response) {
        console.error('Unexpected Ollama response format:', response);
        throw new Error('Invalid response from Ollama API');
      }

      return this._parseResponse(response.data.response);
    } catch (error) {
      if (error.code === 'ECONNABORTED') {
        console.error('Timeout bei der Ollama-Anfrage:', error);
        throw new Error('Die Analyse hat zu lange gedauert. Bitte versuchen Sie es erneut.');
      }
      console.error('Error analyzing document with Ollama:', error);
      throw error;
    }
  }
}
...filters 43 | }); 44 | return response.data; 45 | } catch (error) { 46 | console.error('Error searching documents:', error); 47 | throw error; 48 | } 49 | } 50 | 51 | /** 52 | * Ask a question about documents and get an AI-generated answer in the same language as the question 53 | * @param {string} question - The question to ask 54 | * @returns {Promise<{answer: string, sources: Array}>} - AI response and source documents 55 | */ 56 | async askQuestion(question) { 57 | try { 58 | // 1. Get context from the RAG service 59 | const response = await axios.post(`${this.baseUrl}/context`, { 60 | question, 61 | max_sources: 5 62 | }); 63 | 64 | const { context, sources } = response.data; 65 | 66 | // 2. Fetch full content for each source document using doc_id 67 | let enhancedContext = context; 68 | 69 | if (sources && sources.length > 0) { 70 | // Fetch full document content for each source 71 | const fullDocContents = await Promise.all( 72 | sources.map(async (source) => { 73 | if (source.doc_id) { 74 | try { 75 | const fullContent = await paperlessService.getDocumentContent(source.doc_id); 76 | return `Full document content for ${source.title || 'Document ' + source.doc_id}:\n${fullContent}`; 77 | } catch (error) { 78 | console.error(`Error fetching content for document ${source.doc_id}:`, error.message); 79 | return ''; 80 | } 81 | } 82 | return ''; 83 | }) 84 | ); 85 | 86 | // Combine original context with full document contents 87 | enhancedContext = context + '\n\n' + fullDocContents.filter(content => content).join('\n\n'); 88 | } 89 | 90 | // 3. Use AI service to generate an answer based on the enhanced context 91 | const aiService = AIServiceFactory.getService(); 92 | 93 | // Create a language-agnostic prompt that works in any language 94 | const prompt = ` 95 | You are a helpful assistant that answers questions about documents. 
96 | 97 | Answer the following question precisely, based on the provided documents: 98 | 99 | Question: ${question} 100 | 101 | Context from relevant documents: 102 | ${enhancedContext} 103 | 104 | Important instructions: 105 | - Use ONLY information from the provided documents 106 | - If the answer is not contained in the documents, respond: "This information is not contained in the documents." (in the same language as the question) 107 | - Avoid assumptions or speculation beyond the given context 108 | - Answer in the same language as the question was asked 109 | - Do not mention document numbers or source references, answer as if it were a natural conversation 110 | `; 111 | 112 | let answer; 113 | try { 114 | answer = await aiService.generateText(prompt); 115 | } catch (error) { 116 | console.error('Error generating answer with AI service:', error); 117 | answer = "An error occurred while generating an answer. Please try again later."; 118 | } 119 | 120 | return { 121 | answer, 122 | sources 123 | }; 124 | } catch (error) { 125 | console.error('Error in askQuestion:', error); 126 | throw new Error("An error occurred while processing your question. 
Please try again later."); 127 | } 128 | } 129 | 130 | /** 131 | * Start indexing documents in the RAG service 132 | * @param {boolean} force - Whether to force refresh from source 133 | * @returns {Promise} - Indexing status 134 | */ 135 | async indexDocuments(force = false) { 136 | try { 137 | const response = await axios.post(`${this.baseUrl}/indexing/start`, { 138 | force, 139 | background: true 140 | }); 141 | return response.data; 142 | } catch (error) { 143 | console.error('Error indexing documents:', error); 144 | throw error; 145 | } 146 | } 147 | 148 | /** 149 | * Check if the RAG service needs document updates 150 | * @returns {Promise<{needs_update: boolean, message: string}>} 151 | */ 152 | async checkForUpdates() { 153 | try { 154 | const response = await axios.post(`${this.baseUrl}/indexing/check`); 155 | return response.data; 156 | } catch (error) { 157 | console.error('Error checking for updates:', error); 158 | throw error; 159 | } 160 | } 161 | 162 | /** 163 | * Get current indexing status 164 | * @returns {Promise} - Current indexing status 165 | */ 166 | async getIndexingStatus() { 167 | try { 168 | const response = await axios.get(`${this.baseUrl}/indexing/status`); 169 | return response.data; 170 | } catch (error) { 171 | console.error('Error getting indexing status:', error); 172 | throw error; 173 | } 174 | } 175 | 176 | /** 177 | * Initialize the RAG service 178 | * @param {boolean} force - Whether to force initialization 179 | * @returns {Promise} - Initialization status 180 | */ 181 | async initialize(force = false) { 182 | try { 183 | const response = await axios.post(`${this.baseUrl}/initialize`, { force }); 184 | return response.data; 185 | } catch (error) { 186 | console.error('Error initializing RAG service:', error); 187 | throw error; 188 | } 189 | } 190 | 191 | /** 192 | * Get AI status 193 | * @returns {Promise<{status: string}>} 194 | */ 195 | async getAIStatus() { 196 | try { 197 | const aiService = 
// Encoding used when tiktoken has no mapping for the requested model name.
const FALLBACK_ENCODING = 'cl100k_base';

/**
 * Get a tiktoken encoder for `model`, falling back to FALLBACK_ENCODING when
 * the model name is not known to tiktoken, so counting still works as an
 * approximation. The caller must call `free()` on the returned encoder.
 */
function getTokenizer(model) {
  try {
    return tiktoken.encoding_for_model(model);
  } catch (error) {
    return tiktoken.get_encoding(FALLBACK_ENCODING);
  }
}

/**
 * Count the tokens of `text`.
 *
 * @param {string} text
 * @param {string} [model] - Defaults to OPENAI_MODEL or "gpt-4o-mini".
 * @returns {Promise<number>} Number of tokens.
 */
async function calculateTokens(text, model = process.env.OPENAI_MODEL || "gpt-4o-mini") {
  const tokenizer = getTokenizer(model);
  try {
    return tokenizer.encode(text).length;
  } finally {
    // Release the encoder on every path, as truncateToTokenLimit does.
    tokenizer.free();
  }
}

/**
 * Count the tokens of a system prompt plus any additional prompts, adding
 * 4 tokens per counted message for message formatting.
 *
 * @param {string} systemPrompt
 * @param {Array<string>} [additionalPrompts] - Falsy entries are ignored.
 * @param {string} [model]
 * @returns {Promise<number>}
 */
async function calculateTotalPromptTokens(systemPrompt, additionalPrompts = [], model = process.env.OPENAI_MODEL || "gpt-4o-mini") {
  const extraPrompts = additionalPrompts.filter(p => p);

  let totalTokens = await calculateTokens(systemPrompt, model);
  for (const prompt of extraPrompts) {
    totalTokens += await calculateTokens(prompt, model);
  }

  // System message + every counted additional prompt, ~4 tokens each
  const messageCount = 1 + extraPrompts.length;
  return totalTokens + messageCount * 4;
}

/**
 * Cut `text` so that it holds at most `maxTokens` tokens.
 *
 * @param {string} text
 * @param {number} maxTokens - Token budget. A budget that is zero, negative or
 *   not a number yields an empty string; a negative value must not reach
 *   `slice`, where it would only trim the tail of the text.
 * @param {string} [model]
 * @returns {Promise<string>} `text` unchanged when it fits, otherwise its first `maxTokens` tokens decoded as UTF-8.
 */
async function truncateToTokenLimit(text, maxTokens, model = process.env.OPENAI_MODEL || "gpt-4o-mini") {
  const input = text ?? '';
  const limit = Math.floor(Number(maxTokens));
  if (!(limit > 0)) {
    return '';
  }

  const tokenizer = getTokenizer(model);
  try {
    const tokens = tokenizer.encode(input);
    if (tokens.length <= limit) {
      return input;
    }

    // decode() yields bytes, which are turned into a string here
    const truncatedBytes = tokenizer.decode(tokens.slice(0, limit));
    return new TextDecoder("utf-8").decode(truncatedBytes);
  } finally {
    tokenizer.free();
  }
}

/**
 * Append the prompt and content to a log file, deleting the file first when
 * it has grown beyond `maxSize`. Failures are logged and never thrown.
 *
 * @param {string} systemPrompt
 * @param {string} truncatedContent
 * @param {string} [filePath='./logs/prompt.txt']
 * @param {number} [maxSize] - Size in bytes above which the file is deleted (default 10 MB).
 * @returns {Promise<void>}
 */
async function writePromptToFile(systemPrompt, truncatedContent, filePath = './logs/prompt.txt', maxSize = 10 * 1024 * 1024) {
  try {
    const stats = await fs.stat(filePath);
    if (stats.size > maxSize) {
      await fs.unlink(filePath); // Delete the file if it exceeds max size
    }
  } catch (error) {
    if (error.code !== 'ENOENT') {
      console.warn('[WARNING] Error checking file size:', error);
    }
  }

  try {
    // Make sure the target directory exists; appendFile does not create it.
    await fs.mkdir(path.dirname(filePath), { recursive: true });
    await fs.appendFile(filePath, systemPrompt + truncatedContent + '\n\n');
  } catch (error) {
    console.error('[ERROR] Error writing to file:', error);
  }
}
| if (key && value) { 21 | config[key.trim()] = value.trim(); 22 | } 23 | }); 24 | return config; 25 | } catch (error) { 26 | console.error('Error loading config:', error.message); 27 | return null; 28 | } 29 | } 30 | 31 | async validatePaperlessConfig(url, token) { 32 | try { 33 | console.log('Validating Paperless config for:', url + '/api/documents/'); 34 | const response = await axios.get(`${url}/api/documents/`, { 35 | headers: { 36 | 'Authorization': `Token ${token}` 37 | } 38 | }); 39 | return response.status === 200; 40 | } catch (error) { 41 | console.error('Paperless validation error:', error.message); 42 | return false; 43 | } 44 | } 45 | 46 | async validateApiPermissions(url, token) { 47 | for (const endpoint of ['correspondents', 'tags', 'documents', 'document_types', 'custom_fields', 'users']) { 48 | try { 49 | console.log(`Validating API permissions for ${url}/api/${endpoint}/`); 50 | const response = await axios.get(`${url}/api/${endpoint}/`, { 51 | headers: { 52 | 'Authorization': `Token ${token}` 53 | } 54 | }); 55 | console.log(`API permissions validated for ${endpoint}, ${response.status}`); 56 | if (response.status !== 200) { 57 | console.error(`API permissions validation failed for ${endpoint}`); 58 | return { success: false, message: `API permissions validation failed for endpoint '/api/${endpoint}/'` }; 59 | } 60 | } catch (error) { 61 | console.error(`API permissions validation failed for ${endpoint}:`, error.message); 62 | return { success: false, message: `API permissions validation failed for endpoint '/api/${endpoint}/'` }; 63 | } 64 | } 65 | return { success: true, message: 'API permissions validated successfully' }; 66 | } 67 | 68 | 69 | async validateOpenAIConfig(apiKey) { 70 | if (config.CONFIGURED === false) { 71 | try { 72 | const openai = new OpenAI({ apiKey }); 73 | const response = await openai.chat.completions.create({ 74 | model: "gpt-4o-mini", 75 | messages: [{ role: "user", content: "Test" }], 76 | }); 77 | const now = new 
Date(); 78 | const timestamp = now.toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' }); 79 | console.log(`[DEBUG] [${timestamp}] OpenAI request sent`); 80 | return response.choices && response.choices.length > 0; 81 | } catch (error) { 82 | console.error('OpenAI validation error:', error.message); 83 | return false; 84 | } 85 | }else{ 86 | return true; 87 | } 88 | } 89 | 90 | async validateCustomConfig(url, apiKey, model) { 91 | const config = { 92 | baseURL: url, 93 | apiKey: apiKey, 94 | model: model 95 | }; 96 | console.log('Custom AI config:', config); 97 | try { 98 | const openai = new OpenAI({ 99 | apiKey: config.apiKey, 100 | baseURL: config.baseURL, 101 | }); 102 | const completion = await openai.chat.completions.create({ 103 | messages: [{ role: "user", content: "Test" }], 104 | model: config.model, 105 | }); 106 | return completion.choices && completion.choices.length > 0; 107 | } catch (error) { 108 | console.error('Custom AI validation error:', error); 109 | return false; 110 | } 111 | } 112 | 113 | 114 | 115 | async validateOllamaConfig(url, model) { 116 | try { 117 | const response = await axios.post(`${url}/api/generate`, { 118 | model: model || 'llama3.2', 119 | prompt: 'Test', 120 | stream: false 121 | }); 122 | return response.data && response.data.response; 123 | } catch (error) { 124 | console.error('Ollama validation error:', error.message); 125 | return false; 126 | } 127 | } 128 | 129 | async validateAzureConfig(apiKey, endpoint, deploymentName, apiVersion) { 130 | console.log('Endpoint: ', endpoint); 131 | if (config.CONFIGURED === false) { 132 | try { 133 | const openai = new AzureOpenAI({ apiKey: apiKey, 134 | endpoint: endpoint, 135 | deploymentName: deploymentName, 136 | apiVersion: apiVersion }); 137 | const response = await openai.chat.completions.create({ 138 | model: "gpt-4o-mini", 139 | messages: [{ role: "user", content: "Test" }], 140 | }); 141 | const now = new Date(); 142 | const timestamp = 
now.toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' }); 143 | console.log(`[DEBUG] [${timestamp}] OpenAI request sent`); 144 | return response.choices && response.choices.length > 0; 145 | } catch (error) { 146 | console.error('OpenAI validation error:', error.message); 147 | return false; 148 | } 149 | }else{ 150 | return true; 151 | } 152 | } 153 | 154 | async validateConfig(config) { 155 | // Validate Paperless config 156 | const paperlessApiUrl = config.PAPERLESS_API_URL.replace(/\/api/g, ''); 157 | const paperlessValid = await this.validatePaperlessConfig( 158 | paperlessApiUrl, 159 | config.PAPERLESS_API_TOKEN 160 | ); 161 | 162 | if (!paperlessValid) { 163 | throw new Error('Invalid Paperless configuration'); 164 | } 165 | 166 | // Validate AI provider config 167 | const aiProvider = config.AI_PROVIDER || 'openai'; 168 | 169 | console.log('AI provider:', aiProvider); 170 | 171 | if (aiProvider === 'openai') { 172 | const openaiValid = await this.validateOpenAIConfig(config.OPENAI_API_KEY); 173 | if (!openaiValid) { 174 | throw new Error('Invalid OpenAI configuration'); 175 | } 176 | } else if (aiProvider === 'ollama') { 177 | const ollamaValid = await this.validateOllamaConfig( 178 | config.OLLAMA_API_URL || 'http://localhost:11434', 179 | config.OLLAMA_MODEL 180 | ); 181 | if (!ollamaValid) { 182 | throw new Error('Invalid Ollama configuration'); 183 | } 184 | } else if (aiProvider === 'custom') { 185 | const customValid = await this.validateCustomConfig( 186 | config.CUSTOM_BASE_URL, 187 | config.CUSTOM_API_KEY, 188 | config.CUSTOM_MODEL 189 | ); 190 | if (!customValid) { 191 | throw new Error('Invalid Custom AI configuration'); 192 | } 193 | } else if (aiProvider === 'azure') { 194 | const azureValid = await this.validateAzureConfig( 195 | config.AZURE_API_KEY, 196 | config.AZURE_ENDPOINT, 197 | config.AZURE_DEPLOYMENT_NAME, 198 | config.AZURE_API_VERSION 199 | ); 200 | if (!azureValid) { 201 | throw new Error('Invalid Azure 
configuration'); 202 | } 203 | } 204 | 205 | 206 | return true; 207 | } 208 | 209 | async saveConfig(config) { 210 | try { 211 | // Validate the new configuration before saving 212 | await this.validateConfig(config); 213 | 214 | const JSON_STANDARD_PROMPT = ` 215 | Return the result EXCLUSIVELY as a JSON object. The Tags and Title MUST be in the language that is used in the document.: 216 | 217 | { 218 | "title": "xxxxx", 219 | "correspondent": "xxxxxxxx", 220 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"], 221 | "document_date": "YYYY-MM-DD", 222 | "language": "en/de/es/..." 223 | }`; 224 | 225 | // Ensure data directory exists 226 | const dataDir = path.dirname(this.envPath); 227 | await fs.mkdir(dataDir, { recursive: true }); 228 | 229 | const envContent = Object.entries(config) 230 | .map(([key, value]) => { 231 | if (key === "SYSTEM_PROMPT") { 232 | return `${key}=\`${value}\n\``; 233 | } 234 | return `${key}=${value}`; 235 | }) 236 | .join('\n'); 237 | 238 | await fs.writeFile(this.envPath, envContent); 239 | 240 | // Reload environment variables 241 | Object.entries(config).forEach(([key, value]) => { 242 | process.env[key] = value; 243 | }); 244 | } catch (error) { 245 | console.error('Error saving config:', error.message); 246 | throw error; 247 | } 248 | } 249 | 250 | async isConfigured() { 251 | if (this.configured !== null) { 252 | return this.configured; 253 | } 254 | 255 | const maxAttempts = 60; // 5 minutes = 300 seconds, attempting every 5 seconds = 60 attempts 256 | const delayBetweenAttempts = 5000; // 5 seconds in milliseconds 257 | let attempts = 0; 258 | 259 | // First check if .env exists and if PAPERLESS_API_URL is set 260 | try { 261 | // Check if .env file exists 262 | try { 263 | await fs.access(this.envPath, fs.constants.F_OK); 264 | } catch (err) { 265 | console.log('No .env file found. 
Starting setup process...'); 266 | this.configured = false; 267 | return false; 268 | } 269 | 270 | // Load and check for PAPERLESS_API_URL 271 | const config = await this.loadConfig(); 272 | if (!config || !config.PAPERLESS_API_URL) { 273 | console.log('PAPERLESS_API_URL not set. Starting setup process...'); 274 | this.configured = false; 275 | return false; 276 | } 277 | } catch (error) { 278 | console.error('Error checking initial configuration:', error.message); 279 | this.configured = false; 280 | return false; 281 | } 282 | 283 | const attemptConfiguration = async () => { 284 | try { 285 | // Check data directory and create if needed 286 | const dataDir = path.dirname(this.envPath); 287 | try { 288 | await fs.access(dataDir, fs.constants.F_OK); 289 | } catch (err) { 290 | console.log('Creating data directory...'); 291 | await fs.mkdir(dataDir, { recursive: true }); 292 | } 293 | 294 | // Load and validate full configuration 295 | const config = await this.loadConfig(); 296 | if (!config) { 297 | throw new Error('Failed to load configuration'); 298 | } 299 | 300 | await this.validateConfig(config); 301 | this.configured = true; 302 | return true; 303 | } catch (error) { 304 | console.error('Configuration attempt failed:', error.message); 305 | throw error; 306 | } 307 | }; 308 | 309 | // Only enter retry loop if we have PAPERLESS_API_URL set 310 | while (attempts < maxAttempts) { 311 | try { 312 | const result = await attemptConfiguration(); 313 | return result; 314 | } catch (error) { 315 | attempts++; 316 | if (attempts === maxAttempts) { 317 | console.error('Max configuration attempts reached. 
Final error:', error.message); 318 | this.configured = false; 319 | return false; 320 | } 321 | console.log(`Retrying configuration (attempt ${attempts}/${maxAttempts}) in 5 seconds...`); 322 | await new Promise(resolve => setTimeout(resolve, delayBetweenAttempts)); 323 | } 324 | } 325 | 326 | this.configured = false; 327 | return false; 328 | } 329 | } 330 | 331 | module.exports = new SetupService(); -------------------------------------------------------------------------------- /setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/setup.png -------------------------------------------------------------------------------- /start-services.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # start-services.sh - Script to start both Node.js and Python services 3 | 4 | # Activate virtual environment for Python 5 | source /app/venv/bin/activate 6 | 7 | # Start the Python RAG service in the background 8 | echo "Starting Python RAG service..." 9 | python main.py --host 127.0.0.1 --port 8000 --initialize & 10 | PYTHON_PID=$! 11 | 12 | # Give it a moment to initialize 13 | sleep 2 14 | echo "Python RAG service started with PID: $PYTHON_PID" 15 | 16 | # Set environment variables for the Node.js service 17 | export RAG_SERVICE_URL="http://localhost:8000" 18 | export RAG_SERVICE_ENABLED="true" 19 | 20 | # Start the Node.js application 21 | echo "Starting Node.js Paperless-AI service..." 
22 | pm2-runtime ecosystem.config.js 23 | 24 | # If Node.js exits, kill the Python service 25 | kill $PYTHON_PID 26 | -------------------------------------------------------------------------------- /swagger.js: -------------------------------------------------------------------------------- 1 | const swaggerJSDoc = require('swagger-jsdoc'); 2 | 3 | const swaggerDefinition = { 4 | openapi: '3.0.0', 5 | info: { 6 | title: 'Paperless-AI API Documentation', 7 | version: '1.0.0', 8 | description: 'API documentation for the Paperless-AI application', 9 | license: { 10 | name: 'MIT', 11 | url: 'https://opensource.org/licenses/MIT', 12 | }, 13 | contact: { 14 | name: 'Clusterzx', 15 | url: 'https://github.com/Clusterzx', 16 | }, 17 | }, 18 | servers: [ 19 | { 20 | url: 'http://localhost:3000', 21 | description: 'Development server', 22 | }, 23 | // Add production server details if applicable 24 | ], 25 | components: { 26 | securitySchemes: { 27 | BearerAuth: { 28 | type: 'http', 29 | scheme: 'bearer', 30 | bearerFormat: 'JWT', 31 | description: 'JWT authentication token obtained from the /login endpoint. The token should be included in the Authorization header as "Bearer {token}".' 32 | }, 33 | ApiKeyAuth: { 34 | type: 'apiKey', 35 | in: 'header', 36 | name: 'x-api-key', 37 | description: 'API key for programmatic access. This key can be generated or regenerated using the /api/key-regenerate endpoint. Include the key in the x-api-key header for authentication.' 
38 | }, 39 | }, 40 | }, 41 | security: [ 42 | { BearerAuth: [] }, 43 | { ApiKeyAuth: [] } 44 | ] 45 | }; 46 | 47 | const options = { 48 | swaggerDefinition, 49 | apis: ['./server.js', './routes/*.js', './schemas.js'], // Path to the API docs 50 | }; 51 | 52 | const swaggerSpec = swaggerJSDoc(options); 53 | 54 | module.exports = swaggerSpec; -------------------------------------------------------------------------------- /views/chat.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Paperless-AI Chat 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 |
21 | 24 | 25 | 26 | 64 | 65 | 66 |
67 |
68 |
69 |

Document Chat

70 |
71 | 72 |
73 | 74 |
75 |

Select Document

76 | 82 |
83 | 84 | 85 |
86 | 87 |
88 |

Please select a document to start the chat.

89 |
90 | 91 | 92 | 95 | 96 | 97 | 110 |
111 |
112 |
113 |
114 |
115 | 163 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /views/debug.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Debug Interface 7 | 8 | 9 | 10 | 11 | 12 | 55 | 56 | 57 |
58 |
59 |

Debug Interface

60 |
61 |
62 |
63 | 66 | 74 |
75 | 81 |
82 |
83 |
84 |

Response

85 |
86 |
87 |
88 |
89 | 134 | 135 | -------------------------------------------------------------------------------- /views/history.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Modified Documents - Paperless-AI 8 | 9 | 10 | 11 | 12 | 41 | 42 | 43 | 46 | 47 |
48 | 51 | 52 | 53 | 86 | 87 |
88 |
89 |
90 |

Modified Documents

91 |
92 | 95 | 98 |
99 |
100 | 101 | 102 |
103 |
104 | 110 |
111 | 112 |
113 | 119 |
120 |
121 | 122 | 123 |
124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 |
IDTitleTagsCorrespondentActions
137 |
138 |
139 |
140 |
141 |
142 | 143 | 144 | 160 | 161 | 162 | 178 | 226 | 243 | 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /views/index.ejs: -------------------------------------------------------------------------------- 1 |
2 |

Paperless-AI Dashboard

3 | 4 |
5 | <% if (documents && documents.length > 0) { %> 6 | <% documents.forEach(function(doc) { %> 7 |
8 |

<%= doc.title %>

9 |

Created at: <%= new Date(doc.created).toLocaleDateString('de-DE') %>

10 |

Tags: 11 | <% if (doc.tags && doc.tags.length > 0) { %> 12 | <% doc.tags.forEach(function(tag) { %> 13 | <%= tag %> 14 | <% }); %> 15 | <% } else { %> 16 | Keine Tags 17 | <% } %> 18 |

19 |

Correspondent: <%= doc.correspondent || 'Nicht zugewiesen' %>

20 |
21 | <% }); %> 22 | <% } else { %> 23 |

No documents found.

24 | <% } %> 25 |
26 |
-------------------------------------------------------------------------------- /views/layout.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Paperless Assistant 5 | 33 | 34 | 35 |
36 | <%- content %> 37 |
38 | 39 | -------------------------------------------------------------------------------- /views/login.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Login - Paperless-AI 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 |
17 |
18 |
19 |
20 | Paperless AI Logo 21 |

Paperless-AI

22 |
23 |

Sign in to your account

24 |
25 | 26 |
27 |
28 | 29 | 35 |
36 | 37 |
38 | 39 | 45 |
46 | 47 | <% if (error) { %> 48 |
49 | <%= error %> 50 |
51 | <% } %> 52 | 53 |
54 | 58 |
59 |
60 |
61 |
62 | 63 | 98 | 99 | -------------------------------------------------------------------------------- /views/template.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Paperless-AI Dashboard 7 | 8 | 9 | 10 | 11 | 12 | 13 | 16 | 17 |
18 | 19 | 62 | 63 | 64 |
65 | 66 |
67 |
68 | 69 | 70 | --------------------------------------------------------------------------------