├── .dockerignore
├── .env.example
├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.yml
│ └── feature_request.md
└── workflows
│ ├── docker-build-push.yml
│ ├── manualPush.yml
│ ├── release-to-discord.yml
│ └── stale.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile
├── Dockerfile.rag
├── LICENSE
├── OPENAPI
└── openapi.json
├── PRIVACY_POLICY.md
├── RAG-DEV-GUIDE.md
├── README.md
├── SECURITY.md
├── config
└── config.js
├── dashboard.png
├── docker-compose.yml
├── docs
├── README.md
├── chat.png
├── favicon.ico
├── hero.png
├── history.png
├── index.html
├── playground.png
├── ppai_icon.png
└── settings.png
├── ecosystem.config.js
├── eslint.config.mjs
├── icon.png
├── icon.webp
├── jsdoc_standards.md
├── main.py
├── models
└── document.js
├── package-lock.json
├── package-lock.json.bak
├── package.json
├── paperless-ai-chrome.zip
├── ppairag.png
├── prettierrc.json
├── preview.png
├── public
├── css
│ ├── chat.css
│ ├── dashboard.css
│ ├── settings.css
│ └── setup.css
├── favicon.ico
└── js
│ ├── chat.js
│ ├── dashboard.js
│ ├── history.js
│ ├── manual.js
│ ├── playground-analyzer.js
│ ├── playground.js
│ ├── settings.js
│ └── setup.js
├── rag_ready.png
├── requirements.txt
├── routes
├── auth.js
├── rag.js
└── setup.js
├── schemas.js
├── server.js
├── services
├── aiServiceFactory.js
├── azureService.js
├── chatService.js
├── customService.js
├── debugService.js
├── documentsService.js
├── loggerService.js
├── manualService.js
├── ollamaService.js
├── openaiService.js
├── paperlessService.js
├── ragService.js
├── serviceUtils.js
└── setupService.js
├── setup.png
├── start-services.sh
├── swagger.js
└── views
├── chat.ejs
├── dashboard.ejs
├── debug.ejs
├── history.ejs
├── index.ejs
├── layout.ejs
├── login.ejs
├── manual.ejs
├── manual.ejs.bak
├── playground.ejs
├── rag.ejs
├── settings.ejs
├── setup.ejs
└── template.ejs
/.dockerignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | npm-debug.log
3 | data
4 | .git
5 | .gitignore
6 | .env
7 | *.md
8 | api-test.ps1
9 | openai_debug.log
10 | prompt.bak
11 | data_bak/
12 | data_bak2/
13 | data_*/
14 | preview.png
15 | .env.bak
16 | data bak/
17 | .env*
18 | docker-compose-dev.yml
19 | public/images/
20 | delete_all.js
21 | documents.json
22 | apitest.js
23 | logs/*
24 | api_correspondent.js
25 | prompt.txt
26 | api_test_r1.js
27 | openrouter.js
28 | /chromadb
29 | rag_config.conf
30 | indexed.conf
31 | indexing_complete.flag
32 | RAGZ_README.md
33 | /chromadb
34 | /chromadb/*
35 | rag_config.conf
36 | indexed.conf
37 | main.py.bak
38 | __pycache__/
39 |
40 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | PAPERLESS_AI_INITIAL_SETUP=yes
2 | PAPERLESS_API_URL=http://localhost:8000/api
3 | PAPERLESS_API_TOKEN=xxxxxxxxxxxxxxxxxxxx
4 | PAPERLESS_USERNAME=clusterzx
5 | AI_PROVIDER=custom
6 | OPENAI_API_KEY=
7 | OPENAI_MODEL=
8 | OLLAMA_API_URL=http://localhost:11434
9 | OLLAMA_MODEL=llama3.2
10 | SCAN_INTERVAL=*/30 * * * *
11 | SYSTEM_PROMPT=`You are a personalized document analyzer. Your task is to analyze documents and extract relevant information.\n\nAnalyze the document content and extract the following information into a structured JSON object:\n\n1. title: Create a concise, meaningful title for the document\n2. correspondent: Identify the sender/institution but do not include addresses\n3. tags: Select up to 4 relevant thematic tags\n4. document_date: Extract the document date (format: YYYY-MM-DD)\n5. language: Determine the document language (e.g. "de" or "en")\n \nImportant rules for the analysis:\n\nFor tags:\n- FIRST check the existing tags before suggesting new ones\n- Use only relevant categories\n- Maximum 4 tags per document, less if sufficient (at least 1)\n- Avoid generic or too specific tags\n- Use only the most important information for tag creation\n- The output language is the one used in the document! IMPORTANT!\n\nFor the title:\n- Short and concise, NO ADDRESSES\n- Contains the most important identification features\n- For invoices/orders, mention invoice/order number if available\n- The output language is the one used in the document! IMPORTANT!\n\nFor the correspondent:\n- Identify the sender or institution\n When generating the correspondent, always create the shortest possible form of the company name (e.g. "Amazon" instead of "Amazon EU SARL, German branch")\n\nFor the document date:\n- Extract the date of the document\n- Use the format YYYY-MM-DD\n- If multiple dates are present, use the most relevant one\n\nFor the language:\n- Determine the document language\n- Use language codes like "de" for German or "en" for English\n- If the language is not clear, use "und" as a placeholder
12 | `
13 | PROCESS_PREDEFINED_DOCUMENTS=yes
14 | TAGS=pre-process
15 | ADD_AI_PROCESSED_TAG=no
16 | AI_PROCESSED_TAG_NAME=ai-processed
17 | USE_PROMPT_TAGS=no
18 | PROMPT_TAGS=
19 | USE_EXISTING_DATA=no
20 | API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
21 | CUSTOM_API_KEY=xxxxxxxxxxxxxxxxxxxxxxx
22 | CUSTOM_BASE_URL=https://api.deepseek.com/v1
23 | CUSTOM_MODEL=deepseek-chat
24 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [clusterzx]
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: "🐞 Bug Report"
2 | description: "Report a bug or unexpected behavior"
3 | title: "[Bug]: "
4 | labels: ["bug", "triage"]
5 | assignees:
6 | - ""
7 | projects: ["clusterzx/paperless-ai"]
8 |
9 | body:
10 | - type: markdown
11 | attributes:
12 | value: |
13 | ## 🐞 Bug Report
14 | Thanks for taking the time to report an issue! Please fill in as much detail as possible to help us investigate and fix it.
15 |
16 | **⚠️ IMPORTANT: Issues that are missing required information or do not follow this template will be deleted without notice. Please ensure all required fields are completed.**
17 |
18 | - type: input
19 | id: bug_summary
20 | attributes:
21 | label: "🔍 Bug Summary"
22 | description: "Briefly describe the issue."
23 | placeholder: "A short, clear summary of the bug..."
24 | validations:
25 | required: true
26 |
27 | - type: textarea
28 | id: bug_description
29 | attributes:
30 | label: "📖 Description"
31 | description: "Provide a detailed description of the bug, including any observations."
32 | placeholder: "Explain the issue, expected behavior, and actual behavior."
33 | validations:
34 | required: true
35 |
36 | - type: textarea
37 | id: reproduction_steps
38 | attributes:
39 | label: "🔄 Steps to Reproduce"
40 | description: "How do we reproduce the issue?"
41 | placeholder: |
42 | 1. Go to '...'
43 | 2. Click on '...'
44 | 3. Scroll down to '...'
45 | 4. See error
46 | validations:
47 | required: true
48 |
49 | - type: textarea
50 | id: expected_behavior
51 | attributes:
52 | label: "✅ Expected Behavior"
53 | description: "What should happen instead?"
54 | placeholder: "Describe the expected outcome."
55 | validations:
56 | required: true
57 |
58 | - type: textarea
59 | id: actual_behavior
60 | attributes:
61 | label: "❌ Actual Behavior"
62 | description: "What actually happens?"
63 | placeholder: "Describe what you see instead of the expected behavior."
64 | validations:
65 | required: true
66 |
67 | - type: input
68 | id: paperless_ai_version
69 | attributes:
70 | label: "🏷️ Paperless-AI Version"
71 | description: "What version of Paperless-AI are you using? You can find this in your settings or docker-compose file."
72 | placeholder: "e.g. v1.2.3, latest, commit hash, etc."
73 | validations:
74 | required: true
75 |
76 | - type: textarea
77 | id: docker_logs
78 | attributes:
79 | label: "📜 Docker Logs"
80 | description: "Upload Docker logs (or paste relevant log snippets)."
81 | placeholder: "Drag & drop files here or paste logs."
82 | render: shell
83 | validations:
84 | required: true
85 |
86 | - type: textarea
87 | id: paperless_ngx_logs
88 | attributes:
89 | label: "📜 Paperless-ngx Logs"
90 | description: "Upload Paperless-ngx logs (or paste relevant log snippets)."
91 | placeholder: "Drag & drop files here or paste logs."
92 | render: shell
93 |
94 | - type: textarea
95 | id: screenshots
96 | attributes:
97 | label: "🖼️ Screenshots of your settings page"
98 | description: "Attach screenshots of your current paperless-ai settings."
99 | placeholder: "Drag & drop images or provide a link."
100 |
101 | - type: dropdown
102 | id: operating_system
103 | attributes:
104 | label: "🖥️ Desktop Environment"
105 | description: "Select the OS where the issue occurs."
106 | options:
107 | - "Windows"
108 | - "macOS"
109 | - "Linux"
110 | - "Other"
111 | validations:
112 | required: true
113 |
114 | - type: input
115 | id: os_version
116 | attributes:
117 | label: "💻 OS Version"
118 | description: "Enter your OS version."
119 | placeholder: "e.g. Windows 11, macOS 14, Ubuntu 22.04"
120 |
121 | - type: dropdown
122 | id: browser
123 | attributes:
124 | label: "🌐 Browser"
125 | description: "Which browser are you using (if applicable)?"
126 | options:
127 | - "Chrome"
128 | - "Safari"
129 | - "Firefox"
130 | - "Edge"
131 | - "Other"
132 |
133 | - type: input
134 | id: browser_version
135 | attributes:
136 | label: "🔢 Browser Version"
137 | description: "Enter your browser version (if applicable)."
138 | placeholder: "e.g. 120.0.6099.199"
139 |
140 | - type: input
141 | id: smartphone_browser
142 | attributes:
143 | label: "🌐 Mobile Browser"
144 | description: "Specify the browser you used on mobile (if applicable)."
145 | placeholder: "e.g. Safari, Chrome, Firefox"
146 |
147 | - type: checkboxes
148 | id: additional_info
149 | attributes:
150 | label: "📝 Additional Information"
151 | description: "Select any that apply."
152 | options:
153 | - label: "I have checked existing issues and this is not a duplicate"
154 | required: true
155 | - label: "I have tried debugging this issue on my own"
156 | - label: "I can provide a fix and submit a PR"
157 | - label: "I am sure that this problem is affecting everyone, not only me"
158 | - label: "I have provided all required information above"
159 | required: true
160 |
161 | - type: textarea
162 | id: extra_notes
163 | attributes:
164 | label: "📌 Extra Notes"
165 | description: "Anything else you'd like to add?"
166 | placeholder: "Additional comments or findings..."
167 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/docker-build-push.yml:
--------------------------------------------------------------------------------
1 | on:
2 | release:
3 | types: [published] # Only triggers when a release is published
4 | schedule:
5 | - cron: "0 0 * * *" # Nightly build
6 | jobs:
7 | docker:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Checkout code
11 | uses: actions/checkout@v3
12 | with:
13 | fetch-depth: 0
14 |
15 | - name: Set up QEMU
16 | uses: docker/setup-qemu-action@v2
17 |
18 | - name: Set up Docker Buildx
19 | uses: docker/setup-buildx-action@v2
20 |
21 | - name: Log in to Docker Hub
22 | uses: docker/login-action@v2
23 | with:
24 | username: ${{ secrets.DOCKER_USERNAME }}
25 | password: ${{ secrets.DOCKER_PASSWORD }}
26 |
27 | - name: Login to GitHub Container Registry
28 | uses: docker/login-action@v2
29 | with:
30 | registry: ghcr.io
31 | username: ${{ github.repository_owner }}
32 | password: ${{ secrets.GHCR_PAT }}
33 |
34 | - name: Extract metadata
35 | id: meta
36 | uses: docker/metadata-action@v4
37 | with:
38 | images: |
39 | ${{ secrets.DOCKER_USERNAME }}/paperless-ai
40 | ghcr.io/${{ github.repository_owner }}/paperless-ai
41 | tags: |
42 | type=schedule,pattern=nightly
43 | type=semver,pattern={{version}},enable=${{ github.event_name == 'release' }}
44 | type=raw,value=latest,enable=${{ github.event_name == 'release' }}
45 |
46 | - name: Build and push
47 | uses: docker/build-push-action@v4
48 | with:
49 | context: .
50 | push: true
51 | platforms: linux/amd64,linux/arm64
52 | tags: ${{ steps.meta.outputs.tags }}
53 | labels: ${{ steps.meta.outputs.labels }}
54 |
--------------------------------------------------------------------------------
/.github/workflows/manualPush.yml:
--------------------------------------------------------------------------------
1 | name: Manual Docker Build and Push
2 |
3 | on:
4 | workflow_dispatch: # Allows manual triggering from the GitHub UI
5 | inputs:
6 | tag:
7 | description: 'Custom tag (optional, defaults to latest)'
8 | required: false
9 | default: 'latest'
10 | type: string
11 |
12 | jobs:
13 | docker:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - name: Checkout code
17 | uses: actions/checkout@v3
18 | with:
19 | fetch-depth: 0
20 |
21 | - name: Set up QEMU
22 | uses: docker/setup-qemu-action@v2
23 |
24 | - name: Set up Docker Buildx
25 | uses: docker/setup-buildx-action@v2
26 |
27 | - name: Log in to Docker Hub
28 | uses: docker/login-action@v2
29 | with:
30 | username: ${{ secrets.DOCKER_USERNAME }}
31 | password: ${{ secrets.DOCKER_PASSWORD }}
32 |
33 | - name: Login to GitHub Container Registry
34 | uses: docker/login-action@v2
35 | with:
36 | registry: ghcr.io
37 | username: ${{ github.repository_owner }}
38 | password: ${{ secrets.GHCR_PAT }}
39 |
40 | - name: Extract metadata
41 | id: meta
42 | uses: docker/metadata-action@v4
43 | with:
44 | images: |
45 | ${{ secrets.DOCKER_USERNAME }}/paperless-ai
46 | ghcr.io/${{ github.repository_owner }}/paperless-ai
47 | tags: |
48 | type=raw,value=${{ inputs.tag }}
49 | type=sha,prefix={{branch}}-
50 |
51 | - name: Build and push
52 | uses: docker/build-push-action@v4
53 | with:
54 | context: .
55 | push: true
56 | platforms: linux/amd64,linux/arm64
57 | tags: ${{ steps.meta.outputs.tags }}
58 | labels: ${{ steps.meta.outputs.labels }}
59 |
60 | - name: Summary
61 | run: |
62 | echo "## 🚀 Docker Build Complete" >> $GITHUB_STEP_SUMMARY
63 | echo "Successfully built and pushed Docker images with tag: **${{ inputs.tag }}**" >> $GITHUB_STEP_SUMMARY
64 | echo "" >> $GITHUB_STEP_SUMMARY
65 | echo "### Images pushed to:" >> $GITHUB_STEP_SUMMARY
66 | echo "- Docker Hub: \`${{ secrets.DOCKER_USERNAME }}/paperless-ai:${{ inputs.tag }}\`" >> $GITHUB_STEP_SUMMARY
67 | echo "- GitHub Container Registry: \`ghcr.io/${{ github.repository_owner }}/paperless-ai:${{ inputs.tag }}\`" >> $GITHUB_STEP_SUMMARY
68 |
--------------------------------------------------------------------------------
/.github/workflows/release-to-discord.yml:
--------------------------------------------------------------------------------
1 | name: Notify Discord on Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | discord_notification:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Send Discord Notification
12 | uses: sarisia/actions-status-discord@v1.15.3
13 | with:
14 | webhook: ${{ secrets.DISCORD_WEBHOOK_URL }}
15 | username: "GitHub Bot 🤖"
16 | avatar_url: "https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png"
17 | content: |
18 | **🚀 New Release Published!**
19 | 📌 **Repository:** `${{ github.repository }}`
20 | 🏷️ **Version:** `${{ github.event.release.tag_name }}`
21 | 📜 **Description:**
22 | ```
23 | ${{ github.event.release.body }}
24 | ```
25 | 🔗 **[View Release](${{ github.event.release.html_url }})**
26 | embeds: |
27 | [
28 | {
29 | "title": "🎉 New Release: ${{ github.event.release.tag_name }}",
30 | "description": "${{ github.event.release.body }}",
31 | "url": "${{ github.event.release.html_url }}",
32 | "color": 16776960,
33 | "footer": {
34 | "text": "GitHub Actions",
35 | "icon_url": "https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png"
36 | },
37 | "timestamp": "${{ github.event.release.published_at }}"
38 | }
39 | ]
40 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Mark stale issues and pull requests
2 |
3 | on:
4 | schedule:
5 | - cron: "0 0 * * *" # Runs daily at midnight
6 |
7 | jobs:
8 | stale:
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - name: Close stale issues and pull requests
13 | uses: actions/stale@v8
14 | with:
15 | repo-token: ${{ secrets.stale_bot }}
16 | stale-issue-message: "This issue has been marked as stale due to inactivity. Please respond to keep it open."
17 | stale-pr-message: "This pull request has been marked as stale due to inactivity. Please update it to keep it open."
18 | close-issue-message: "This issue has been closed due to lack of response."
19 | close-pr-message: "This pull request has been closed due to lack of response."
20 | days-before-stale: 7 # Days before an issue or PR is marked as stale
21 | days-before-close: 3 # Days before a stale issue or PR is closed
22 | stale-issue-label: "stale" # Label added to stale issues
23 | exempt-issue-labels: "wontfix,bug" # Labels that exempt issues from being marked as stale
24 | only-issues: true # Only affect issues, not PRs
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | .env.bak
3 | .dockerignore
4 | data/
5 | node_modules/
6 | api-test.ps1
7 | openai_debug.log
8 | prompt.bak
9 | data bak/
10 | data_bak/
11 | data_*/
12 | .env*
13 | docker-compose-dev.yml
14 | public/images/
15 | delete_all.js
16 | documents.json
17 | apitest.js
18 | logs/*
19 | api_correspondent.js
20 | prompt.txt
21 | api_test_r1.js
22 | openrouter.js
23 | /chromadb
24 | rag_config.conf
25 | indexed.conf
26 | indexing_complete.flag
27 | RAGZ_README.md
28 | /chromadb
29 | /chromadb/*
30 | rag_config.conf
31 | indexed.conf
32 | main.py.bak
33 | indexing_complete.flag
34 | __pycache__/
35 | main.pyy*
36 | rag.ejs.old
37 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | clusterz[at]protonmail.com.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Contributing
2 |
3 | 1. Fork the repository
4 | 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
5 | 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
6 | 4. Push to the branch (`git push origin feature/AmazingFeature`)
7 | 5. Open a Pull Request
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use a slim Node.js (LTS) image as base
2 | FROM node:22-slim
3 |
4 | WORKDIR /app
5 |
6 | # Install system dependencies and clean up in single layer
7 | RUN apt-get update && \
8 | apt-get install -y --no-install-recommends \
9 | python3 \
10 | python3-pip \
11 | python3-dev \
12 | python3-venv \
13 | make \
14 | g++ \
15 | curl \
16 | wget && \
17 | apt-get clean && \
18 | rm -rf /var/lib/apt/lists/*
19 |
20 | # Install PM2 process manager globally
21 | RUN npm install pm2 -g
22 |
23 | # Install Python dependencies for RAG service in a virtual environment
24 | COPY requirements.txt /app/
25 | RUN python3 -m venv /app/venv
26 | ENV PATH="/app/venv/bin:$PATH"
27 | RUN pip install --upgrade pip && pip install --no-cache-dir -r requirements.txt
28 |
29 | # Copy package files for dependency installation
30 | COPY package*.json ./
31 |
32 | # Install node dependencies with clean install
33 | RUN npm ci --only=production && npm cache clean --force
34 |
35 | # Copy application source code
36 | COPY . .
37 |
38 | # Make startup script executable
39 | RUN chmod +x start-services.sh
40 |
41 | # Configure persistent data volume
42 | VOLUME ["/app/data"]
43 |
44 | # Configure application port - the actual port is determined by PAPERLESS_AI_PORT
45 | EXPOSE ${PAPERLESS_AI_PORT:-3000}
46 |
47 | # Add health check with dynamic port
48 | HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
49 | CMD curl -f http://localhost:${PAPERLESS_AI_PORT:-3000}/health || exit 1
50 |
51 | # Set production environment
52 | ENV NODE_ENV=production
53 |
54 | # Start both Node.js and Python services using our script
55 | CMD ["./start-services.sh"]
56 |
--------------------------------------------------------------------------------
/Dockerfile.rag:
--------------------------------------------------------------------------------
1 | FROM python:3.10-slim
2 |
3 | WORKDIR /app
4 |
5 | # Install system dependencies
6 | RUN apt-get update && apt-get install -y \
7 | build-essential \
8 | && rm -rf /var/lib/apt/lists/*
9 |
10 | # Copy requirements file
11 | COPY requirements.txt /app/
12 |
13 | # Install Python dependencies
14 | RUN pip install --no-cache-dir -r requirements.txt
15 |
16 | # Copy Python code
17 | COPY main.py /app/
18 |
19 | # Create necessary directories
20 | RUN mkdir -p /app/data /app/data/chromadb
21 |
22 | # Expose port for FastAPI
23 | EXPOSE 8000
24 |
25 | # Run the application with auto-initialization
26 | CMD ["python", "main.py", "--host", "0.0.0.0", "--port", "8000", "--initialize"]
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 clusterzx
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/PRIVACY_POLICY.md:
--------------------------------------------------------------------------------
1 | # Privacy Policy for Paperless-AI Chat Extension
2 |
3 | Last updated: 16.01.2025
4 |
5 | ## 1. General Information
6 |
7 | The Paperless-AI Chat Extension ("the Extension") is a browser extension designed to enhance document interaction in Paperless-ngx through AI-powered chat functionality. We are committed to protecting your privacy and personal data.
8 |
9 | ## 2. Data Controller
10 |
11 | Email: clusterz[at]protonmail.com
12 |
13 | ## 3. Data Collection and Processing
14 |
15 | ### 3.1 Stored Data
16 | The Extension stores the following data locally in your browser:
17 | - URL of your Paperless-ngx installation
18 | - URL of your Paperless-AI server
19 | - API key for the Paperless-AI service
20 |
21 | This data is stored exclusively in the Chrome Storage Sync API and is only accessible by the Extension.
22 |
23 | ### 3.2 Document Content Processing
24 | - The Extension only accesses document content when you actively use the chat function for a specific document
25 | - Document contents are transmitted exclusively to your configured Paperless-AI server
26 | - No document content is transmitted to third parties
27 |
28 | ### 3.3 Chat History
29 | - Chat histories are only temporarily held in browser memory
30 | - This data is deleted when closing the chat window
31 | - No permanent storage of chat histories occurs in the Extension
32 |
33 | ## 4. Data Transmission
34 |
35 | The Extension transmits data exclusively to:
36 | - Your self-hosted Paperless-ngx installation
37 | - Your self-configured Paperless-AI server
38 |
39 | No data is transmitted to the Extension developers or other third parties.
40 |
41 | ## 5. Permissions
42 |
43 | The Extension requires the following browser permissions:
44 | - "storage": For saving your configuration settings
45 | - "activeTab": For integrating chat functionality into the Paperless-ngx interface
46 | - "host_permissions": For communication with your Paperless-ngx and Paperless-AI servers
47 |
48 | ## 6. Data Security
49 |
50 | - All communication with your servers is encrypted via HTTPS
51 | - The API key is securely stored in the Chrome Storage system
52 | - The Extension implements best practices for handling sensitive data
53 |
54 | ## 7. Your Rights
55 |
56 | You have the right to:
57 | - Uninstall the Extension at any time
58 | - Delete your stored settings
59 | - Cease using the Extension at any time
60 |
61 | Under GDPR, you also have the following rights:
62 | - Right to access your personal data
63 | - Right to rectification
64 | - Right to erasure ("right to be forgotten")
65 | - Right to restrict processing
66 | - Right to data portability
67 | - Right to object
68 |
69 | ## 8. Changes to Privacy Policy
70 |
71 | We reserve the right to modify this privacy policy when necessary, in compliance with applicable data protection regulations. The current version can always be found in the `PRIVACY_POLICY.md` file of the Paperless-AI repository (https://github.com/clusterzx/paperless-ai).
72 |
73 | ## 9. Contact
74 |
75 | If you have any questions about data protection, you can contact us at any time:
76 | clusterz[at]protonmail.com
77 |
78 | ## 10. Consent
79 |
80 | By installing and using the Extension, you agree to this privacy policy. You can withdraw your consent at any time by uninstalling the Extension.
81 |
82 | ## 11. Technical Details
83 |
84 | ### 11.1 Data Storage Location
85 | All configuration data is stored locally in your browser using Chrome's secure storage APIs. No data is stored on our servers.
86 |
87 | ### 11.2 Data Processing
88 | - Document content is processed only when explicitly requested through the chat interface
89 | - Processing occurs on your configured Paperless-AI server
90 | - No content caching or storage occurs within the Extension
91 |
92 | ### 11.3 Security Measures
93 | - All API communications use HTTPS encryption
94 | - API keys are stored using Chrome's secure storage system
95 | - No logging or tracking of user activities
96 | - No analytics or tracking code is included in the Extension
97 |
98 | ## 12. Children's Privacy
99 |
100 | The Extension is not intended for use by children under the age of 13. We do not knowingly collect or process data from children under 13 years of age.
101 |
102 | ## 13. International Data Transfers
103 |
104 | As the Extension operates entirely within your browser and communicates only with servers you configure, no international data transfers occur through our services.
105 |
--------------------------------------------------------------------------------
/RAG-DEV-GUIDE.md:
--------------------------------------------------------------------------------
1 | # Using the RAG Service in Development Mode
2 |
3 | This guide explains how to run the Paperless-AI application with the RAG service in a local development environment without Docker.
4 |
5 | ## Understanding the Architecture
6 |
7 | The integration consists of two main components:
8 |
9 | 1. **Python RAG Service (main.py)**: Handles document indexing, search, and context retrieval
10 | 2. **Node.js Integration**: Manages the UI, communicates with the Python service, and uses LLMs to generate responses
11 |
12 | In production, both services run in the same Docker container, but for development, you can run them separately.
13 |
14 | ## Prerequisites
15 |
16 | - Node.js 16+ for the main Paperless-AI application
17 | - Python 3.10+ for the RAG service
18 | - A running Paperless-NGX instance (for document access)
19 |
20 | ## Option 1: Run Both Services Together (Recommended)
21 |
22 | 1. Make sure you have all dependencies installed:
23 |
24 | ```bash
25 | # Install Node.js dependencies
26 | npm install
27 |
28 | # Install Python dependencies
29 | pip install -r requirements.txt
30 | ```
31 |
32 | 2. Configure your `.env` file in the `data` directory with your Paperless-NGX credentials:
33 |
34 | ```
35 | PAPERLESS_API_URL=https://your-paperless-ngx-instance
36 | PAPERLESS_API_TOKEN=your-api-token
37 | ```
38 |
39 | **Note:** The Python service will also read the existing API settings from this file (PAPERLESS_API_URL).
40 |
41 | 3. Run both services using the provided script:
42 |
43 | ```bash
44 | # Make the script executable first (Linux/macOS)
45 | chmod +x start-services.sh
46 |
47 | # Run the services
48 | ./start-services.sh
49 | ```
50 |
51 | ## Option 2: Run Services Separately
52 |
53 | ### Step 1: Set Up the Python RAG Service
54 |
55 | 1. Install Python dependencies:
56 |
57 | ```bash
58 | pip install -r requirements.txt
59 | ```
60 |
61 | 2. Start the Python RAG service:
62 |
63 | ```bash
64 | python main.py --host 127.0.0.1 --port 8000 --initialize
65 | ```
66 |
67 | The `--initialize` flag will build the document index on startup.
68 |
69 | ### Step 2: Configure the Paperless-AI Application
70 |
71 | 1. Set the environment variables for the Node.js application:
72 |
73 | For Windows (Command Prompt):
74 | ```cmd
75 | set RAG_SERVICE_URL=http://localhost:8000
76 | set RAG_SERVICE_ENABLED=true
77 | ```
78 |
79 | For Windows (PowerShell):
80 | ```powershell
81 | $env:RAG_SERVICE_URL="http://localhost:8000"
82 | $env:RAG_SERVICE_ENABLED="true"
83 | ```
84 |
85 | For Linux/macOS:
86 | ```bash
87 | export RAG_SERVICE_URL=http://localhost:8000
88 | export RAG_SERVICE_ENABLED=true
89 | ```
90 |
91 | 2. Start the Paperless-AI application in development mode:
92 |
93 | ```bash
94 | npm run dev
95 | ```
96 |
97 | ## Accessing the RAG Interface
98 |
99 | Open your browser and navigate to:
100 |
101 | ```
102 | http://localhost:3000/rag
103 | ```
104 |
105 | You should see the RAG interface where you can ask questions about your documents.
106 |
107 | ## Troubleshooting
108 |
109 | ### Environment Variables
110 |
111 | - The Python service looks for these variables in this order:
112 | - For API URL: `PAPERLESS_API_URL`, then `PAPERLESS_URL`, then `PAPERLESS_NGX_URL`, then `PAPERLESS_HOST`
113 | - For API Token: `PAPERLESS_TOKEN`, then `PAPERLESS_API_TOKEN`, then `PAPERLESS_APIKEY`
114 |
115 | - If your existing `.env` file uses one of the alternative variable names listed above instead of the first one, the Python service will still find it.
116 |
117 | ### Common Issues
118 |
119 | - **Missing Documents**: Check that the indexing has completed. You can check the status at `http://localhost:8000/indexing/status`.
120 | - **Connection Errors**: Ensure your Paperless-NGX credentials are correct and the instance is accessible.
121 | - **Port Conflicts**: If port 8000 is already in use, specify a different port with the `--port` parameter and update the `RAG_SERVICE_URL` environment variable accordingly.
122 |
123 | ## Development Workflow
124 |
125 | When making changes to the codebase:
126 |
127 | 1. **Python RAG Service Changes**:
128 | - Edit `main.py`
129 | - Restart the Python service to apply changes
130 |
131 | 2. **Paperless-AI Integration Changes**:
132 | - Edit Node.js files (like `services/ragService.js` or `routes/rag.js`)
133 | - If using nodemon (with `npm run dev`), changes should be applied automatically
134 | - For UI changes to `views/rag.ejs`, refresh the browser
135 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |    
2 |
3 | ## Discord:
4 | ### [https://discord.gg/AvNekAfK38](https://discord.gg/AvNekAfK38)
5 |
6 | # Paperless-AI
7 |
8 | An automated document analyzer for Paperless-ngx using OpenAI API, Ollama and all OpenAI API compatible Services to automatically analyze and tag your documents. \
9 | It features: Auto mode, Manual mode, Ollama and OpenAI support, a chat function to query your documents with AI, and a modern, intuitive web interface. \
10 | \
11 | **The following services and OpenAI API-compatible services have been successfully tested:**
12 | - Ollama
13 | - OpenAI
14 | - DeepSeek.ai
15 | - OpenRouter.ai
16 | - Perplexity.ai
17 | - Together.ai
18 | - VLLM
19 | - LiteLLM
20 | - Fastchat
21 | - Gemini (Google)
22 | - ... and there are possibly many more
23 |
24 | > 🚀 **New Feature Announcement**
25 | > **Paperless-AI now includes a powerful, integrated RAG-powered Chat interface!**
26 | > Introducing a whole new way to interact with your Paperless-NGX archive: instead of browsing, filtering, or guessing which tags to search for — just ask.
27 | > Thanks to Retrieval-Augmented Generation (RAG), you can now search semantically across the full content of your documents and get human-like answers instantly.
28 |
29 | > 🔍 **No more guessing. Just ask.**
30 | > Want to know _“When did I receive my electricity contract?”_, _“How much did I pay for the last car repair?”_ or _“Which documents mention my health insurance?”_ — Paperless-AI will find it for you, even if you don’t remember the exact title, sender, or date.
31 |
32 | > 💡 **What does RAG bring to Paperless-NGX?**
33 | > - True full-text understanding of your documents
34 | > - Context-aware responses — beyond keyword search
35 | > - Useful when dealing with large or chaotic document archives
36 | > - Saves time, avoids frustration, and unlocks insights you may have forgotten you had stored
37 | > - Blazingly fast answers backed by your own trusted data
38 |
39 | 
40 |
41 | > ⚠️ **Important Note**: If you're installing Paperless-AI for the **first time**, please **restart the container after completing the setup routine** (where you enter your API keys and preferences). This ensures that all services initialize correctly and your RAG index is built properly.
42 | > ➕ This step is **not required when updating** an existing installation.
43 |
44 |
45 | 
46 |
47 |
48 | ## Features
49 |
50 | ### Automated Document Management
51 | - **Automatic Scanning**: Identifies and processes new documents within Paperless-ngx.
52 | - **AI-Powered Analysis**: Leverages OpenAI API and Ollama (Mistral, Llama, Phi 3, Gemma 2) for precise document analysis.
53 | - **Metadata Assignment**: Automatically assigns titles, tags, document_type and correspondent details.
54 |
55 | ### Advanced Customization Options
56 | - **Predefined Processing Rules**: Specify which documents to process based on existing tags. *(Optional)* 🆕
57 | - **Selective Tag Assignment**: Use only selected tags for processing. *(Disables the prompt dialog)* 🆕
58 | - **Custom Tagging**: Assign a specific tag (of your choice) to AI-processed documents for easy identification. 🆕
59 |
60 | ### Manual Mode
61 | - **AI-Assisted Analysis**: Manually analyze documents with AI support in a modern web interface. *(Accessible via the `/manual` endpoint)* 🆕
62 |
63 | ### Interactive Chat Functionality
64 | - **Document Querying**: Ask questions about your documents and receive accurate, AI-generated answers. 🆕
65 |
66 | ## Installation
67 |
68 | Visit the Wiki for installation:\
69 | [Click here for Installation](https://github.com/clusterzx/paperless-ai/wiki/2.-Installation)
70 | -------------------------------------------
71 |
72 |
73 | ## Docker Support
74 |
75 | The application comes with full Docker support:
76 |
77 | - Automatic container restart on failure
78 | - Health monitoring
79 | - Volume persistence for database
80 | - Resource management
81 | - Graceful shutdown handling
82 |
83 | ## Development
84 |
85 | To run the application locally without Docker:
86 |
87 | 1. Install dependencies:
88 | ```bash
89 | npm install
90 | ```
91 |
92 | 2. Start the development server:
93 | ```bash
94 | npm run test
95 | ```
96 |
97 | ## Contributing
98 |
99 | 1. Fork the repository
100 | 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
101 | 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
102 | 4. Push to the branch (`git push origin feature/AmazingFeature`)
103 | 5. Open a Pull Request
104 |
105 | ## License
106 |
107 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
108 |
109 | ## Acknowledgments
110 |
111 | - [Paperless-ngx](https://github.com/paperless-ngx/paperless-ngx) for the amazing document management system
112 | - OpenAI API
113 | - The Express.js and Node.js communities for their excellent tools
114 |
115 | ## Support
116 |
117 | If you encounter any issues or have questions:
118 |
119 | 1. Check the [Issues](https://github.com/clusterzx/paperless-ai/issues) section
120 | 2. Create a new issue if yours isn't already listed
121 | 3. Provide detailed information about your setup and the problem
122 |
123 | ## Roadmap (DONE)
124 |
125 | - [x] Support for custom AI models
126 | - [x] Support for multiple language analysis
127 | - [x] Advanced tag matching algorithms
128 | - [x] Custom rules for document processing
129 | - [x] Enhanced web interface with statistics
130 |
131 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | | Version | Supported |
6 | | ------- | ------------------ |
7 | | 2.5.2 | :white_check_mark: |
8 | | 2.5.0 | :white_check_mark: |
9 | | 1.9.x | :x: |
10 | | < 1.9 | :x: |
11 |
12 | ## Reporting a Vulnerability
13 |
14 | If you find a security vulnerability, please open an issue.
15 |
--------------------------------------------------------------------------------
/config/config.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 | const currentDir = decodeURIComponent(process.cwd());
3 | const envPath = path.join(currentDir, 'data', '.env');
4 | console.log('Loading .env from:', envPath); // Debug log
5 | require('dotenv').config({ path: envPath });
6 |
/**
 * Normalize a boolean-like environment variable to the 'yes' / 'no' strings
 * used by the rest of this configuration.
 *
 * @param {unknown} value - Raw value, typically read from process.env.
 * @param {string} [defaultValue='yes'] - Returned unchanged when value is unset or blank.
 * @returns {string} 'yes' for "true", "1" or "yes" (case-insensitive, surrounding
 *   whitespace ignored), 'no' for any other non-blank value, otherwise defaultValue.
 */
const parseEnvBoolean = (value, defaultValue = 'yes') => {
  if (!value) return defaultValue;
  // Coerce and trim first: a quoted .env value such as " Yes " keeps its
  // whitespace, and a non-string input would otherwise throw on toLowerCase().
  const normalized = String(value).trim().toLowerCase();
  if (normalized === '') return defaultValue;
  return ['true', '1', 'yes'].includes(normalized) ? 'yes' : 'no';
};
12 |
13 | // Feature toggles driven by the ACTIVATE_* env vars; parseEnvBoolean maps each one to 'yes'/'no' and falls back to 'yes' when the variable is unset
14 | const limitFunctions = {
15 | activateTagging: parseEnvBoolean(process.env.ACTIVATE_TAGGING, 'yes'),
16 | activateCorrespondents: parseEnvBoolean(process.env.ACTIVATE_CORRESPONDENTS, 'yes'),
17 | activateDocumentType: parseEnvBoolean(process.env.ACTIVATE_DOCUMENT_TYPE, 'yes'),
18 | activateTitle: parseEnvBoolean(process.env.ACTIVATE_TITLE, 'yes'),
19 | activateCustomFields: parseEnvBoolean(process.env.ACTIVATE_CUSTOM_FIELDS, 'yes')
20 | };
21 |
22 | console.log('Loaded environment variables:', { // startup diagnostic, emitted when this module is evaluated
23 | PAPERLESS_API_URL: process.env.PAPERLESS_API_URL,
24 | PAPERLESS_API_TOKEN: '******', // fixed mask literal: the real token is never written to the log
25 | LIMIT_FUNCTIONS: limitFunctions
26 | });
27 |
28 | module.exports = { // application configuration assembled from process.env with fallbacks
29 | PAPERLESS_AI_VERSION: '3.0.4',
30 | CONFIGURED: false,
31 | disableAutomaticProcessing: process.env.DISABLE_AUTOMATIC_PROCESSING || 'no',
32 | predefinedMode: process.env.PROCESS_PREDEFINED_DOCUMENTS, // no fallback: undefined when the variable is unset
33 | tokenLimit: process.env.TOKEN_LIMIT || 128000, // NOTE: a string when taken from the env, a number when the fallback is used
34 | responseTokens: process.env.RESPONSE_TOKENS || 1000, // NOTE: same string-or-number duality as tokenLimit
35 | addAIProcessedTag: process.env.ADD_AI_PROCESSED_TAG || 'no',
36 | addAIProcessedTags: process.env.AI_PROCESSED_TAG_NAME || 'ai-processed', // holds the tag name (AI_PROCESSED_TAG_NAME) despite the plural key
37 | paperless: {
38 | apiUrl: process.env.PAPERLESS_API_URL,
39 | apiToken: process.env.PAPERLESS_API_TOKEN
40 | },
41 | openai: {
42 | apiKey: process.env.OPENAI_API_KEY
43 | },
44 | ollama: {
45 | apiUrl: process.env.OLLAMA_API_URL || 'http://localhost:11434',
46 | model: process.env.OLLAMA_MODEL || 'llama3.2'
47 | },
48 | custom: {
49 | apiUrl: process.env.CUSTOM_BASE_URL || '',
50 | apiKey: process.env.CUSTOM_API_KEY || '',
51 | model: process.env.CUSTOM_MODEL || ''
52 | },
53 | azure: {
54 | apiKey: process.env.AZURE_API_KEY || '',
55 | endpoint: process.env.AZURE_ENDPOINT || '',
56 | deploymentName: process.env.AZURE_DEPLOYMENT_NAME || '',
57 | apiVersion: process.env.AZURE_API_VERSION || '2023-05-15'
58 | },
59 | customFields: process.env.CUSTOM_FIELDS || '',
60 | aiProvider: process.env.AI_PROVIDER || 'openai',
61 | scanInterval: process.env.SCAN_INTERVAL || '*/30 * * * *', // fallback is a cron expression for every 30 minutes
62 | useExistingData: process.env.USE_EXISTING_DATA || 'no',
63 | // Expose the normalized ACTIVATE_* toggles computed above
64 | limitFunctions: {
65 | activateTagging: limitFunctions.activateTagging,
66 | activateCorrespondents: limitFunctions.activateCorrespondents,
67 | activateDocumentType: limitFunctions.activateDocumentType,
68 | activateTitle: limitFunctions.activateTitle,
69 | activateCustomFields: limitFunctions.activateCustomFields
70 | }, // the prompt that follows interpolates process.env.PROMPT_TAGS once, when this module is evaluated
71 | specialPromptPreDefinedTags: `You are a document analysis AI. You will analyze the document.
72 | You take the main information to associate tags with the document.
73 | You will also find the correspondent of the document (Sender not receiver). Also you find a meaningful and short title for the document.
74 | You are given a list of tags: ${process.env.PROMPT_TAGS}
75 | Only use the tags from the list and try to find the best fitting tags.
76 | You do not ask for additional information, you only use the information given in the document.
77 |
78 | Return the result EXCLUSIVELY as a JSON object. The Tags and Title MUST be in the language that is used in the document.:
79 | {
80 | "title": "xxxxx",
81 | "correspondent": "xxxxxxxx",
82 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
83 | "document_date": "YYYY-MM-DD",
84 | "language": "en/de/es/..."
85 | }`,
86 | mustHavePrompt: ` Return the result EXCLUSIVELY as a JSON object. The Tags, Title and Document_Type MUST be in the language that is used in the document.:
87 | IMPORTANT: The custom_fields are optional and can be left out if not needed, only try to fill out the values if you find a matching information in the document.
88 | Do not change the value of field_name, only fill out the values. If the field is about money only add the number without currency and always use a . for decimal places.
89 | {
90 | "title": "xxxxx",
91 | "correspondent": "xxxxxxxx",
92 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
93 | "document_type": "Invoice/Contract/...",
94 | "document_date": "YYYY-MM-DD",
95 | "language": "en/de/es/...",
96 | %CUSTOMFIELDS%
97 | }`, // %CUSTOMFIELDS% is left as a literal placeholder here; presumably substituted by the consumer of this prompt (TODO confirm)
98 | };
--------------------------------------------------------------------------------
/dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/dashboard.png
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | paperless-ai:
3 | image: clusterzx/paperless-ai
4 | container_name: paperless-ai
5 | network_mode: bridge
6 | restart: unless-stopped
7 | cap_drop: # hardening: drop every Linux capability
8 | - ALL
9 | security_opt:
10 | - no-new-privileges=true # processes in the container cannot gain additional privileges
11 | environment:
12 | - PUID=1000
13 | - PGID=1000
14 | - PAPERLESS_AI_PORT=${PAPERLESS_AI_PORT:-3000} # falls back to 3000 when unset on the host
15 | - RAG_SERVICE_URL=http://localhost:8000
16 | - RAG_SERVICE_ENABLED=true
17 | ports:
18 | - "3000:${PAPERLESS_AI_PORT:-3000}" # host port 3000 -> container port PAPERLESS_AI_PORT
19 | volumes:
20 | - paperless-ai_data:/app/data # named volume mounted at /app/data
21 |
22 | volumes:
23 | paperless-ai_data:
24 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/chat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/chat.png
--------------------------------------------------------------------------------
/docs/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/favicon.ico
--------------------------------------------------------------------------------
/docs/hero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/hero.png
--------------------------------------------------------------------------------
/docs/history.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/history.png
--------------------------------------------------------------------------------
/docs/playground.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/playground.png
--------------------------------------------------------------------------------
/docs/ppai_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/ppai_icon.png
--------------------------------------------------------------------------------
/docs/settings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/docs/settings.png
--------------------------------------------------------------------------------
/ecosystem.config.js:
--------------------------------------------------------------------------------
1 | module.exports = { // process-manager app definition (keys match PM2 ecosystem options)
2 | apps: [{
3 | name: 'paperless-ai',
4 | script: 'server.js', // entry point that is launched
5 | instances: 1,
6 | autorestart: true,
7 | watch: false, // no restart on file changes
8 | max_memory_restart: '1G', // restart the app once it exceeds 1G of memory
9 | env: {
10 | NODE_ENV: 'production'
11 | },
12 | exp_backoff_restart_delay: 100 // exponential backoff between restarts, starting at 100 ms
13 | }]
14 | };
--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import globals from "globals";
2 | import pluginJs from "@eslint/js";
3 | import prettier from "eslint-config-prettier";
4 |
5 | /** @type {import('eslint').Linter.Config[]} */
6 | export default [
7 | {
8 | files: ["**/*.js"],
9 | languageOptions: {
10 | sourceType: "commonjs", // .js files are linted as CommonJS modules
11 | globals: {
12 | ...globals.browser, // both browser and Node globals are allowed in every matched file
13 | ...globals.node,
14 | },
15 | },
16 | },
17 | pluginJs.configs.recommended,
18 | prettier, // eslint-config-prettier: listed last so it can switch off rules that conflict with Prettier
19 | ];
--------------------------------------------------------------------------------
/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/icon.png
--------------------------------------------------------------------------------
/icon.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/icon.webp
--------------------------------------------------------------------------------
/jsdoc_standards.md:
--------------------------------------------------------------------------------
1 | # JSDoc/Swagger Documentation Standards for Paperless-AI API
2 |
3 | The following detailed standard is what all API route documentation should adhere to:
4 |
5 | ## 1. Basic Structure and Format
6 |
7 | Every route in the API must be documented with a JSDoc comment block using the `@swagger` tag following the OpenAPI 3.0.0 specification. The documentation should be placed immediately before the route handler function.
8 |
9 | ```javascript
10 | /**
11 | * @swagger
12 | * /path/to/endpoint:
13 | * method:
14 | * // Documentation content
15 | */
16 | router.method('/path/to/endpoint', async (req, res) => {
17 | ```
18 |
19 | ## 2. Core Documentation Elements
20 |
21 | ### 2.1 Route Path Definition
22 |
23 | - The route path must match exactly the path defined in the Express route handler
24 | - Path parameters should be defined using curly braces: `/path/{paramName}`
25 | - Trailing slashes should be avoided for consistency
26 |
27 | ### 2.2 HTTP Method
28 |
29 | - The HTTP method (get, post, put, delete) should be indented under the path
30 | - Only one method should be defined per documentation block
31 | - Multiple methods for the same path should be documented separately
32 |
33 | ### 2.3 Summary and Description
34 |
35 | - Every endpoint must have a clear, concise `summary` field (single line)
36 | - Every endpoint should also have a more detailed `description` field, using the pipe symbol (`|`) for multi-line content
37 | - The description should:
38 | - Explain the purpose of the endpoint in 2-3 sentences
39 | - Describe key functionality and behaviors
40 | - Note any important side effects or dependencies
41 | - Use proper grammar and complete sentences
42 | - For complex endpoints, include usage examples or explanations of how the endpoint works in the larger application context
43 |
44 | Example:
45 | ```javascript
46 | /**
47 | * @swagger
48 | * /api/example:
49 | * get:
50 | * summary: Brief description of what this endpoint does
51 | * description: |
52 | * Detailed explanation of the endpoint functionality.
53 | * This should cover what the endpoint does, how it works,
54 | * and any important behaviors users should know about.
55 | *
56 | * Use multiple paragraphs for complex explanations.
57 | */
58 | ```
59 |
60 | ## 3. Tags and Categorization
61 |
62 | ### 3.1 Tag Requirements
63 |
64 | - Each endpoint must be assigned to at least one tag, often multiple tags
65 | - Tags must come from the predefined list of application tags defined in the `tags` section
66 | - Multiple tags should be used when an endpoint serves multiple purposes
67 | - Common tag combinations include:
68 | - `[Navigation, X]` for UI page routes
69 | - `[API, X]` for data API endpoints
70 | - `[System, Authentication]` for security-related endpoints
71 |
72 | ### 3.2 Defined Tags
73 |
74 | The application uses the following tags for categorization:
75 | - Authentication - User authentication and authorization endpoints
76 | - Documents - Document management and processing endpoints
77 | - History - Document processing history and tracking
78 | - Navigation - General navigation endpoints for the web interface
79 | - System - Configuration, health checks, and administrative functions
80 | - Chat - Document chat functionality
81 | - Setup - Application setup and configuration
82 | - Metadata - Endpoints for managing document metadata
83 | - API - General API endpoints (usually combined with other tags)
84 |
85 | ## 4. Security Requirements
86 |
87 | ### 4.1 Security Definitions
88 |
89 | - Each protected endpoint must include appropriate security requirements
90 | - The application supports two authentication methods:
91 | - `BearerAuth` - JWT-based authentication for web app users
92 | - `ApiKeyAuth` - API key authentication for programmatic access
93 |
94 | ### 4.2 Security Requirement Format
95 |
96 | Security requirements should be specified in the standard format:
97 | ```javascript
98 | * security:
99 | * - BearerAuth: []
100 | * - ApiKeyAuth: []
101 | ```
102 |
103 | ### 4.3 Security Notices
104 |
105 | - For endpoints that modify security settings (like key regeneration), include explicit security notices
106 | - Format these as bold text in the description using Markdown: `**Security Notice**: Important information.`
107 |
108 | ## 5. Parameters Documentation
109 |
110 | ### 5.1 Path Parameters
111 |
112 | Path parameters should be documented with:
113 | - Parameter name matching the path definition
114 | - Schema type (integer, string, etc.)
115 | - Required flag (almost always true for path parameters)
116 | - Description of the parameter purpose
117 | - Example value
118 |
119 | ```javascript
120 | * parameters:
121 | * - in: path
122 | * name: id
123 | * required: true
124 | * schema:
125 | * type: integer
126 | * description: The resource ID
127 | * example: 123
128 | ```
129 |
130 | ### 5.2 Query Parameters
131 |
132 | Query parameters follow a similar format but include:
133 | - Default values where applicable
134 | - Enumerated values if the parameter has a restricted set of options
135 |
136 | ```javascript
137 | * parameters:
138 | * - in: query
139 | * name: limit
140 | * schema:
141 | * type: integer
142 | * default: 10
143 | * description: Maximum number of records to return
144 | ```
145 |
146 | ### 5.3 Request Body
147 |
148 | For POST/PUT endpoints, document the request body with:
149 | - Required flag
150 | - Content type (usually application/json)
151 | - Schema definition including:
152 | - Required properties list
153 | - Property definitions with types
154 | - Property descriptions
155 | - Example values
156 |
157 | ```javascript
158 | * requestBody:
159 | * required: true
160 | * content:
161 | * application/json:
162 | * schema:
163 | * type: object
164 | * required:
165 | * - propertyName
166 | * properties:
167 | * propertyName:
168 | * type: string
169 | * description: Description of the property
170 | * example: "Example value"
171 | ```
172 |
173 | ## 6. Response Documentation
174 |
175 | ### 6.1 Response Status Codes
176 |
177 | Each endpoint must document all possible response status codes:
178 | - 200/201 for successful operations
179 | - 400 for invalid requests
180 | - 401 for authentication failures
181 | - 403 for authorization failures
182 | - 404 for resource not found
183 | - 500 for server errors
184 | - Any other status code the endpoint might return
185 |
186 | ### 6.2 Response Content
187 |
188 | For each status code, document:
189 | - Description of what the status code means in this specific context
190 | - Content type of the response
191 | - Schema definition of the response body
192 | - For complex responses, use schema references to components
193 |
194 | ```javascript
195 | * responses:
196 | * 200:
197 | * description: Detailed description of successful response
198 | * content:
199 | * application/json:
200 | * schema:
201 | * $ref: '#/components/schemas/ResponseSchema'
202 | ```
203 |
204 | ### 6.3 Streaming Responses
205 |
206 | For streaming endpoints (like chat), document:
207 | - The streaming nature of the response
208 | - The format of each chunk
209 | - Examples of the stream events
210 |
211 | ```javascript
212 | * 200:
213 | * description: |
214 | * Response streaming started. Each event contains a message chunk.
215 | * content:
216 | * text/event-stream:
217 | * schema:
218 | * type: string
219 | * example: |
220 | * data: {"chunk":"Example response chunk"}
221 | *
222 | * data: {"done":true}
223 | ```
224 |
225 | ## 7. Schema Definitions and References
226 |
227 | ### 7.1 Schema Components
228 |
229 | - Complex object schemas should be defined as components in a central schema file
230 | - These components should be referenced using `$ref` syntax
231 | - Common schemas like Error responses should always use references
232 |
233 | ```javascript
234 | * schema:
235 | * $ref: '#/components/schemas/Error'
236 | ```
237 |
238 | ### 7.2 Inline Schemas
239 |
240 | - Simple response schemas can be defined inline
241 | - Include:
242 | - Object type
243 | - Properties with types and descriptions
244 | - Example values for each property
245 |
246 | ```javascript
247 | * schema:
248 | * type: object
249 | * properties:
250 | * success:
251 | * type: boolean
252 | * description: Whether the operation succeeded
253 | * example: true
254 | ```
255 |
256 | ### 7.3 Array Schemas
257 |
258 | Arrays should specify the item type, either as a reference or inline schema:
259 |
260 | ```javascript
261 | * schema:
262 | * type: array
263 | * items:
264 | * $ref: '#/components/schemas/Item'
265 | ```
266 |
267 | ## 8. Documentation Style and Formatting
268 |
269 | ### 8.1 Indentation and Formatting
270 |
271 | - Consistent indentation using 2 spaces
272 | - Proper nesting of OpenAPI elements
273 | - Clear separation between different documentation sections
274 |
275 | ### 8.2 Naming Conventions
276 |
277 | - Use camelCase for property names in schemas
278 | - Use snake_case for query parameter names
279 | - Use descriptive names for all elements
280 |
281 | ### 8.3 Example Values
282 |
283 | - Every property should include a realistic example value
284 | - Examples should demonstrate typical usage
285 | - For enums, example should be one of the allowed values
286 |
287 | ## 9. Special Documentation Types
288 |
289 | ### 9.1 Page Routes (Navigation)
290 |
291 | For routes that render HTML pages:
292 | - Tag with [Navigation] and relevant feature tag
293 | - Document the purpose of the page
294 | - Note any data dependencies
295 |
296 | ### 9.2 API Data Endpoints
297 |
298 | For pure data API endpoints:
299 | - Tag with [API] and relevant feature tag
300 | - Document the data structure comprehensively
301 | - Include pagination details if applicable
302 |
303 | ### 9.3 Authentication Endpoints
304 |
305 | For authentication-related endpoints:
306 | - Tag with [Authentication]
307 | - Include detailed security considerations
308 | - Document token/session behaviors
309 |
310 | ## 10. Documentation Quality Standards
311 |
312 | ### 10.1 Completeness
313 |
314 | - No undocumented parameters or responses
315 | - All possible response codes covered
316 | - All security requirements specified
317 |
318 | ### 10.2 Accuracy
319 |
320 | - Documentation must match actual implementation
321 | - Examples must be valid for the described schema
322 | - Security requirements must reflect actual restrictions
323 |
324 | ### 10.3 Consistency
325 |
326 | - Similar endpoints should follow similar documentation patterns
327 | - Standard responses (like errors) should be documented identically
328 | - Terminology should be consistent across all endpoints
329 |
330 | This comprehensive standard ensures that all API documentation in the Paperless-AI application is thorough, consistent, and user-friendly, providing developers with all the information they need to use the API effectively.
331 |
332 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "paperless-ai",
3 | "version": "1.0.0",
4 | "description": "AI based Tag, correspondent and meta data generation",
5 | "main": "server.js",
6 | "scripts": {
7 | "test": "nodemon server.js"
8 | },
9 | "keywords": [
10 | "paperless-ngx",
11 | "ai",
12 | "paperless",
13 | "artificial",
14 | "intelligence"
15 | ],
16 | "nodemonConfig": {
17 | "ignore": [
18 | "test/*",
19 | "docs/*",
20 | "node_modules/*",
21 | ".git/*",
22 | "*.log",
23 | "public/*",
24 | "views/*",
25 | "document.json",
26 | "OPENAPI/openapi.json"
27 | ]
28 | },
29 | "author": "Clusterzx",
30 | "license": "MIT",
31 | "dependencies": {
32 | "axios": "^1.8.2",
33 | "bcryptjs": "^3.0.2",
34 | "better-sqlite3": "^11.8.1",
35 | "body-parser": "^1.20.3",
36 | "cheerio": "^1.0.0",
37 | "cookie-parser": "^1.4.7",
38 | "cors": "^2.8.5",
39 | "date-fns": "^4.1.0",
40 | "dockerode": "^4.0.6",
41 | "dotenv": "^16.4.7",
42 | "ejs": "^3.1.10",
43 | "express": "^4.21.2",
44 | "jsonwebtoken": "^9.0.2",
45 | "node-cron": "^3.0.3",
46 | "nodemon": "^3.1.9",
47 | "openai": "^4.86.2",
48 | "rimraf": "^6.0.1",
49 | "sqlite3": "^5.1.7",
50 | "swagger-jsdoc": "^6.2.8",
51 | "swagger-ui-express": "^5.0.1",
52 | "tiktoken": "^1.0.20"
53 | },
54 | "devDependencies": {
55 | "@eslint/js": "^9.22.0",
56 | "eslint": "^9.22.0",
57 | "eslint-config-prettier": "^10.1.1",
58 | "eslint-plugin-jsdoc": "^50.6.3",
59 | "globals": "^16.0.0",
60 | "prettier": "^3.5.3"
61 | },
62 | "pnpm": {
63 | "onlyBuiltDependencies": [
64 | "@scarf/scarf",
65 | "better-sqlite3"
66 | ]
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/paperless-ai-chrome.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/paperless-ai-chrome.zip
--------------------------------------------------------------------------------
/ppairag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/ppairag.png
--------------------------------------------------------------------------------
/prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "semi": true,
3 | "singleQuote": true,
4 | "tabWidth": 2,
5 | "trailingComma": "es5"
6 | }
--------------------------------------------------------------------------------
/preview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/preview.png
--------------------------------------------------------------------------------
/public/css/chat.css:
--------------------------------------------------------------------------------
1 | /* Theme Variables */
2 | :root[data-theme="light"] {
3 | --bg-primary: #ffffff;
4 | --bg-secondary: #f8fafc;
5 | --text-primary: #0f172a;
6 | --text-secondary: #475569;
7 | --accent-primary: #3b82f6;
8 | --border-color: #e2e8f0;
9 | --chat-bg: #ffffff;
10 | --message-bg: #f1f5f9;
11 | --user-message-bg: #eff6ff;
12 | }
13 |
14 | :root[data-theme="dark"] {
15 | --bg-primary: #0f172a;
16 | --bg-secondary: #1e293b;
17 | --text-primary: #f8fafc;
18 | --text-secondary: #e2e8f0;
19 | --accent-primary: #60a5fa;
20 | --border-color: #334155;
21 | --chat-bg: #1e293b;
22 | --message-bg: #334155;
23 | --user-message-bg: #1e3a8a;
24 | }
25 |
26 | body {
27 | background-color: var(--bg-secondary);
28 | color: var(--text-primary);
29 | min-height: 100vh;
30 | transition: background-color 0.3s ease;
31 | font-family: Arial, sans-serif;
32 | margin: 20px;
33 | line-height: 1.6;
34 | }
35 |
36 | .chat-container {
37 | max-width: 1600px;
38 | margin: 2rem auto;
39 | padding: 0 1rem;
40 | display: flex;
41 | flex-direction: column;
42 | gap: 1.5rem;
43 | }
44 |
45 | .document-select-card {
46 | background: var(--chat-bg);
47 | border-radius: 0.5rem;
48 | padding: 1.5rem;
49 | box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
50 | border: 1px solid var(--border-color);
51 | }
52 |
53 | .card-title {
54 | font-size: 1.25rem;
55 | font-weight: 600;
56 | margin-bottom: 1rem;
57 | color: var(--text-primary);
58 | }
59 |
60 | .select-input {
61 | width: 100%;
62 | padding: 0.5rem;
63 | border-radius: 0.375rem;
64 | border: 1px solid var(--border-color);
65 | background: var(--bg-primary);
66 | color: var(--text-primary);
67 | }
68 |
69 | .chat-interface {
70 | background: var(--chat-bg);
71 | border-radius: 0.5rem;
72 | border: 1px solid var(--border-color);
73 | display: flex;
74 | flex-direction: column;
75 | height: 70vh;
76 | }
77 |
78 | .initial-state {
79 | display: flex;
80 | align-items: center;
81 | justify-content: center;
82 | height: 100%;
83 | color: var(--text-secondary);
84 | }
85 |
86 | .chat-history {
87 | flex-grow: 1;
88 | overflow-y: auto;
89 | padding: 1.5rem;
90 | display: flex;
91 | flex-direction: column;
92 | gap: 1rem;
93 | }
94 |
95 | .message {
96 | max-width: 80%;
97 | padding: 1rem;
98 | border-radius: 0.5rem;
99 | background: var(--message-bg);
100 | }
101 |
102 | .user-message {
103 | background: var(--user-message-bg);
104 | align-self: flex-end;
105 | }
106 |
107 | .message-form {
108 | border-top: 1px solid var(--border-color);
109 | padding: 1rem;
110 | display: flex;
111 | gap: 0.5rem;
112 | }
113 |
114 | .message-input {
115 | flex-grow: 1;
116 | padding: 0.75rem;
117 | border-radius: 0.375rem;
118 | border: 1px solid var(--border-color);
119 | background: var(--bg-primary);
120 | color: var(--text-primary);
121 | resize: none;
122 | min-height: 20px;
123 | max-height: 150px;
124 | }
125 |
126 | .send-button {
127 | padding: 0.75rem 1.5rem;
128 | background: var(--accent-primary);
129 | color: white;
130 | border: none;
131 | border-radius: 0.375rem;
132 | cursor: pointer;
133 | transition: opacity 0.2s ease;
134 | }
135 |
136 | .send-button:hover {
137 | opacity: 0.9;
138 | }
139 |
140 | .theme-toggle {
141 | position: fixed;
142 | top: 1rem;
143 | right: 1rem;
144 | padding: 0.5rem;
145 | border-radius: 0.5rem;
146 | background: var(--bg-primary);
147 | border: 1px solid var(--border-color);
148 | cursor: pointer;
149 | transition: background-color 0.3s ease;
150 | }
151 |
152 | .theme-icon {
153 | width: 1.5rem;
154 | height: 1.5rem;
155 | color: var(--text-primary);
156 | }
157 |
158 | .hidden {
159 | display: none !important;
160 | }
161 |
162 | @media (max-width: 768px) {
163 | .chat-container {
164 | margin: 1rem;
165 | }
166 |
167 | .message {
168 | max-width: 90%;
169 | }
170 | }
--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/public/favicon.ico
--------------------------------------------------------------------------------
/public/js/chat.js:
--------------------------------------------------------------------------------
1 | let currentDocumentId = null;
2 |
3 | // Initialize marked with options for code highlighting
4 | marked.setOptions({
5 | highlight: function(code, lang) {
6 | if (lang && hljs.getLanguage(lang)) {
7 | return hljs.highlight(code, { language: lang }).value;
8 | }
9 | return hljs.highlightAuto(code).value;
10 | },
11 | breaks: true,
12 | gfm: true
13 | });
14 |
15 | // Load saved theme on page load
16 | document.addEventListener('DOMContentLoaded', () => {
17 | const savedTheme = localStorage.getItem('theme') || 'light';
18 | setTheme(savedTheme);
19 | setupTextareaAutoResize();
20 | });
21 |
22 | async function initializeChat(documentId) {
23 | try {
24 | const response = await fetch(`/chat/init/${documentId}`);
25 | if (!response.ok) throw new Error('Failed to initialize chat');
26 | const data = await response.json();
27 |
28 | document.getElementById('initialState').classList.add('hidden');
29 | document.getElementById('chatHistory').classList.remove('hidden');
30 | document.getElementById('messageForm').classList.remove('hidden');
31 | document.getElementById('documentId').value = documentId;
32 | document.getElementById('chatHistory').innerHTML = '';
33 |
34 | currentDocumentId = documentId;
35 |
36 | addMessage('Chat initialized for document: ' + data.documentTitle, false);
37 | } catch (error) {
38 | console.error('Error initializing chat:', error);
39 | showError('Failed to initialize chat');
40 | }
41 | }
42 |
43 | async function sendMessage(message) {
44 | try {
45 | const response = await fetch('/chat/message', {
46 | method: 'POST',
47 | headers: {
48 | 'Content-Type': 'application/json',
49 | },
50 | body: JSON.stringify({
51 | documentId: currentDocumentId,
52 | message: message
53 | })
54 | });
55 |
56 | if (!response.ok) throw new Error('Failed to send message');
57 |
58 | // Create message container for streaming response
59 | const containerDiv = document.createElement('div');
60 | containerDiv.className = 'message-container assistant';
61 |
62 | const messageDiv = document.createElement('div');
63 | messageDiv.className = 'message assistant';
64 | containerDiv.appendChild(messageDiv);
65 |
66 | document.getElementById('chatHistory').appendChild(containerDiv);
67 |
68 | let markdown = '';
69 | const reader = response.body.getReader();
70 | const decoder = new TextDecoder();
71 |
72 | while (true) {
73 | const { done, value } = await reader.read();
74 | if (done) break;
75 |
76 | const text = decoder.decode(value);
77 | const lines = text.split('\n');
78 |
79 | for (const line of lines) {
80 | if (line.startsWith('data: ')) {
81 | const data = line.slice(6);
82 | if (data === '[DONE]') continue;
83 |
84 | try {
85 | const parsed = JSON.parse(data);
86 | if (parsed.content) {
87 | markdown += parsed.content;
88 | messageDiv.innerHTML = marked.parse(markdown);
89 |
90 | // Apply syntax highlighting to any code blocks
91 | messageDiv.querySelectorAll('pre code').forEach((block) => {
92 | hljs.highlightBlock(block);
93 | });
94 |
95 | // Scroll to bottom
96 | const chatHistory = document.getElementById('chatHistory');
97 | chatHistory.scrollTop = chatHistory.scrollHeight;
98 | }
99 | } catch (e) {
100 | console.error('Error parsing SSE data:', e);
101 | }
102 | }
103 | }
104 | }
105 |
106 | return null; // No need to return response as it's handled in streaming
107 | } catch (error) {
108 | console.error('Error sending message:', error);
109 | throw error;
110 | }
111 | }
112 |
113 | function addMessage(message, isUser = true) {
114 | const containerDiv = document.createElement('div');
115 | containerDiv.className = `message-container ${isUser ? 'user' : 'assistant'}`;
116 |
117 | const messageDiv = document.createElement('div');
118 | messageDiv.className = `message ${isUser ? 'user' : 'assistant'}`;
119 |
120 | if (isUser) {
121 | messageDiv.innerHTML = `
${escapeHtml(message)}
`;
122 | } else {
123 | let messageContent = message;
124 | try {
125 | if (typeof message === 'string' && message.trim().startsWith('{')) {
126 | const jsonResponse = JSON.parse(message);
127 | messageContent = jsonResponse.reply || jsonResponse.message || message;
128 | }
129 | } catch (e) {
130 | console.log('Message is not JSON, using as is');
131 | }
132 |
133 | messageDiv.innerHTML = marked.parse(messageContent);
134 | messageDiv.querySelectorAll('pre code').forEach((block) => {
135 | hljs.highlightBlock(block);
136 | });
137 | }
138 |
139 | containerDiv.appendChild(messageDiv);
140 | const chatHistory = document.getElementById('chatHistory');
141 | chatHistory.appendChild(containerDiv);
142 | chatHistory.scrollTop = chatHistory.scrollHeight;
143 | }
144 |
145 | function showError(message) {
146 | const errorDiv = document.createElement('div');
147 | errorDiv.className = 'message-container assistant';
148 | errorDiv.innerHTML = `
149 |
150 |
Error: ${escapeHtml(message)}
151 |
152 | `;
153 | document.getElementById('chatHistory').appendChild(errorDiv);
154 | }
155 |
156 | function escapeHtml(unsafe) {
157 | return unsafe
158 | .replace(/&/g, "&amp;")
159 | .replace(/</g, "&lt;")
160 | .replace(/>/g, "&gt;")
161 | .replace(/"/g, "&quot;")
162 | .replace(/'/g, "&#039;");
163 | }
164 |
165 | function toggleTheme() {
166 | const currentTheme = document.body.getAttribute('data-theme');
167 | const newTheme = currentTheme === 'light' ? 'dark' : 'light';
168 | setTheme(newTheme);
169 | }
170 |
171 | function setTheme(theme) {
172 | const body = document.body;
173 | const lightIcon = document.getElementById('lightIcon');
174 | const darkIcon = document.getElementById('darkIcon');
175 |
176 | body.setAttribute('data-theme', theme);
177 | localStorage.setItem('theme', theme);
178 |
179 | if (theme === 'dark') {
180 | lightIcon.classList.add('hidden');
181 | darkIcon.classList.remove('hidden');
182 | } else {
183 | lightIcon.classList.remove('hidden');
184 | darkIcon.classList.add('hidden');
185 | }
186 | }
187 |
188 | function setupTextareaAutoResize() {
189 | const textarea = document.getElementById('messageInput');
190 |
191 | function adjustHeight() {
192 | textarea.style.height = 'auto';
193 | textarea.style.height = (textarea.scrollHeight) + 'px';
194 | }
195 |
196 | textarea.addEventListener('input', adjustHeight);
197 | textarea.addEventListener('keydown', (e) => {
198 | if (e.key === 'Enter' && !e.shiftKey) {
199 | e.preventDefault();
200 | document.getElementById('messageForm').dispatchEvent(new Event('submit'));
201 | }
202 | });
203 | }
204 |
205 | document.getElementById('documentSelect').addEventListener('change', function() {
206 | const documentId = this.value;
207 | if (documentId) {
208 | initializeChat(documentId);
209 | }
210 | });
211 |
212 | document.addEventListener("DOMContentLoaded", function () {
213 | const documentSelect = document.getElementById('documentSelect');
214 | const documentId = documentSelect.value;
215 |
216 | if (documentId) {
217 | initializeChat(documentId);
218 | }
219 | });
220 |
221 | document.getElementById('messageInput').addEventListener('keydown', async (e) => {
222 | if (e.key === 'Enter' && !e.shiftKey) {
223 | e.preventDefault();
224 | await submitForm();
225 | }
226 | });
227 |
228 | // Read the message box, echo the text as a user message, then send it and stream the reply.
229 | async function submitForm() {
230 | const messageInput = document.getElementById('messageInput');
231 | const message = messageInput.value.trim();
232 | if (!message) return;
233 |
234 | try {
235 | // Show user message immediately
236 | addMessage(message, true);
237 |
238 | // Clear input and reset height
239 | messageInput.value = '';
240 | messageInput.style.height = 'auto';
241 |
242 | // Send message and handle streaming response
243 | await sendMessage(message);
244 | } catch {
245 | showError('Failed to send message');
246 | }
247 | }
--------------------------------------------------------------------------------
/public/js/dashboard.js:
--------------------------------------------------------------------------------
1 | // Theme Management
2 | class ThemeManager {
3 | constructor() {
4 | this.themeToggle = document.getElementById('themeToggle');
5 | this.initialize();
6 | }
7 |
8 | initialize() {
9 | const savedTheme = localStorage.getItem('theme') || 'light';
10 | this.setTheme(savedTheme);
11 | console.log('Theme initialized');
12 | this.themeToggle.addEventListener('click', () => this.toggleTheme());
13 | }
14 |
15 | setTheme(theme) {
16 | document.documentElement.setAttribute('data-theme', theme);
17 | localStorage.setItem('theme', theme);
18 |
19 | const icon = this.themeToggle.querySelector('i');
20 | icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun';
21 | }
22 |
23 | toggleTheme() {
24 | const currentTheme = document.documentElement.getAttribute('data-theme');
25 | const newTheme = currentTheme === 'light' ? 'dark' : 'light';
26 | this.setTheme(newTheme);
27 | console.log('Theme toggled to: ' + newTheme);
28 | }
29 | }
30 |
31 | // Chart Initialization
32 | class ChartManager {
33 | constructor() {
34 | this.initializeDocumentChart();
35 | }
36 |
37 | initializeDocumentChart() {
38 | const { documentCount, processedCount } = window.dashboardData;
39 | const unprocessedCount = documentCount - processedCount;
40 |
41 | const ctx = document.getElementById('documentChart').getContext('2d');
42 | new Chart(ctx, {
43 | type: 'doughnut',
44 | data: {
45 | labels: ['AI Processed', 'Unprocessed'],
46 | datasets: [{
47 | data: [processedCount, unprocessedCount],
48 | backgroundColor: [
49 | '#3b82f6', // blue-500
50 | '#e2e8f0' // gray-200
51 | ],
52 | borderWidth: 0,
53 | spacing: 2
54 | }]
55 | },
56 | options: {
57 | responsive: true,
58 | maintainAspectRatio: false,
59 | cutout: '70%',
60 | plugins: {
61 | legend: {
62 | display: false
63 | },
64 | tooltip: {
65 | callbacks: {
66 | label: function(context) {
67 | const value = context.raw;
68 | const total = processedCount + unprocessedCount;
69 | const percentage = ((value / total) * 100).toFixed(1);
70 | return `${value} (${percentage}%)`;
71 | }
72 | }
73 | }
74 | }
75 | }
76 | });
77 | }
78 | }
79 |
80 | // Modal Management
81 | class ModalManager {
82 | constructor() {
83 | this.modal = document.getElementById('detailsModal');
84 | this.modalTitle = this.modal.querySelector('.modal-title');
85 | this.modalContent = this.modal.querySelector('.modal-data');
86 | this.modalLoader = this.modal.querySelector('.modal-loader');
87 | this.initializeEventListeners();
88 | }
89 |
90 | initializeEventListeners() {
91 | // Close button click
92 | this.modal.querySelector('.modal-close').addEventListener('click', () => this.hideModal());
93 |
94 | // Overlay click
95 | this.modal.querySelector('.modal-overlay').addEventListener('click', () => this.hideModal());
96 |
97 | // Escape key press
98 | document.addEventListener('keydown', (e) => {
99 | if (e.key === 'Escape' && this.modal.classList.contains('show')) {
100 | this.hideModal();
101 | }
102 | });
103 | }
104 |
105 | showModal(title) {
106 | this.modalTitle.textContent = title;
107 | this.modalContent.innerHTML = '';
108 | this.modal.classList.remove('hidden'); // Fix: Remove 'hidden' class
109 | this.modal.classList.add('show');
110 | document.body.style.overflow = 'hidden';
111 | }
112 |
113 | hideModal() {
114 | this.modal.classList.remove('show');
115 | this.modal.classList.add('hidden'); // Fix: Add 'hidden' class back
116 | document.body.style.overflow = '';
117 | }
118 |
119 | showLoader() {
120 | this.modalLoader.classList.remove('hidden');
121 | this.modalContent.classList.add('hidden');
122 | }
123 |
124 | hideLoader() {
125 | this.modalLoader.classList.add('hidden');
126 | this.modalContent.classList.remove('hidden');
127 | }
128 |
129 | setContent(content) {
130 | this.modalContent.innerHTML = content;
131 | }
132 | }
133 |
134 | // Make showTagDetails and showCorrespondentDetails globally available
135 | window.showTagDetails = async function() {
136 | window.modalManager.showModal('Tag Overview');
137 | window.modalManager.showLoader();
138 |
139 | try {
140 | const response = await fetch('/api/tagsCount');
141 | const tags = await response.json();
142 |
143 | let content = '';
144 | tags.forEach(tag => {
145 | content += `
146 |
147 | ${tag.name}
148 | ${tag.document_count || 0} documents
149 |
150 | `;
151 | });
152 | content += '
';
153 |
154 | window.modalManager.setContent(content);
155 | } catch (error) {
156 | console.error('Error loading tags:', error);
157 | window.modalManager.setContent('Error loading tags. Please try again later.
');
158 | } finally {
159 | window.modalManager.hideLoader();
160 | }
161 | }
162 |
163 | window.showCorrespondentDetails = async function() {
164 | window.modalManager.showModal('Correspondent Overview');
165 | window.modalManager.showLoader();
166 |
167 | try {
168 | const response = await fetch('/api/correspondentsCount');
169 | const correspondents = await response.json();
170 |
171 | let content = '';
172 | correspondents.forEach(correspondent => {
173 | content += `
174 |
175 | ${correspondent.name}
176 | ${correspondent.document_count || 0} documents
177 |
178 | `;
179 | });
180 | content += '
';
181 |
182 | window.modalManager.setContent(content);
183 | } catch (error) {
184 | console.error('Error loading correspondents:', error);
185 | window.modalManager.setContent('Error loading correspondents. Please try again later.
');
186 | } finally {
187 | window.modalManager.hideLoader();
188 | }
189 | }
190 |
191 | // Navigation Management: keeps the 'active' class on the most recently clicked .sidebar-link.
192 | class NavigationManager {
193 | constructor() {
194 | this.sidebarLinks = document.querySelectorAll('.sidebar-link');
195 | this.initialize();
196 | }
197 | // Attach a click handler to every sidebar link found at construction time.
198 | initialize() {
199 | this.sidebarLinks.forEach(link => {
200 | link.addEventListener('click', (e) => {
201 | // Only call preventDefault for links without a real target
202 | if (link.getAttribute('href') === '#') {
203 | e.preventDefault();
204 | }
205 | this.setActiveLink(link);
206 | });
207 | });
208 | }
209 | // Remove 'active' from all sidebar links, then add it to activeLink.
210 | setActiveLink(activeLink) {
211 | this.sidebarLinks.forEach(link => {
212 | link.classList.remove('active');
213 | });
214 | activeLink.classList.add('active');
215 | }
216 | }
217 |
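218 | // API Functions — NOTE(review): in a classic (non-module) script the two declarations below share their global names with the window.showTagDetails / window.showCorrespondentDetails assignments above, which replace them when this file runs; confirm which endpoints (/api/tags vs /api/tagsCount, /api/correspondents vs /api/correspondentsCount) are intended.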
219 | async function showTagDetails() {
220 | modalManager.showModal('Tag Overview');
221 | modalManager.showLoader();
222 |
223 | try {
224 | const response = await fetch('/api/tags');
225 | const tags = await response.json();
226 |
227 | let content = '';
228 | tags.forEach(tag => {
229 | content += `
230 |
231 | ${tag.name}
232 | ${tag.document_count || 0} documents
233 |
234 | `;
235 | });
236 | content += '
';
237 |
238 | modalManager.setContent(content);
239 | } catch (error) {
240 | console.error('Error loading tags:', error);
241 | modalManager.setContent('Error loading tags. Please try again later.
');
242 | } finally {
243 | modalManager.hideLoader();
244 | }
245 | }
246 |
247 | async function showCorrespondentDetails() {
248 | modalManager.showModal('Correspondent Overview');
249 | modalManager.showLoader();
250 |
251 | try {
252 | const response = await fetch('/api/correspondents');
253 | const correspondents = await response.json();
254 |
255 | let content = '';
256 | correspondents.forEach(correspondent => {
257 | content += `
258 |
259 | ${correspondent.name}
260 | ${correspondent.document_count || 0} documents
261 |
262 | `;
263 | });
264 | content += '
';
265 |
266 | modalManager.setContent(content);
267 | } catch (error) {
268 | console.error('Error loading correspondents:', error);
269 | modalManager.setContent('Error loading correspondents. Please try again later.
');
270 | } finally {
271 | modalManager.hideLoader();
272 | }
273 | }
274 |
275 | // Initialize everything when DOM is loaded
276 | document.addEventListener('DOMContentLoaded', () => {
277 | window.themeManager = new ThemeManager();
278 | window.navigationManager = new NavigationManager();
279 | window.chartManager = new ChartManager();
280 | window.modalManager = new ModalManager();
281 | });
--------------------------------------------------------------------------------
/public/js/history.js:
--------------------------------------------------------------------------------
1 | // Theme Management
2 | class ThemeManager {
3 | constructor() {
4 | this.themeToggle = document.getElementById('themeToggle');
5 | this.initialize();
6 | }
7 |
8 | initialize() {
9 | const savedTheme = localStorage.getItem('theme') || 'light';
10 | this.setTheme(savedTheme);
11 | this.themeToggle?.addEventListener('click', () => this.toggleTheme());
12 | }
13 |
14 | setTheme(theme) {
15 | document.documentElement.setAttribute('data-theme', theme);
16 | localStorage.setItem('theme', theme);
17 | const icon = this.themeToggle.querySelector('i');
18 | if (icon) {
19 | icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun';
20 | }
21 | }
22 |
23 | toggleTheme() {
24 | const currentTheme = document.documentElement.getAttribute('data-theme');
25 | const newTheme = currentTheme === 'light' ? 'dark' : 'light';
26 | this.setTheme(newTheme);
27 | }
28 | }
29 |
30 | class HistoryManager {
31 | constructor() {
32 | this.confirmModal = document.getElementById('confirmModal');
33 | this.confirmModalAll = document.getElementById('confirmModalAll');
34 | this.selectAll = document.getElementById('selectAll');
35 | this.table = null; // Will be initialized in initializeDataTable
36 | this.initialize();
37 | }
38 |
39 | initialize() {
40 | this.table = this.initializeDataTable();
41 | this.initializeModals();
42 | this.initializeResetButtons();
43 | this.initializeFilters();
44 | this.initializeSelectAll();
45 | }
46 |
47 | initializeDataTable() {
48 | return $('#historyTable').DataTable({
49 | serverSide: true,
50 | processing: true,
51 | ajax: {
52 | url: '/api/history',
53 | data: (d) => {
54 | d.tag = $('#tagFilter').val();
55 | d.correspondent = $('#correspondentFilter').val();
56 | }
57 | },
58 | columns: [
59 | {
60 | data: 'document_id',
61 | render: (data) => ``,
62 | orderable: false,
63 | width: '40px'
64 | },
65 | {
66 | data: 'document_id',
67 | width: '60px'
68 | },
69 | {
70 | data: 'title',
71 | render: (data, type, row) => {
72 | if (type === 'display') {
73 | return `
74 | ${data}
75 | Modified: ${new Date(row.created_at).toLocaleString()}
76 | `;
77 | }
78 | return data;
79 | }
80 | },
81 | {
82 | data: 'tags',
83 | render: (data, type) => {
84 | if (type === 'display') {
85 | if (!data?.length) return 'No tags';
86 | return data.map(tag =>
87 | `${tag.name}`
88 | ).join(' ');
89 | }
90 | return data?.map(t => t.name).join(', ') || '';
91 | }
92 | },
93 | { data: 'correspondent' },
94 | {
95 | data: null,
96 | render: (data) => `
97 |
98 |
102 |
106 |
107 | `,
108 | orderable: false,
109 | width: '150px'
110 | }
111 | ],
112 | order: [[2, 'desc']],
113 | pageLength: 10,
114 | dom: '<"flex flex-col sm:flex-row justify-between items-center mb-4"<"flex-1"f><"flex-none"l>>rtip',
115 | language: {
116 | search: "Search documents:",
117 | lengthMenu: "Show _MENU_ entries",
118 | info: "Showing _START_ to _END_ of _TOTAL_ documents",
119 | infoEmpty: "Showing 0 to 0 of 0 documents",
120 | infoFiltered: "(filtered from _MAX_ total documents)"
121 | },
122 | drawCallback: () => {
123 | // Update "Select All" checkbox state after table redraw
124 | this.updateSelectAllState();
125 | // Reattach event listeners to checkboxes
126 | this.attachCheckboxListeners();
127 | }
128 | });
129 | }
130 |
131 | initializeModals() {
132 | // Modal close handlers
133 | [this.confirmModal, this.confirmModalAll].forEach(modal => {
134 | if (!modal) return;
135 |
136 | // Close on overlay click
137 | modal.querySelector('.modal-overlay')?.addEventListener('click', () => {
138 | this.hideModal(modal);
139 | });
140 |
141 | // Close on X button click
142 | modal.querySelector('.modal-close')?.addEventListener('click', () => {
143 | this.hideModal(modal);
144 | });
145 |
146 | // Close on Cancel button click
147 | modal.querySelector('[id^="cancel"]')?.addEventListener('click', () => {
148 | this.hideModal(modal);
149 | });
150 | });
151 |
152 | // Close on Escape key
153 | document.addEventListener('keydown', (e) => {
154 | if (e.key === 'Escape') {
155 | this.hideModal(this.confirmModal);
156 | this.hideModal(this.confirmModalAll);
157 | }
158 | });
159 |
160 | // Reset action handlers
161 | document.getElementById('confirmReset')?.addEventListener('click', async () => {
162 | const selectedDocs = this.getSelectedDocuments();
163 | const success = await this.resetDocuments(selectedDocs);
164 | if (success) {
165 | this.hideModal(this.confirmModal);
166 | }
167 | });
168 |
169 | document.getElementById('confirmResetAll')?.addEventListener('click', async () => {
170 | const success = await this.resetAllDocuments();
171 | if (success) {
172 | this.hideModal(this.confirmModalAll);
173 | }
174 | });
175 | }
176 |
177 | initializeResetButtons() {
178 | // Reset Selected button
179 | document.getElementById('resetSelectedBtn')?.addEventListener('click', () => {
180 | const selectedDocs = this.getSelectedDocuments();
181 | if (selectedDocs.length === 0) {
182 | alert('Please select at least one document to reset.');
183 | return;
184 | }
185 | this.showModal(this.confirmModal);
186 | });
187 |
188 | // Reset All button
189 | document.getElementById('resetAllBtn')?.addEventListener('click', () => {
190 | this.showModal(this.confirmModalAll);
191 | });
192 | }
193 |
194 | initializeFilters() {
195 | $('#tagFilter, #correspondentFilter').on('change', () => {
196 | this.table.ajax.reload();
197 | });
198 | }
199 |
200 | initializeSelectAll() {
201 | if (!this.selectAll) return;
202 |
203 | // Handle "Select All" checkbox
204 | this.selectAll.addEventListener('change', () => {
205 | const isChecked = this.selectAll.checked;
206 | const checkboxes = document.querySelectorAll('.doc-select');
207 | checkboxes.forEach(checkbox => {
208 | checkbox.checked = isChecked;
209 | });
210 | });
211 |
212 | // Initial state check
213 | this.updateSelectAllState();
214 | }
215 |
216 | attachCheckboxListeners() {
217 | const checkboxes = document.querySelectorAll('.doc-select');
218 | checkboxes.forEach(checkbox => {
219 | // Remove existing listeners to prevent duplicates
220 | checkbox.removeEventListener('change', this.handleCheckboxChange);
221 | // Add new listener
222 | checkbox.addEventListener('change', () => this.handleCheckboxChange());
223 | });
224 | }
225 |
226 | handleCheckboxChange() {
227 | this.updateSelectAllState();
228 | }
229 |
230 | updateSelectAllState() {
231 | if (!this.selectAll) return;
232 |
233 | const checkboxes = document.querySelectorAll('.doc-select');
234 | const checkedBoxes = document.querySelectorAll('.doc-select:checked');
235 |
236 | // Update "Select All" checkbox state
237 | this.selectAll.checked = checkboxes.length > 0 && checkboxes.length === checkedBoxes.length;
238 |
239 | // Update indeterminate state
240 | this.selectAll.indeterminate = checkedBoxes.length > 0 && checkedBoxes.length < checkboxes.length;
241 | }
242 |
243 | showModal(modal) {
244 | if (modal) {
245 | modal.classList.remove('hidden');
246 | modal.classList.add('show');
247 | }
248 | }
249 |
250 | hideModal(modal) {
251 | if (modal) {
252 | modal.classList.remove('show');
253 | modal.classList.add('hidden');
254 | }
255 | }
256 |
257 | getSelectedDocuments() {
258 | return Array.from(document.querySelectorAll('.doc-select:checked'))
259 | .map(checkbox => checkbox.value);
260 | }
261 |
262 | async resetDocuments(ids) {
263 | try {
264 | const response = await fetch('/api/reset-documents', {
265 | method: 'POST',
266 | headers: { 'Content-Type': 'application/json' },
267 | body: JSON.stringify({ ids })
268 | });
269 |
270 | if (!response.ok) {
271 | throw new Error('Failed to reset documents');
272 | }
273 |
274 | await this.table.ajax.reload();
275 | return true;
276 | } catch (error) {
277 | console.error('Error resetting documents:', error);
278 | alert('Failed to reset documents. Please try again.');
279 | return false;
280 | }
281 | }
282 |
283 | async resetAllDocuments() {
284 | try {
285 | const response = await fetch('/api/reset-all-documents', {
286 | method: 'POST',
287 | headers: { 'Content-Type': 'application/json' }
288 | });
289 |
290 | if (!response.ok) {
291 | throw new Error('Failed to reset all documents');
292 | }
293 |
294 | await this.table.ajax.reload();
295 | return true;
296 | } catch (error) {
297 | console.error('Error resetting all documents:', error);
298 | alert('Failed to reset all documents. Please try again.');
299 | return false;
300 | }
301 | }
302 | }
303 |
// Create the page-level managers once the DOM has been parsed and expose
// them on `window`.
document.addEventListener('DOMContentLoaded', function onDomReady() {
  window.themeManager = new ThemeManager();
  window.historyManager = new HistoryManager();
});
--------------------------------------------------------------------------------
/public/js/manual.js:
--------------------------------------------------------------------------------
1 | // Theme Management
/**
 * Persists the light/dark theme in localStorage and reflects it on the
 * `data-theme` attribute of the root element.
 *
 * The toggle button (`#themeToggle`) and its `<i>` icon are optional: pages
 * without them still get the saved theme applied instead of throwing.
 */
class ThemeManager {
  constructor() {
    this.themeToggle = document.getElementById('themeToggle');
    this.initialize();
  }

  /** Apply the saved theme (default `light`) and wire the toggle button if present. */
  initialize() {
    const savedTheme = localStorage.getItem('theme') || 'light';
    this.setTheme(savedTheme);

    this.themeToggle?.addEventListener('click', () => this.toggleTheme());
  }

  /**
   * Activate and persist a theme, and update the toggle icon when one exists.
   * @param {string} theme - `'light'` or `'dark'`.
   */
  setTheme(theme) {
    document.documentElement.setAttribute('data-theme', theme);
    localStorage.setItem('theme', theme);

    const icon = this.themeToggle?.querySelector('i');
    if (icon) {
      // The icon shows the theme the user can switch TO.
      icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun';
    }
  }

  /** Switch between the light and dark themes. */
  toggleTheme() {
    const currentTheme = document.documentElement.getAttribute('data-theme');
    const newTheme = currentTheme === 'light' ? 'dark' : 'light';
    this.setTheme(newTheme);
  }
}
29 |
30 | // Chart Initialization
/**
 * Renders the processed/unprocessed doughnut chart from `window.dashboardData`.
 *
 * Skips rendering when the `#documentChart` canvas or the dashboard data is
 * missing, so a page without the chart does not abort the rest of the page
 * bootstrap. The tooltip reports 0.0% instead of NaN% when there are no
 * documents at all.
 */
class ChartManager {
  constructor() {
    this.initializeDocumentChart();
  }

  /** Build the Chart.js doughnut chart for document processing progress. */
  initializeDocumentChart() {
    const stats = window.dashboardData;
    const canvas = document.getElementById('documentChart');
    if (!stats || !canvas) {
      return;
    }

    const { documentCount, processedCount } = stats;
    const unprocessedCount = documentCount - processedCount;
    const total = processedCount + unprocessedCount;

    const ctx = canvas.getContext('2d');
    new Chart(ctx, {
      type: 'doughnut',
      data: {
        labels: ['AI Processed', 'Unprocessed'],
        datasets: [{
          data: [processedCount, unprocessedCount],
          backgroundColor: [
            '#3b82f6', // blue-500
            '#e2e8f0', // gray-200
          ],
          borderWidth: 0,
          spacing: 2,
        }],
      },
      options: {
        responsive: true,
        maintainAspectRatio: false,
        cutout: '70%',
        plugins: {
          legend: {
            display: false,
          },
          tooltip: {
            callbacks: {
              label(context) {
                const value = context.raw;
                // Guard the division: with zero documents the share is 0, not NaN.
                const percentage = total > 0 ? ((value / total) * 100).toFixed(1) : '0.0';
                return `${value} (${percentage}%)`;
              },
            },
          },
        },
      },
    });
  }
}
78 |
79 | // Modal Management
/**
 * Controls the shared details modal (`#detailsModal`): open/close,
 * loader visibility and content injection.
 */
class ModalManager {
  constructor() {
    const root = document.getElementById('detailsModal');
    this.modal = root;
    this.modalTitle = root.querySelector('.modal-title');
    this.modalContent = root.querySelector('.modal-data');
    this.modalLoader = root.querySelector('.modal-loader');
    this.initializeEventListeners();
  }

  /** Close the modal on close-button click, overlay click, or Escape. */
  initializeEventListeners() {
    const close = () => this.hideModal();

    for (const selector of ['.modal-close', '.modal-overlay']) {
      this.modal.querySelector(selector).addEventListener('click', close);
    }

    document.addEventListener('keydown', (event) => {
      if (event.key !== 'Escape') {
        return;
      }
      if (this.modal.classList.contains('show')) {
        this.hideModal();
      }
    });
  }

  /**
   * Open the modal with a title and empty content; page scrolling is locked.
   * @param {string} title - Text for the modal title element.
   */
  showModal(title) {
    this.modalTitle.textContent = title;
    this.modalContent.innerHTML = '';

    const { classList } = this.modal;
    classList.remove('hidden');
    classList.add('show');

    document.body.style.overflow = 'hidden';
  }

  /** Close the modal and restore page scrolling. */
  hideModal() {
    const { classList } = this.modal;
    classList.remove('show');
    classList.add('hidden');

    document.body.style.overflow = '';
  }

  /** Show the loader and hide the content area. */
  showLoader() {
    this.modalLoader.classList.remove('hidden');
    this.modalContent.classList.add('hidden');
  }

  /** Hide the loader and reveal the content area. */
  hideLoader() {
    this.modalLoader.classList.add('hidden');
    this.modalContent.classList.remove('hidden');
  }

  /**
   * Replace the modal content.
   * @param {string} content - HTML string assigned to the content element's innerHTML.
   */
  setContent(content) {
    this.modalContent.innerHTML = content;
  }
}
132 |
133 | // Make showTagDetails and showCorrespondentDetails globally available
/**
 * Open the details modal and list every tag with its document count,
 * loaded from `/api/tagsCount`.
 *
 * Tag names come from the API and are HTML-escaped before being injected,
 * and a non-2xx response is treated as a failure.
 *
 * NOTE(review): the wrapper markup and class names below are a reconstruction;
 * confirm them against the page stylesheet.
 */
window.showTagDetails = async function() {
  const escapeHtml = (value) => String(value ?? '').replace(
    /[&<>"']/g,
    (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
  );

  window.modalManager.showModal('Tag Overview');
  window.modalManager.showLoader();

  try {
    const response = await fetch('/api/tagsCount');
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`);
    }
    const tags = await response.json();

    let content = '<div class="detail-list">';
    tags.forEach(tag => {
      content += `
        <div class="detail-item">
          <span class="detail-item-name">${escapeHtml(tag.name)}</span>
          <span class="detail-item-info">${escapeHtml(tag.document_count || 0)} documents</span>
        </div>
      `;
    });
    content += '</div>';

    window.modalManager.setContent(content);
  } catch (error) {
    console.error('Error loading tags:', error);
    window.modalManager.setContent('<div class="text-red-500 p-4">Error loading tags. Please try again later.</div>');
  } finally {
    window.modalManager.hideLoader();
  }
}
161 |
/**
 * Open the details modal and list every correspondent with its document
 * count, loaded from `/api/correspondentsCount`.
 *
 * Correspondent names come from the API and are HTML-escaped before being
 * injected, and a non-2xx response is treated as a failure.
 *
 * NOTE(review): the wrapper markup and class names below are a reconstruction;
 * confirm them against the page stylesheet.
 */
window.showCorrespondentDetails = async function() {
  const escapeHtml = (value) => String(value ?? '').replace(
    /[&<>"']/g,
    (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
  );

  window.modalManager.showModal('Correspondent Overview');
  window.modalManager.showLoader();

  try {
    const response = await fetch('/api/correspondentsCount');
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`);
    }
    const correspondents = await response.json();

    let content = '<div class="detail-list">';
    correspondents.forEach(correspondent => {
      content += `
        <div class="detail-item">
          <span class="detail-item-name">${escapeHtml(correspondent.name)}</span>
          <span class="detail-item-info">${escapeHtml(correspondent.document_count || 0)} documents</span>
        </div>
      `;
    });
    content += '</div>';

    window.modalManager.setContent(content);
  } catch (error) {
    console.error('Error loading correspondents:', error);
    window.modalManager.setContent('<div class="text-red-500 p-4">Error loading correspondents. Please try again later.</div>');
  } finally {
    window.modalManager.hideLoader();
  }
}
189 |
190 | // Navigation Management
/**
 * Tracks the active entry in the sidebar: clicking a `.sidebar-link`
 * marks it `active` and clears the class from all other links.
 */
class NavigationManager {
  constructor() {
    this.sidebarLinks = document.querySelectorAll('.sidebar-link');
    this.initialize();
  }

  /** Register the click handler on every sidebar link. */
  initialize() {
    this.sidebarLinks.forEach((link) => {
      link.addEventListener('click', (event) => {
        // Only suppress navigation for placeholder links without a real target.
        const isPlaceholder = link.getAttribute('href') === '#';
        if (isPlaceholder) {
          event.preventDefault();
        }
        this.setActiveLink(link);
      });
    });
  }

  /**
   * Mark one link as active and deactivate the rest.
   * @param {Element} activeLink - The link that was clicked.
   */
  setActiveLink(activeLink) {
    this.sidebarLinks.forEach((link) => link.classList.remove('active'));
    activeLink.classList.add('active');
  }
}
216 |
217 | // API Functions
/**
 * Open the details modal and list every tag with its document count,
 * loaded from `/api/tags`.
 *
 * Tag names come from the API and are HTML-escaped before being injected,
 * and a non-2xx response is treated as a failure.
 *
 * NOTE(review): the wrapper markup and class names below are a reconstruction;
 * confirm them against the page stylesheet.
 */
async function showTagDetails() {
  const escapeHtml = (value) => String(value ?? '').replace(
    /[&<>"']/g,
    (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
  );

  modalManager.showModal('Tag Overview');
  modalManager.showLoader();

  try {
    const response = await fetch('/api/tags');
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`);
    }
    const tags = await response.json();

    let content = '<div class="detail-list">';
    tags.forEach(tag => {
      content += `
        <div class="detail-item">
          <span class="detail-item-name">${escapeHtml(tag.name)}</span>
          <span class="detail-item-info">${escapeHtml(tag.document_count || 0)} documents</span>
        </div>
      `;
    });
    content += '</div>';

    modalManager.setContent(content);
  } catch (error) {
    console.error('Error loading tags:', error);
    modalManager.setContent('<div class="text-red-500 p-4">Error loading tags. Please try again later.</div>');
  } finally {
    modalManager.hideLoader();
  }
}
245 |
/**
 * Open the details modal and list every correspondent with its document
 * count, loaded from `/api/correspondents`.
 *
 * Correspondent names come from the API and are HTML-escaped before being
 * injected, and a non-2xx response is treated as a failure.
 *
 * NOTE(review): the wrapper markup and class names below are a reconstruction;
 * confirm them against the page stylesheet.
 */
async function showCorrespondentDetails() {
  const escapeHtml = (value) => String(value ?? '').replace(
    /[&<>"']/g,
    (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
  );

  modalManager.showModal('Correspondent Overview');
  modalManager.showLoader();

  try {
    const response = await fetch('/api/correspondents');
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`);
    }
    const correspondents = await response.json();

    let content = '<div class="detail-list">';
    correspondents.forEach(correspondent => {
      content += `
        <div class="detail-item">
          <span class="detail-item-name">${escapeHtml(correspondent.name)}</span>
          <span class="detail-item-info">${escapeHtml(correspondent.document_count || 0)} documents</span>
        </div>
      `;
    });
    content += '</div>';

    modalManager.setContent(content);
  } catch (error) {
    console.error('Error loading correspondents:', error);
    modalManager.setContent('<div class="text-red-500 p-4">Error loading correspondents. Please try again later.</div>');
  } finally {
    modalManager.hideLoader();
  }
}
273 |
// Instantiate all page managers after the DOM is ready and expose them on
// `window`. Creation order is kept: theme, navigation, chart, modal.
document.addEventListener('DOMContentLoaded', function onDomReady() {
  window.themeManager = new ThemeManager();
  window.navigationManager = new NavigationManager();
  window.chartManager = new ChartManager();
  window.modalManager = new ModalManager();
});
--------------------------------------------------------------------------------
/public/js/playground.js:
--------------------------------------------------------------------------------
1 | // Theme Management für Playground
/**
 * Theme handling for the playground page: persists the light/dark theme in
 * localStorage, reflects it on the root element's `data-theme` attribute and
 * excludes the sidebar logo from colour inversion.
 *
 * The toggle button (`#themeToggle`) and its `<i>` icon are optional: pages
 * without them still get the saved theme applied instead of throwing.
 */
class ThemeManager {
  constructor() {
    this.themeToggle = document.getElementById('themeToggle');
    this.initialize();
  }

  /** Apply the saved theme, wire the toggle button if present, tag the logo. */
  initialize() {
    const savedTheme = localStorage.getItem('theme') || 'light';
    this.setTheme(savedTheme);

    this.themeToggle?.addEventListener('click', () => this.toggleTheme());

    // The sidebar logo gets the no-invert class.
    const logo = document.querySelector('.sidebar-header img');
    if (logo) {
      logo.classList.add('no-invert');
    }
  }

  /**
   * Activate and persist a theme, and update the toggle icon when one exists.
   * @param {string} theme - `'light'` or `'dark'`.
   */
  setTheme(theme) {
    document.documentElement.setAttribute('data-theme', theme);
    localStorage.setItem('theme', theme);

    const icon = this.themeToggle?.querySelector('i');
    if (icon) {
      // The icon shows the theme the user can switch TO.
      icon.className = theme === 'light' ? 'fas fa-moon' : 'fas fa-sun';
    }
  }

  /** Switch between the light and dark themes. */
  toggleTheme() {
    const currentTheme = document.documentElement.getAttribute('data-theme');
    const newTheme = currentTheme === 'light' ? 'dark' : 'light';
    this.setTheme(newTheme);
  }
}
35 |
// Create the theme manager after the DOM is ready and expose it on `window`.
document.addEventListener('DOMContentLoaded', function onDomReady() {
  window.themeManager = new ThemeManager();
});
--------------------------------------------------------------------------------
/rag_ready.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clusterzx/paperless-ai/a8bfc7eed8dbf47e8b7f1264c87ece8c3acd5ebb/rag_ready.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi>=0.95.0
2 | uvicorn>=0.21.1
3 | python-dotenv>=1.0.0
4 | requests>=2.28.2
5 | numpy>=1.24.2
6 | torch>=2.0.0
7 | sentence-transformers>=2.2.2
8 | chromadb>=0.3.21
9 | rank-bm25>=0.2.2
10 | nltk>=3.8.1
11 | tqdm>=4.65.0
12 | pydantic>=1.10.7
13 |
--------------------------------------------------------------------------------
/routes/auth.js:
--------------------------------------------------------------------------------
1 | const jwt = require('jsonwebtoken');
2 | const config = require('../config/config');
3 |
// JWT signing secret. It should always come from the JWT_SECRET environment
// variable; the hard-coded fallback is publicly known and is only kept so that
// existing installations keep working. A warning is logged when it is used.
const JWT_SECRET = process.env.JWT_SECRET || 'your-secret-key';
if (!process.env.JWT_SECRET) {
  console.warn('[WARN] JWT_SECRET is not set - falling back to an insecure default secret. Set JWT_SECRET in the environment.');
}
6 |
/**
 * API authentication middleware.
 *
 * Accepts either a matching `x-api-key` header, or a JWT taken from the `jwt`
 * cookie or from the second whitespace-separated part of the `Authorization`
 * header. Responds with 401 when no credentials are supplied and 403 when the
 * token fails verification.
 *
 * `req.cookies` is read with optional chaining so the middleware still answers
 * 401/403 (instead of throwing) when no cookie parser populated it.
 */
const authenticateJWT = (req, res, next) => {
  const token = req.cookies?.jwt || req.headers.authorization?.split(' ')[1];
  const apiKey = req.headers['x-api-key'];

  if (apiKey && apiKey === process.env.API_KEY) {
    req.user = { apiKey: true };
    return next();
  }

  if (!token) {
    return res.status(401).json({ message: 'Authentication required' });
  }

  try {
    const decoded = jwt.verify(token, JWT_SECRET);
    req.user = decoded;
    next();
  } catch (error) {
    return res.status(403).json({ message: 'Invalid or expired token' });
  }
};
29 |
/**
 * Page authentication middleware.
 *
 * Same credential sources as the API middleware (matching `x-api-key` header,
 * `jwt` cookie, or `Authorization` header), but failures redirect to `/login`
 * instead of returning JSON. An invalid token also clears the `jwt` cookie.
 *
 * `req.cookies` is read with optional chaining so a request without parsed
 * cookies is redirected to the login page instead of throwing.
 */
const isAuthenticated = (req, res, next) => {
  const token = req.cookies?.jwt || req.headers.authorization?.split(' ')[1];
  const apiKey = req.headers['x-api-key'];

  if (apiKey && apiKey === process.env.API_KEY) {
    req.user = { apiKey: true };
    return next();
  }

  if (!token) {
    return res.redirect('/login');
  }

  try {
    const decoded = jwt.verify(token, JWT_SECRET);
    req.user = decoded;
    next();
  } catch (error) {
    res.clearCookie('jwt');
    return res.redirect('/login');
  }
};
52 |
53 | module.exports = { authenticateJWT, isAuthenticated };
--------------------------------------------------------------------------------
/routes/rag.js:
--------------------------------------------------------------------------------
1 | // routes/rag.js
2 | const express = require('express');
3 | const router = express.Router();
4 | const ragService = require('../services/ragService');
5 |
/**
 * Log a route failure and answer with HTTP 500.
 * @param {object} res - Express response.
 * @param {string} routePath - Full route path used in the log line.
 * @param {Error} error - The error that was caught.
 */
const sendRouteError = (res, routePath, error) => {
  console.error(`Error in ${routePath}:`, error);
  res.status(500).json({ error: error.message || 'Internal server error' });
};

/**
 * Search documents. Requires `query`; the optional `from_date`, `to_date`
 * and `correspondent` body fields are forwarded as filters when truthy.
 */
router.post('/search', async (req, res) => {
  try {
    const { query, from_date, to_date, correspondent } = req.body;
    if (!query) {
      return res.status(400).json({ error: 'Query is required' });
    }

    const filters = {};
    for (const [key, value] of Object.entries({ from_date, to_date, correspondent })) {
      if (value) {
        filters[key] = value;
      }
    }

    res.json(await ragService.search(query, filters));
  } catch (error) {
    sendRouteError(res, '/api/rag/search', error);
  }
});

/**
 * Ask a question about documents. Requires `question` in the body.
 */
router.post('/ask', async (req, res) => {
  try {
    const { question } = req.body;
    if (!question) {
      return res.status(400).json({ error: 'Question is required' });
    }

    res.json(await ragService.askQuestion(question));
  } catch (error) {
    sendRouteError(res, '/api/rag/ask', error);
  }
});

/**
 * Start document indexing. `force` defaults to false.
 */
router.post('/index', async (req, res) => {
  try {
    const { force = false } = req.body;
    res.json(await ragService.indexDocuments(force));
  } catch (error) {
    sendRouteError(res, '/api/rag/index', error);
  }
});

/**
 * Get indexing status.
 */
router.get('/index/status', async (req, res) => {
  try {
    res.json(await ragService.getIndexingStatus());
  } catch (error) {
    sendRouteError(res, '/api/rag/index/status', error);
  }
});

/**
 * Check if updates are needed.
 */
router.get('/index/check', async (req, res) => {
  try {
    res.json(await ragService.checkForUpdates());
  } catch (error) {
    sendRouteError(res, '/api/rag/index/check', error);
  }
});

/**
 * Get RAG service status, extended with the AI backend's status and model.
 */
router.get('/status', async (req, res) => {
  try {
    const status = await ragService.checkStatus();
    const aiStatus = await ragService.getAIStatus();

    // Merge the AI information into the RAG status object before replying.
    Object.assign(status, {
      ai_status: aiStatus.status,
      ai_model: aiStatus.model,
    });

    res.json(status);
  } catch (error) {
    sendRouteError(res, '/api/rag/status', error);
  }
});

/**
 * Initialize RAG service. `force` defaults to false.
 */
router.post('/initialize', async (req, res) => {
  try {
    const { force = false } = req.body;
    res.json(await ragService.initialize(force));
  } catch (error) {
    sendRouteError(res, '/api/rag/initialize', error);
  }
});
121 |
122 | module.exports = router;
123 |
--------------------------------------------------------------------------------
/services/aiServiceFactory.js:
--------------------------------------------------------------------------------
1 | const config = require('../config/config');
2 | const openaiService = require('./openaiService');
3 | const ollamaService = require('./ollamaService');
4 | const customService = require('./customService');
5 | const azureService = require('./azureService');
6 |
/**
 * Selects the AI backend service according to `config.aiProvider`.
 */
class AIServiceFactory {
  /**
   * @returns {object} The service for the configured provider. `'openai'`
   *   and any unrecognised or missing provider yield the OpenAI service.
   */
  static getService() {
    const provider = config.aiProvider;

    switch (provider) {
      case 'ollama':
        return ollamaService;
      case 'custom':
        return customService;
      case 'azure':
        return azureService;
      default:
        // Covers 'openai' as well as unknown values.
        return openaiService;
    }
  }
}
22 |
23 | module.exports = AIServiceFactory;
--------------------------------------------------------------------------------
/services/chatService.js:
--------------------------------------------------------------------------------
1 | // services/chatService.js
2 | const OpenAIService = require('./openaiService');
3 | const PaperlessService = require('./paperlessService');
4 | const config = require('../config/config');
5 | const fs = require('fs');
6 | const path = require('path');
7 | const os = require('os');
8 | const stream = require('stream');
9 | const { promisify } = require('util');
10 | const pipeline = promisify(stream.pipeline);
11 | const { OpenAI } = require('openai');
12 |
/**
 * In-memory chat sessions about individual Paperless documents, with
 * streamed (SSE) answers from the configured AI provider.
 */
class ChatService {
  constructor() {
    this.chats = new Map(); // Stores chat histories: documentId -> { messages, documentTitle }
    this.tempDir = path.join(os.tmpdir(), 'paperless-chat');

    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(this.tempDir, { recursive: true });
  }

  /**
   * Downloads the original file from Paperless into the temp directory.
   * @param {string} documentId - The ID of the document
   * @returns {Promise<{filePath: string, filename: string, mimeType: string}>}
   * @throws Re-throws any lookup or download error after logging it.
   */
  async downloadDocument(documentId) {
    try {
      const document = await PaperlessService.getDocument(documentId);
      // The filename comes from Paperless; keep only its base name so it
      // cannot point outside the temp directory.
      const safeName = path.basename(String(document.original_filename));
      const tempFilePath = path.join(this.tempDir, `${documentId}_${safeName}`);

      // Create download stream
      const response = await PaperlessService.client.get(`/documents/${documentId}/download/`, {
        responseType: 'stream'
      });

      // Save file temporarily
      await pipeline(
        response.data,
        fs.createWriteStream(tempFilePath)
      );

      return {
        filePath: tempFilePath,
        filename: document.original_filename,
        mimeType: document.mime_type
      };
    } catch (error) {
      console.error(`Error downloading document ${documentId}:`, error);
      throw error;
    }
  }

  /**
   * Initializes a new chat for a document. The document content becomes the
   * system prompt; when it cannot be fetched directly, the original file is
   * downloaded and read as UTF-8 text instead.
   * @param {string} documentId - The ID of the document
   * @returns {Promise<{documentTitle: string, initialized: boolean}>}
   * @throws Re-throws any error after logging it.
   */
  async initializeChat(documentId) {
    try {
      // Get document information
      const document = await PaperlessService.getDocument(documentId);
      let documentContent;

      try {
        documentContent = await PaperlessService.getDocumentContent(documentId);
      } catch (error) {
        console.warn('Could not get direct document content, trying file download...', error);
        const { filePath } = await this.downloadDocument(documentId);
        documentContent = await fs.promises.readFile(filePath, 'utf8');
      }

      // Create initial system prompt
      const systemPrompt = [
        `You are a helpful assistant for the document "${document.title}".`,
        'Use the following document content as context for your responses.',
        "If you don't know something or it's not in the document, please say so honestly.",
        '',
        'Document content:',
        `${documentContent}`
      ].join('\n');

      const messages = [{ role: "system", content: systemPrompt }];

      this.chats.set(documentId, {
        messages,
        documentTitle: document.title
      });

      return {
        documentTitle: document.title,
        initialized: true
      };
    } catch (error) {
      console.error(`Error initializing chat for document ${documentId}:`, error);
      throw error;
    }
  }

  /**
   * Builds the OpenAI-compatible client and model name for a provider.
   * @param {string} aiProvider - Value of the AI_PROVIDER environment variable.
   * @returns {{client: object, model: string}}
   * @throws {Error} When the provider is not one of openai/custom/azure/ollama.
   */
  getStreamingProvider(aiProvider) {
    switch (aiProvider) {
      case 'openai':
        // Make sure OpenAIService is initialized
        OpenAIService.initialize();
        // A fresh client instance is created per request.
        return {
          client: new OpenAI({ apiKey: process.env.OPENAI_API_KEY }),
          model: process.env.OPENAI_MODEL || 'gpt-4'
        };
      case 'custom':
        return {
          client: new OpenAI({
            baseURL: process.env.CUSTOM_BASE_URL,
            apiKey: process.env.CUSTOM_API_KEY,
          }),
          model: process.env.CUSTOM_MODEL
        };
      case 'azure':
        return {
          client: new OpenAI({
            apiKey: process.env.AZURE_API_KEY,
            baseURL: `${process.env.AZURE_ENDPOINT}/openai/deployments/${process.env.AZURE_DEPLOYMENT_NAME}`,
            defaultQuery: { 'api-version': process.env.AZURE_API_VERSION },
          }),
          model: process.env.AZURE_DEPLOYMENT_NAME
        };
      case 'ollama':
        return {
          client: new OpenAI({
            baseURL: `${process.env.OLLAMA_API_URL}/v1`,
            apiKey: 'ollama', // Ollama doesn't require a real API key but the SDK requires some value
          }),
          model: process.env.OLLAMA_MODEL
        };
      default:
        throw new Error('AI Provider not configured');
    }
  }

  /**
   * Sends a user message and streams the assistant's reply to `res` as
   * server-sent events (`data: {"content": ...}` chunks, then `data: [DONE]`).
   * Errors are written to the stream as `data: {"error": ...}`; the method
   * itself does not throw.
   * @param {string} documentId - The ID of the document being discussed
   * @param {string} userMessage - The user's message
   * @param {object} res - HTTP response to stream into
   */
  async sendMessageStream(documentId, userMessage, res) {
    try {
      if (!this.chats.has(documentId)) {
        await this.initializeChat(documentId);
      }

      const chatData = this.chats.get(documentId);
      chatData.messages.push({
        role: "user",
        content: userMessage
      });

      // Set headers for SSE
      res.setHeader('Content-Type', 'text/event-stream');
      res.setHeader('Cache-Control', 'no-cache');
      res.setHeader('Connection', 'keep-alive');

      const { client, model } = this.getStreamingProvider(process.env.AI_PROVIDER);
      const completionStream = await client.chat.completions.create({
        model,
        messages: chatData.messages,
        stream: true,
      });

      let fullResponse = '';
      for await (const chunk of completionStream) {
        const content = chunk.choices[0]?.delta?.content || '';
        if (content) {
          fullResponse += content;
          res.write(`data: ${JSON.stringify({ content })}\n\n`);
        }
      }

      // Add the complete response to chat history
      chatData.messages.push({
        role: "assistant",
        content: fullResponse
      });

      // End the stream
      res.write('data: [DONE]\n\n');
      res.end();

    } catch (error) {
      console.error(`Error in sendMessageStream:`, error);
      res.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
      res.end();
    }
  }

  /**
   * @param {string} documentId - The ID of the document
   * @returns {Array} The stored messages, or an empty array when no chat exists.
   */
  getChatHistory(documentId) {
    const chatData = this.chats.get(documentId);
    return chatData ? chatData.messages : [];
  }

  /**
   * @param {string} documentId - The ID of the document
   * @returns {boolean} Whether a chat has been initialized for the document.
   */
  chatExists(documentId) {
    return this.chats.has(documentId);
  }

  /**
   * Removes the stored chat for a document.
   * @param {string} documentId - The ID of the document
   * @returns {Promise<boolean>} `true` when a chat existed and was removed.
   */
  async deleteChat(documentId) {
    return this.chats.delete(documentId);
  }

  /**
   * Drops all chats and removes the temp directory. Errors are logged, not
   * thrown. `cleanup()` previously called a `deleteChat` method that did not
   * exist, so it always failed before reaching the directory removal.
   */
  async cleanup() {
    try {
      // Snapshot the keys: entries are deleted while iterating.
      for (const documentId of [...this.chats.keys()]) {
        await this.deleteChat(documentId);
      }
      // `force: true` tolerates an already-missing directory.
      await fs.promises.rm(this.tempDir, { recursive: true, force: true });
    } catch (error) {
      console.error('Error cleaning up ChatService:', error);
    }
  }
}
249 |
250 | module.exports = new ChatService();
251 |
--------------------------------------------------------------------------------
/services/customService.js:
--------------------------------------------------------------------------------
1 | const {
2 | calculateTokens,
3 | calculateTotalPromptTokens,
4 | truncateToTokenLimit,
5 | writePromptToFile
6 | } = require('./serviceUtils');
7 | const OpenAI = require('openai');
8 | const config = require('../config/config');
9 | const tiktoken = require('tiktoken');
10 | const paperlessService = require('./paperlessService');
11 | const fs = require('fs').promises;
12 | const path = require('path');
13 |
14 | class CustomOpenAIService {
15 | constructor() {
16 | this.client = null;
17 | this.tokenizer = null;
18 | }
19 |
20 | initialize() {
21 | if (!this.client && config.aiProvider === 'custom') {
22 | this.client = new OpenAI({
23 | baseURL: config.custom.apiUrl,
24 | apiKey: config.custom.apiKey
25 | });
26 | }
27 | }
28 |
29 | async analyzeDocument(content, existingTags = [], existingCorrespondentList = [], id) {
30 | const cachePath = path.join('./public/images', `${id}.png`);
31 | try {
32 | this.initialize();
33 | const now = new Date();
34 | const timestamp = now.toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' });
35 |
36 | if (!this.client) {
37 | throw new Error('Custom OpenAI client not initialized');
38 | }
39 |
40 | // Handle thumbnail caching
41 | try {
42 | await fs.access(cachePath);
43 | console.log('[DEBUG] Thumbnail already cached');
44 | } catch (err) {
45 | console.log('Thumbnail not cached, fetching from Paperless');
46 |
47 | const thumbnailData = await paperlessService.getThumbnailImage(id);
48 |
49 | if (!thumbnailData) {
50 | console.warn('Thumbnail nicht gefunden');
51 | }
52 |
53 | await fs.mkdir(path.dirname(cachePath), { recursive: true });
54 | await fs.writeFile(cachePath, thumbnailData);
55 | }
56 |
57 | // Format existing tags
58 | const existingTagsList = existingTags
59 | .map(tag => tag.name)
60 | .join(', ');
61 |
62 |
63 | let systemPrompt = '';
64 | let promptTags = '';
65 | const model = config.custom.model;
66 | // Get system prompt and model
67 | if(process.env.USE_EXISTING_DATA === 'yes') {
68 | systemPrompt = `
69 | Prexisting tags: ${existingTagsList}\n\n
70 | Prexisiting correspondent: ${existingCorrespondentList}\n\n
71 | ` + process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
72 | promptTags = '';
73 | } else {
74 | systemPrompt = process.env.SYSTEM_PROMPT + '\n\n' + config.mustHavePrompt;
75 | promptTags = '';
76 | }
77 | if (process.env.USE_PROMPT_TAGS === 'yes') {
78 | promptTags = process.env.PROMPT_TAGS;
79 | systemPrompt = `
80 | Take these tags and try to match one or more to the document content.\n\n
81 | ` + config.specialPromptPreDefinedTags;
82 | }
83 |
84 | // Calculate total prompt tokens including all components
85 | const totalPromptTokens = await calculateTotalPromptTokens(
86 | systemPrompt,
87 | process.env.USE_PROMPT_TAGS === 'yes' ? [promptTags] : []
88 | );
89 |
90 | // Calculate available tokens
91 | const maxTokens = Number(config.tokenLimit); // Model's maximum context length
92 | const reservedTokens = totalPromptTokens + Number(config.responseTokens);
93 | const availableTokens = maxTokens - reservedTokens;
94 |
95 | // Truncate content if necessary
96 | const truncatedContent = await truncateToTokenLimit(content, availableTokens);
97 |
98 | // Make API request
99 | const response = await this.client.chat.completions.create({
100 | model: model,
101 | messages: [
102 | {
103 | role: "system",
104 | content: systemPrompt
105 | },
106 | {
107 | role: "user",
108 | content: truncatedContent
109 | }
110 | ],
111 | ...(model !== 'o3-mini' && { temperature: 0.3 }),
112 | });
113 |
114 | // Handle response
115 | if (!response?.choices?.[0]?.message?.content) {
116 | throw new Error('Invalid API response structure');
117 | }
118 |
119 | // Log token usage
120 | console.log(`[DEBUG] [${timestamp}] OpenAI request sent`);
121 | console.log(`[DEBUG] [${timestamp}] Total tokens: ${response.usage.total_tokens}`);
122 |
123 | const usage = response.usage;
124 | const mappedUsage = {
125 | promptTokens: usage.prompt_tokens,
126 | completionTokens: usage.completion_tokens,
127 | totalTokens: usage.total_tokens
128 | };
129 |
130 | let jsonContent = response.choices[0].message.content;
131 | jsonContent = jsonContent.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
132 |
133 | let parsedResponse;
134 | try {
135 | parsedResponse = JSON.parse(jsonContent);
136 | } catch (error) {
137 | console.error('Failed to parse JSON response:', error);
138 | throw new Error('Invalid JSON response from API');
139 | }
140 |
141 | // Validate response structure
142 | if (!parsedResponse || !Array.isArray(parsedResponse.tags) || typeof parsedResponse.correspondent !== 'string') {
143 | throw new Error('Invalid response structure: missing tags array or correspondent string');
144 | }
145 |
146 | return {
147 | document: parsedResponse,
148 | metrics: mappedUsage,
149 | truncated: truncatedContent.length < content.length
150 | };
151 | } catch (error) {
152 | console.error('Failed to analyze document:', error);
153 | return {
154 | document: { tags: [], correspondent: null },
155 | metrics: null,
156 | error: error.message
157 | };
158 | }
159 | }
160 |
161 |
162 | async analyzePlayground(content, prompt) {
163 | const musthavePrompt = `
164 | Return the result EXCLUSIVELY as a JSON object. The Tags and Title MUST be in the language that is used in the document.:
165 | {
166 | "title": "xxxxx",
167 | "correspondent": "xxxxxxxx",
168 | "tags": ["Tag1", "Tag2", "Tag3", "Tag4"],
169 | "document_date": "YYYY-MM-DD",
170 | "language": "en/de/es/..."
171 | }`;
172 |
173 | try {
174 | this.initialize();
175 | const now = new Date();
176 | const timestamp = now.toLocaleString('de-DE', { dateStyle: 'short', timeStyle: 'short' });
177 |
178 | if (!this.client) {
179 | throw new Error('OpenAI client not initialized - missing API key');
180 | }
181 |
182 | // Calculate total prompt tokens including musthavePrompt
183 | const totalPromptTokens = await calculateTotalPromptTokens(
184 | prompt + musthavePrompt // Combined system prompt
185 | );
186 |
187 | // Calculate available tokens
188 | const maxTokens = Number(config.tokenLimit);
189 | const reservedTokens = totalPromptTokens + Number(config.responseTokens);
190 | const availableTokens = maxTokens - reservedTokens;
191 |
192 | // Truncate content if necessary
193 | const truncatedContent = await truncateToTokenLimit(content, availableTokens);
194 |
195 | // Make API request
196 | const response = await this.client.chat.completions.create({
197 | model: config.custom.model,
198 | messages: [
199 | {
200 | role: "system",
201 | content: prompt + musthavePrompt
202 | },
203 | {
204 | role: "user",
205 | content: truncatedContent
206 | }
207 | ],
208 | temperature: 0.3,
209 | });
210 |
211 | // Handle response
212 | if (!response?.choices?.[0]?.message?.content) {
213 | throw new Error('Invalid API response structure');
214 | }
215 |
216 | // Log token usage
217 | console.log(`[DEBUG] [${timestamp}] OpenAI request sent`);
218 | console.log(`[DEBUG] [${timestamp}] Total tokens: ${response.usage.total_tokens}`);
219 |
220 | const usage = response.usage;
221 | const mappedUsage = {
222 | promptTokens: usage.prompt_tokens,
223 | completionTokens: usage.completion_tokens,
224 | totalTokens: usage.total_tokens
225 | };
226 |
227 | console.log(mappedUsage);
228 |
229 | let jsonContent = response.choices[0].message.content;
230 | jsonContent = jsonContent.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();
231 |
232 | let parsedResponse;
233 | try {
234 | parsedResponse = JSON.parse(jsonContent);
235 | } catch (error) {
236 | console.error('Failed to parse JSON response:', error);
237 | throw new Error('Invalid JSON response from API');
238 | }
239 |
240 | // Validate response structure
241 | if (!parsedResponse || !Array.isArray(parsedResponse.tags) || typeof parsedResponse.correspondent !== 'string') {
242 | throw new Error('Invalid response structure: missing tags array or correspondent string');
243 | }
244 |
245 | return {
246 | document: parsedResponse,
247 | metrics: mappedUsage,
248 | truncated: truncatedContent.length < content.length
249 | };
250 | } catch (error) {
251 | console.error('Failed to analyze document:', error);
252 | return {
253 | document: { tags: [], correspondent: null },
254 | metrics: null,
255 | error: error.message
256 | };
257 | }
258 | }
259 |
260 | /**
261 | * Generate text based on a prompt
262 | * @param {string} prompt - The prompt to generate text from
263 | * @returns {Promise} - The generated text
264 | */
265 | async generateText(prompt) {
266 | try {
267 | this.initialize();
268 |
269 | if (!this.client) {
270 | throw new Error('Custom OpenAI client not initialized - missing API key');
271 | }
272 |
273 | const model = config.custom.model;
274 |
275 | const response = await this.client.chat.completions.create({
276 | model: model,
277 | messages: [
278 | {
279 | role: "user",
280 | content: prompt
281 | }
282 | ],
283 | temperature: 0.7,
284 | max_tokens: 128000
285 | });
286 |
287 | if (!response?.choices?.[0]?.message?.content) {
288 | throw new Error('Invalid API response structure');
289 | }
290 |
291 | return response.choices[0].message.content;
292 | } catch (error) {
293 | console.error('Error generating text with Custom OpenAI:', error);
294 | throw error;
295 | }
296 | }
297 |
298 | async checkStatus() {
299 | try {
300 | this.initialize();
301 |
302 | if (!this.client) {
303 | throw new Error('Custom OpenAI client not initialized - missing API key');
304 | }
305 |
306 | const model = config.custom.model;
307 |
308 | const response = await this.client.chat.completions.create({
309 | model: model,
310 | messages: [
311 | {
312 | role: "user",
313 | content: 'Ping'
314 | }
315 | ],
316 | temperature: 0.7,
317 | max_tokens: 1000
318 | });
319 |
320 | if (!response?.choices?.[0]?.message?.content) {
321 | return { status: 'error' };
322 | }
323 |
324 | return { status: 'ok', model: model };
325 | } catch (error) {
326 | console.error('Error generating text with Custom OpenAI:', error);
327 | return { status: 'error' };
328 | }
329 | }
330 | }
331 |
332 | module.exports = new CustomOpenAIService();
333 |
--------------------------------------------------------------------------------
/services/debugService.js:
--------------------------------------------------------------------------------
1 | // service to debug the paperless-ngx api routes
2 | const env = require('dotenv').config();
3 | const axios = require('axios');
4 | const paperless_api = process.env.PAPERLESS_API_URL;
5 | const paperless_token = process.env.PAPERLESS_API_TOKEN;
6 |
/**
 * Fetch the raw `/documents/` listing from the Paperless API.
 *
 * @returns {Promise<*>} The response payload, or the JSON-serialized error
 *   when the request fails (this function does not reject).
 */
const getDocuments = async () => {
  try {
    const requestOptions = {
      headers: {
        'Authorization': `Token ${paperless_token}`,
        'Content-Type': 'application/json'
      }
    };
    const result = await axios.get(`${paperless_api}/documents/`, requestOptions);
    return result.data;
  } catch (err) {
    console.error('Paperless validation error:', err.message);
    return JSON.stringify(err);
  }
};
22 |
/**
 * Fetch the raw `/tags/` listing from the Paperless API.
 *
 * @returns {Promise<*>} The response payload, or the JSON-serialized error
 *   when the request fails (this function does not reject).
 */
const getTags = async () => {
  try {
    const requestOptions = {
      headers: {
        'Authorization': `Token ${paperless_token}`,
        'Content-Type': 'application/json'
      }
    };
    const result = await axios.get(`${paperless_api}/tags/`, requestOptions);
    return result.data;
  } catch (err) {
    console.error('Paperless validation error:', err.message);
    return JSON.stringify(err);
  }
};
38 |
/**
 * Fetch the raw `/correspondents/` listing from the Paperless API.
 *
 * @returns {Promise<*>} The response payload, or the JSON-serialized error
 *   when the request fails (this function does not reject).
 */
const getCorrespondents = async () => {
  try {
    const requestOptions = {
      headers: {
        'Authorization': `Token ${paperless_token}`,
        'Content-Type': 'application/json'
      }
    };
    const result = await axios.get(`${paperless_api}/correspondents/`, requestOptions);
    return result.data;
  } catch (err) {
    console.error('Paperless validation error:', err.message);
    return JSON.stringify(err);
  }
};
54 |
55 | module.exports = { getDocuments, getTags, getCorrespondents };
56 |
--------------------------------------------------------------------------------
/services/documentsService.js:
--------------------------------------------------------------------------------
1 | // services/documentsService.js
2 | const paperlessService = require('./paperlessService');
3 |
/**
 * Combines the Paperless document list with id -> name lookup tables for
 * tags and correspondents. Lookup tables are cached in memory once they
 * have been loaded with at least one entry.
 */
class DocumentsService {
  constructor() {
    this.tagCache = new Map();
    this.correspondentCache = new Map();
  }

  /**
   * @returns {Promise<Object>} Plain object mapping tag id to tag name.
   */
  async getTagNames() {
    // An empty cache is indistinguishable from "not loaded yet", so it is
    // refetched on every call until the API returns at least one tag.
    if (this.tagCache.size === 0) {
      const tags = await paperlessService.getTags();
      for (const { id, name } of tags) {
        this.tagCache.set(id, name);
      }
    }
    return Object.fromEntries(this.tagCache);
  }

  /**
   * @returns {Promise<Object>} Plain object mapping correspondent id to name.
   */
  async getCorrespondentNames() {
    if (this.correspondentCache.size === 0) {
      const correspondents = await paperlessService.listCorrespondentsNames();
      for (const { id, name } of correspondents) {
        this.correspondentCache.set(id, name);
      }
    }
    return Object.fromEntries(this.correspondentCache);
  }

  /**
   * Derive the Paperless web URL from PAPERLESS_API_URL by dropping the
   * trailing `/api` path segment.
   *
   * Only a suffix is removed: a plain `replace('/api', '')` would hit the
   * first occurrence and corrupt hosts such as `http://api.example.com/api`.
   * A trailing slash after `/api` is preserved.
   *
   * @returns {string} Base URL, or an empty string when the variable is unset.
   */
  _getPaperlessBaseUrl() {
    const apiUrl = process.env.PAPERLESS_API_URL ?? '';
    return apiUrl.replace(/\/api(?=\/?$)/, '');
  }

  /**
   * Load documents together with the tag and correspondent lookup tables.
   *
   * @returns {Promise<Object>} `{ documents, tagNames, correspondentNames, paperlessUrl }`
   *   with `documents` sorted by `created`, newest first.
   */
  async getDocumentsWithMetadata() {
    const [documents, tagNames, correspondentNames] = await Promise.all([
      paperlessService.getDocuments(),
      this.getTagNames(),
      this.getCorrespondentNames()
    ]);

    // Newest first; sorts the fetched array in place.
    documents.sort((a, b) => new Date(b.created) - new Date(a.created));

    return {
      documents,
      tagNames,
      correspondentNames,
      paperlessUrl: this._getPaperlessBaseUrl()
    };
  }
}
48 |
49 | module.exports = new DocumentsService();
--------------------------------------------------------------------------------
/services/loggerService.js:
--------------------------------------------------------------------------------
1 | const fs = require('fs');
2 | const util = require('util');
3 | const path = require('path');
4 |
/**
 * File logger that mirrors every `console.*` call into a log file.
 *
 * Constructing an instance replaces `console.log/error/warn/info/debug`
 * with wrappers that still print to the original console and additionally
 * append a formatted line to the log file. Call `restore()` to undo this.
 *
 * Supported options: `logFile`, `logDir`, `timestamp` (disabled only when
 * exactly `false`), `format` (`'txt'` or `'html'`) and `maxFileSize` in
 * bytes (default 10 MB). When the file reaches `maxFileSize` it is deleted
 * and started afresh.
 */
class Logger {
  constructor(options = {}) {
    this.logFile = options.logFile || 'application.log';
    this.logDir = options.logDir || 'logs';
    this.timestamp = options.timestamp !== false;
    this.format = options.format || 'txt';
    this.maxFileSize = options.maxFileSize || 1024 * 1024 * 10; // default: 10 MB

    if (!fs.existsSync(this.logDir)) {
      fs.mkdirSync(this.logDir, { recursive: true });
    }

    this.logPath = path.join(this.logDir, this.logFile);

    // Prepare the log file (rotation check, HTML header).
    this.initLogFile();

    // Keep the untouched console methods so they can be called by the
    // wrappers and reinstated by restore().
    this.originalConsole = {
      log: console.log,
      error: console.error,
      warn: console.warn,
      info: console.info,
      debug: console.debug
    };

    this.overrideConsoleMethods();
  }

  /** Drop an oversized log file and write the HTML header when needed. */
  initLogFile() {
    if (this.checkFileSize()) {
      try {
        fs.unlinkSync(this.logPath);
      } catch (error) {
        // Ignore: the file may already be gone.
      }
    }

    if (this.format === 'html') {
      this.initHtmlFile();
    }
  }

  /** @returns {boolean} True when the log file exists and has reached maxFileSize. */
  checkFileSize() {
    if (fs.existsSync(this.logPath)) {
      const stats = fs.statSync(this.logPath);
      return stats.size >= this.maxFileSize;
    }
    return false;
  }

  /** Write the HTML document header if the log file is missing or empty. */
  initHtmlFile() {
    const htmlHeader = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Application Logs</title>
<style>
  body { font-family: monospace; background: #1e1e1e; color: #d4d4d4; margin: 1em; }
  .log-entry { padding: 2px 0; border-bottom: 1px solid #333; }
  .timestamp { color: #888; margin-right: 0.5em; }
  .log-type { font-weight: bold; margin-right: 0.5em; }
  .log-entry.error .log-type { color: #f44747; }
  .log-entry.warn .log-type { color: #ffcc00; }
  .log-entry.info .log-type { color: #4fc1ff; }
  .log-entry.debug .log-type { color: #b5cea8; }
</style>
</head>
<body>
`;

    if (!fs.existsSync(this.logPath) || fs.statSync(this.logPath).size === 0) {
      fs.writeFileSync(this.logPath, htmlHeader);
    }
  }

  /** @returns {string} Current time as an ISO 8601 string. */
  getTimestamp() {
    return new Date().toISOString();
  }

  /**
   * Build one log line/entry.
   *
   * @param {string} type - Level label (`info`, `error`, ...).
   * @param {Array} args - Arguments as passed to the console method;
   *   formatted with `util.format`.
   * @returns {string} Text line or HTML entry, terminated by a newline.
   */
  formatLogMessage(type, args) {
    const msg = util.format(...args);
    const label = `[${type.toUpperCase()}]`;

    if (this.format === 'html') {
      const timestamp = this.timestamp
        ? `<span class="timestamp">[${this.getTimestamp()}]</span>`
        : '';
      return `<div class="log-entry ${type}">${timestamp}<span class="log-type">${label}</span><span class="message">${this.escapeHtml(msg)}</span></div>\n`;
    }

    return this.timestamp
      ? `[${this.getTimestamp()}] ${label} ${msg}\n`
      : `${label} ${msg}\n`;
  }

  /**
   * Escape text for embedding in the HTML log.
   *
   * `&` must be replaced first so the entities produced by the later
   * replacements are not escaped a second time. Newlines become `<br>`
   * and remaining whitespace becomes `&nbsp;` to keep the layout.
   *
   * @param {string} unsafe - Raw message text.
   * @returns {string} HTML-safe text.
   */
  escapeHtml(unsafe) {
    return unsafe
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#039;")
      .replace(/\n/g, "<br>")
      .replace(/\s/g, "&nbsp;");
  }

  /**
   * Append a message to the log file, starting a fresh file first when the
   * size limit has been reached.
   *
   * File-system failures are reported on the original console instead of
   * being thrown, so a logging problem cannot break the code that merely
   * called `console.log`.
   *
   * @param {string} message - Already formatted text to append.
   */
  writeToFile(message) {
    try {
      if (this.checkFileSize()) {
        fs.unlinkSync(this.logPath);

        // A fresh HTML file needs its header again.
        if (this.format === 'html') {
          this.initHtmlFile();
        }
      }

      fs.appendFileSync(this.logPath, message);
    } catch (error) {
      this.originalConsole.error('Logger: failed to write to log file:', error.message);
    }
  }

  /** Replace the console methods with wrappers that also write to the file. */
  overrideConsoleMethods() {
    // console method -> level label used in the file (`log` is recorded as info).
    const levels = {
      log: 'info',
      error: 'error',
      warn: 'warn',
      info: 'info',
      debug: 'debug'
    };

    for (const [method, level] of Object.entries(levels)) {
      console[method] = (...args) => {
        const logMessage = this.formatLogMessage(level, args);
        this.originalConsole[method](...args);
        this.writeToFile(logMessage);
      };
    }
  }

  /** Append the closing HTML tags (HTML format only). */
  closeHtmlFile() {
    if (this.format === 'html') {
      const htmlFooter = `
</body>
</html>
`;
      this.writeToFile(htmlFooter);
    }
  }

  /** Reinstate the original console methods and close the HTML document. */
  restore() {
    Object.assign(console, this.originalConsole);
    if (this.format === 'html') {
      this.closeHtmlFile();
    }
  }
}
240 |
241 | module.exports = Logger;
--------------------------------------------------------------------------------
/services/manualService.js:
--------------------------------------------------------------------------------
1 | const {
2 | calculateTokens,
3 | calculateTotalPromptTokens,
4 | truncateToTokenLimit,
5 | writePromptToFile
6 | } = require('./serviceUtils');
7 | const axios = require('axios');
8 | const OpenAI = require('openai');
9 | const config = require('../config/config');
10 | const AzureOpenAI = require('openai').AzureOpenAI;
11 | const emptyVar = null;
12 |
/**
 * Runs a one-off document analysis against an explicitly chosen AI provider
 * (`openai`, `azure`, `custom` or `ollama`).
 *
 * The OpenAI-style client is built from `config.aiProvider` at construction
 * time; the Ollama HTTP client is always created so the `ollama` provider
 * can be requested regardless of the configured default.
 */
class ManualService {
  constructor() {
    if (config.aiProvider === 'custom') {
      this.openai = new OpenAI({
        apiKey: config.custom.apiKey,
        // The OpenAI SDK option is `baseURL`; `baseUrl` is not recognized
        // and would leave the client pointed at the default endpoint.
        baseURL: config.custom.apiUrl
      });
    } else if (config.aiProvider === 'azure') {
      this.openai = new AzureOpenAI({
        apiKey: config.azure.apiKey,
        endpoint: config.azure.endpoint,
        deploymentName: config.azure.deploymentName,
        apiVersion: config.azure.apiVersion
      });
    } else {
      this.openai = new OpenAI({ apiKey: config.openai.apiKey });
    }

    // 5 minute timeout for Ollama requests.
    this.ollama = axios.create({
      timeout: 300000
    });
  }

  /**
   * Analyze a document with the given provider.
   *
   * @param {string} content - Document text.
   * @param {Array} existingTags - Tags known to the caller; accepted for
   *   interface compatibility, not used in the request.
   * @param {string} provider - `openai`, `ollama`, `custom` or `azure`.
   * @returns {Promise<Object>} Parsed analysis with `tags` and `correspondent`;
   *   `{ tags: [], correspondent: null }` for an unknown provider or when the
   *   OpenAI-style helpers fail. Rejections from the Ollama helper propagate
   *   to the caller because the helper's promise is returned without `await`.
   */
  async analyzeDocument(content, existingTags, provider) {
    try {
      if (provider === 'openai') {
        return this._analyzeOpenAI(content, existingTags);
      } else if (provider === 'ollama') {
        return this._analyzeOllama(content, existingTags);
      } else if (provider === 'custom') {
        return this._analyzeCustom(content, existingTags);
      } else if (provider === 'azure') {
        return this._analyzeAzure(content, existingTags);
      } else {
        throw new Error('Invalid provider');
      }
    } catch (error) {
      console.error('Error analyzing document:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Send a system prompt + document to the chat completions API and return
   * the validated analysis object.
   *
   * @param {Object} request
   * @param {string} request.model - Model or deployment name.
   * @param {string} request.systemPrompt - System message text.
   * @param {string} request.content - Document text (user message).
   * @param {boolean} request.withTemperature - Whether to send `temperature: 0.3`.
   * @returns {Promise<Object>} Parsed analysis.
   * @throws {Error} When the reply is missing, not JSON, or has the wrong shape.
   */
  async _requestChatAnalysis({ model, systemPrompt, content, withTemperature }) {
    const response = await this.openai.chat.completions.create({
      model,
      messages: [
        {
          role: "system",
          content: systemPrompt
        },
        {
          role: "user",
          content: content
        }
      ],
      ...(withTemperature && { temperature: 0.3 }),
    });

    const reply = response?.choices?.[0]?.message?.content;
    if (typeof reply !== 'string') {
      throw new Error('Invalid API response structure');
    }
    return this._extractAnalysis(reply);
  }

  /**
   * Strip Markdown code fences, parse the JSON and validate its shape.
   *
   * @param {string} rawText - Model reply.
   * @returns {Object} Parsed object with a `tags` array and `correspondent` string.
   * @throws {Error} On invalid JSON or unexpected structure.
   */
  _extractAnalysis(rawText) {
    const jsonContent = rawText.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim();

    let parsedResponse;
    try {
      parsedResponse = JSON.parse(jsonContent);
    } catch (error) {
      console.error('Failed to parse JSON response:', error);
      throw new Error('Invalid JSON response from API');
    }

    if (!parsedResponse || !Array.isArray(parsedResponse.tags) || typeof parsedResponse.correspondent !== 'string') {
      throw new Error('Invalid response structure');
    }

    return parsedResponse;
  }

  /**
   * Lenient variant of `_extractAnalysis` used for Ollama replies: returns
   * the empty analysis instead of throwing when the text cannot be used.
   *
   * @param {string} responseText - Raw text returned by the model.
   * @returns {Object} Parsed analysis or `{ tags: [], correspondent: null }`.
   */
  _parseResponse(responseText) {
    try {
      return this._extractAnalysis(responseText);
    } catch (error) {
      console.error('Failed to parse Ollama response:', error.message);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze via the OpenAI API using OPENAI_MODEL and SYSTEM_PROMPT.
   * The response-file debug append was removed: it relied on `fs`, which
   * this module never imports.
   *
   * @returns {Promise<Object>} Parsed analysis or the empty fallback on failure.
   */
  async _analyzeOpenAI(content, existingTags) {
    try {
      const model = process.env.OPENAI_MODEL;
      const systemPrompt = process.env.SYSTEM_PROMPT;
      await writePromptToFile(systemPrompt, content);

      return await this._requestChatAnalysis({
        model,
        systemPrompt,
        content,
        // o3-mini is called without a temperature parameter.
        withTemperature: model !== 'o3-mini'
      });
    } catch (error) {
      console.error('Failed to analyze document with OpenAI:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze via Azure OpenAI using AZURE_DEPLOYMENT_NAME and SYSTEM_PROMPT.
   *
   * @returns {Promise<Object>} Parsed analysis or the empty fallback on failure.
   */
  async _analyzeAzure(content, existingTags) {
    try {
      const systemPrompt = process.env.SYSTEM_PROMPT;
      await writePromptToFile(systemPrompt, content);

      return await this._requestChatAnalysis({
        model: process.env.AZURE_DEPLOYMENT_NAME,
        systemPrompt,
        content,
        withTemperature: true
      });
    } catch (error) {
      console.error('Failed to analyze document with Azure OpenAI:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze via the custom OpenAI-compatible endpoint using
   * `config.custom.model` and SYSTEM_PROMPT.
   *
   * @returns {Promise<Object>} Parsed analysis or the empty fallback on failure.
   */
  async _analyzeCustom(content, existingTags) {
    try {
      const model = config.custom.model;

      return await this._requestChatAnalysis({
        model,
        systemPrompt: process.env.SYSTEM_PROMPT,
        content,
        withTemperature: model !== 'o3-mini'
      });
    } catch (error) {
      console.error('Failed to analyze document with Custom OpenAI:', error);
      return { tags: [], correspondent: null };
    }
  }

  /**
   * Analyze via Ollama's `/api/generate` endpoint.
   *
   * The prompt is SYSTEM_PROMPT followed by the document text, and `num_ctx`
   * is sized from a rough length/4 token estimate plus 1024 response tokens,
   * capped at `config.tokenLimit`.
   *
   * @returns {Promise<Object>} Parsed analysis (empty fallback when the reply
   *   is not usable JSON).
   * @throws {Error} On request failure or an empty reply; timeouts are
   *   rethrown with a user-facing message.
   */
  async _analyzeOllama(content, existingTags) {
    try {
      const systemPrompt = process.env.SYSTEM_PROMPT ?? '';
      // The document text has to be part of the prompt, otherwise the
      // model receives only the instructions and nothing to analyze.
      const prompt = `${systemPrompt}\n\n${content}`;

      const expectedResponseTokens = 1024;
      const promptTokenCount = Math.ceil(prompt.length / 4);
      const numCtx = Math.min(
        promptTokenCount + expectedResponseTokens,
        Number(config.tokenLimit)
      );

      console.log('Prompt Token Count:', promptTokenCount);
      console.log('Expected Response Tokens:', expectedResponseTokens);
      console.log('Dynamic calculated num_ctx:', numCtx);

      const response = await this.ollama.post(`${config.ollama.apiUrl}/api/generate`, {
        model: config.ollama.model,
        prompt: prompt,
        stream: false,
        options: {
          temperature: 0.7,
          top_p: 0.9,
          repeat_penalty: 1.1,
          num_ctx: numCtx,
        }
      });

      if (!response.data || !response.data.response) {
        console.error('Unexpected Ollama response format:', response);
        throw new Error('Invalid response from Ollama API');
      }

      return this._parseResponse(response.data.response);
    } catch (error) {
      if (error.code === 'ECONNABORTED') {
        console.error('Timeout bei der Ollama-Anfrage:', error);
        throw new Error('Die Analyse hat zu lange gedauert. Bitte versuchen Sie es erneut.');
      }
      console.error('Error analyzing document with Ollama:', error);
      throw error;
    }
  }
}
255 |
256 | module.exports = ManualService;
257 |
--------------------------------------------------------------------------------
/services/ragService.js:
--------------------------------------------------------------------------------
1 | // services/ragService.js
2 | const axios = require('axios');
3 | const config = require('../config/config');
4 | const AIServiceFactory = require('./aiServiceFactory');
5 | const paperlessService = require('./paperlessService');
6 |
7 | class RagService {
8 | constructor() {
9 | this.baseUrl = process.env.RAG_SERVICE_URL || 'http://localhost:8000';
10 | }
11 |
12 | /**
13 | * Check if the RAG service is available and ready
14 | * @returns {Promise<{status: string, index_ready: boolean, data_loaded: boolean}>}
15 | */
16 | async checkStatus() {
17 | try {
18 | const response = await axios.get(`${this.baseUrl}/status`);
19 | //make test call to the LLM service to check if it is available
20 | return response.data;
21 | } catch (error) {
22 | console.error('Error checking RAG service status:', error.message);
23 | return {
24 | server_up: false,
25 | data_loaded: false,
26 | index_ready: false,
27 | error: error.message
28 | };
29 | }
30 | }
31 |
32 | /**
33 | * Search for documents matching a query
34 | * @param {string} query - The search query
35 | * @param {Object} filters - Optional filters for search
36 | * @returns {Promise} - Array of search results
37 | */
38 | async search(query, filters = {}) {
39 | try {
40 | const response = await axios.post(`${this.baseUrl}/search`, {
41 | query,
42 | ...filters
43 | });
44 | return response.data;
45 | } catch (error) {
46 | console.error('Error searching documents:', error);
47 | throw error;
48 | }
49 | }
50 |
51 | /**
52 | * Ask a question about documents and get an AI-generated answer in the same language as the question
53 | * @param {string} question - The question to ask
54 | * @returns {Promise<{answer: string, sources: Array}>} - AI response and source documents
55 | */
56 | async askQuestion(question) {
57 | try {
58 | // 1. Get context from the RAG service
59 | const response = await axios.post(`${this.baseUrl}/context`, {
60 | question,
61 | max_sources: 5
62 | });
63 |
64 | const { context, sources } = response.data;
65 |
66 | // 2. Fetch full content for each source document using doc_id
67 | let enhancedContext = context;
68 |
69 | if (sources && sources.length > 0) {
70 | // Fetch full document content for each source
71 | const fullDocContents = await Promise.all(
72 | sources.map(async (source) => {
73 | if (source.doc_id) {
74 | try {
75 | const fullContent = await paperlessService.getDocumentContent(source.doc_id);
76 | return `Full document content for ${source.title || 'Document ' + source.doc_id}:\n${fullContent}`;
77 | } catch (error) {
78 | console.error(`Error fetching content for document ${source.doc_id}:`, error.message);
79 | return '';
80 | }
81 | }
82 | return '';
83 | })
84 | );
85 |
86 | // Combine original context with full document contents
87 | enhancedContext = context + '\n\n' + fullDocContents.filter(content => content).join('\n\n');
88 | }
89 |
90 | // 3. Use AI service to generate an answer based on the enhanced context
91 | const aiService = AIServiceFactory.getService();
92 |
93 | // Create a language-agnostic prompt that works in any language
94 | const prompt = `
95 | You are a helpful assistant that answers questions about documents.
96 |
97 | Answer the following question precisely, based on the provided documents:
98 |
99 | Question: ${question}
100 |
101 | Context from relevant documents:
102 | ${enhancedContext}
103 |
104 | Important instructions:
105 | - Use ONLY information from the provided documents
106 | - If the answer is not contained in the documents, respond: "This information is not contained in the documents." (in the same language as the question)
107 | - Avoid assumptions or speculation beyond the given context
108 | - Answer in the same language as the question was asked
109 | - Do not mention document numbers or source references, answer as if it were a natural conversation
110 | `;
111 |
112 | let answer;
113 | try {
114 | answer = await aiService.generateText(prompt);
115 | } catch (error) {
116 | console.error('Error generating answer with AI service:', error);
117 | answer = "An error occurred while generating an answer. Please try again later.";
118 | }
119 |
120 | return {
121 | answer,
122 | sources
123 | };
124 | } catch (error) {
125 | console.error('Error in askQuestion:', error);
126 | throw new Error("An error occurred while processing your question. Please try again later.");
127 | }
128 | }
129 |
130 | /**
131 | * Start indexing documents in the RAG service
132 | * @param {boolean} force - Whether to force refresh from source
133 | * @returns {Promise